162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Resizable virtual memory filesystem for Linux.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Copyright (C) 2000 Linus Torvalds.
562306a36Sopenharmony_ci *		 2000 Transmeta Corp.
662306a36Sopenharmony_ci *		 2000-2001 Christoph Rohland
762306a36Sopenharmony_ci *		 2000-2001 SAP AG
862306a36Sopenharmony_ci *		 2002 Red Hat Inc.
962306a36Sopenharmony_ci * Copyright (C) 2002-2011 Hugh Dickins.
1062306a36Sopenharmony_ci * Copyright (C) 2011 Google Inc.
1162306a36Sopenharmony_ci * Copyright (C) 2002-2005 VERITAS Software Corporation.
1262306a36Sopenharmony_ci * Copyright (C) 2004 Andi Kleen, SuSE Labs
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * Extended attribute support for tmpfs:
1562306a36Sopenharmony_ci * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
1662306a36Sopenharmony_ci * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci * tiny-shmem:
1962306a36Sopenharmony_ci * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci * This file is released under the GPL.
2262306a36Sopenharmony_ci */
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci#include <linux/fs.h>
2562306a36Sopenharmony_ci#include <linux/init.h>
2662306a36Sopenharmony_ci#include <linux/vfs.h>
2762306a36Sopenharmony_ci#include <linux/mount.h>
2862306a36Sopenharmony_ci#include <linux/ramfs.h>
2962306a36Sopenharmony_ci#include <linux/pagemap.h>
3062306a36Sopenharmony_ci#include <linux/file.h>
3162306a36Sopenharmony_ci#include <linux/fileattr.h>
3262306a36Sopenharmony_ci#include <linux/mm.h>
3362306a36Sopenharmony_ci#include <linux/random.h>
3462306a36Sopenharmony_ci#include <linux/sched/signal.h>
3562306a36Sopenharmony_ci#include <linux/export.h>
3662306a36Sopenharmony_ci#include <linux/shmem_fs.h>
3762306a36Sopenharmony_ci#include <linux/swap.h>
3862306a36Sopenharmony_ci#include <linux/uio.h>
3962306a36Sopenharmony_ci#include <linux/hugetlb.h>
4062306a36Sopenharmony_ci#include <linux/fs_parser.h>
4162306a36Sopenharmony_ci#include <linux/swapfile.h>
4262306a36Sopenharmony_ci#include <linux/iversion.h>
4362306a36Sopenharmony_ci#include "swap.h"
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_cistatic struct vfsmount *shm_mnt;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci#ifdef CONFIG_SHMEM
4862306a36Sopenharmony_ci/*
4962306a36Sopenharmony_ci * This virtual memory filesystem is heavily based on the ramfs. It
5062306a36Sopenharmony_ci * extends ramfs by the ability to use swap and honor resource limits
5162306a36Sopenharmony_ci * which makes it a completely usable filesystem.
5262306a36Sopenharmony_ci */
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci#include <linux/xattr.h>
5562306a36Sopenharmony_ci#include <linux/exportfs.h>
5662306a36Sopenharmony_ci#include <linux/posix_acl.h>
5762306a36Sopenharmony_ci#include <linux/posix_acl_xattr.h>
5862306a36Sopenharmony_ci#include <linux/mman.h>
5962306a36Sopenharmony_ci#include <linux/string.h>
6062306a36Sopenharmony_ci#include <linux/slab.h>
6162306a36Sopenharmony_ci#include <linux/backing-dev.h>
6262306a36Sopenharmony_ci#include <linux/writeback.h>
6362306a36Sopenharmony_ci#include <linux/pagevec.h>
6462306a36Sopenharmony_ci#include <linux/percpu_counter.h>
6562306a36Sopenharmony_ci#include <linux/falloc.h>
6662306a36Sopenharmony_ci#include <linux/splice.h>
6762306a36Sopenharmony_ci#include <linux/security.h>
6862306a36Sopenharmony_ci#include <linux/swapops.h>
6962306a36Sopenharmony_ci#include <linux/mempolicy.h>
7062306a36Sopenharmony_ci#include <linux/namei.h>
7162306a36Sopenharmony_ci#include <linux/ctype.h>
7262306a36Sopenharmony_ci#include <linux/migrate.h>
7362306a36Sopenharmony_ci#include <linux/highmem.h>
7462306a36Sopenharmony_ci#include <linux/seq_file.h>
7562306a36Sopenharmony_ci#include <linux/magic.h>
7662306a36Sopenharmony_ci#include <linux/syscalls.h>
7762306a36Sopenharmony_ci#include <linux/fcntl.h>
7862306a36Sopenharmony_ci#include <uapi/linux/memfd.h>
7962306a36Sopenharmony_ci#include <linux/rmap.h>
8062306a36Sopenharmony_ci#include <linux/uuid.h>
8162306a36Sopenharmony_ci#include <linux/quotaops.h>
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci#include <linux/uaccess.h>
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci#include "internal.h"
8662306a36Sopenharmony_ci
/* Number of 512-byte units that make up one page. */
#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
/* Page count for @size bytes: page-align the size, then shift by PAGE_SHIFT. */
#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci/* Pretend that each entry is of this size in directory's i_size */
9162306a36Sopenharmony_ci#define BOGO_DIRENT_SIZE 20
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci/* Pretend that one inode + its dentry occupy this much memory */
9462306a36Sopenharmony_ci#define BOGO_INODE_SIZE 1024
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
9762306a36Sopenharmony_ci#define SHORT_SYMLINK_LEN 128
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci/*
10062306a36Sopenharmony_ci * shmem_fallocate communicates with shmem_fault or shmem_writepage via
10162306a36Sopenharmony_ci * inode->i_private (with i_rwsem making sure that it has only one user at
10262306a36Sopenharmony_ci * a time): we would prefer not to enlarge the shmem inode just for that.
10362306a36Sopenharmony_ci */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	/* The two counters below are counts, not offsets, despite the pgoff_t type */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};
11162306a36Sopenharmony_ci
/*
 * Option values for a shmem mount, plus the SHMEM_SEEN_* flag bits.
 *
 * NOTE(review): the code that fills in and consumes this structure is not
 * part of this section of the file; the per-field meanings should be
 * confirmed against the option parser.
 */
struct shmem_options {
	unsigned long long blocks;
	unsigned long long inodes;
	struct mempolicy *mpol;
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
	bool full_inums;
	int huge;
	int seen;	/* presumably a mask of the SHMEM_SEEN_* bits below - confirm */
	bool noswap;
	unsigned short quota_types;
	struct shmem_quota_limits qlimits;
/* Each flag is a distinct power of two, so they can be OR-ed together. */
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
#define SHMEM_SEEN_NOSWAP 16
#define SHMEM_SEEN_QUOTA 32
};
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS
/* Default block limit: half of the page count reported by totalram_pages(). */
static unsigned long shmem_default_max_blocks(void)
{
	unsigned long nr_pages = totalram_pages();

	return nr_pages / 2;
}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cistatic unsigned long shmem_default_max_inodes(void)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	unsigned long nr_pages = totalram_pages();
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	return min3(nr_pages - totalhigh_pages(), nr_pages / 2,
14462306a36Sopenharmony_ci			ULONG_MAX / BOGO_INODE_SIZE);
14562306a36Sopenharmony_ci}
14662306a36Sopenharmony_ci#endif
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_cistatic int shmem_swapin_folio(struct inode *inode, pgoff_t index,
14962306a36Sopenharmony_ci			     struct folio **foliop, enum sgp_type sgp,
15062306a36Sopenharmony_ci			     gfp_t gfp, struct vm_area_struct *vma,
15162306a36Sopenharmony_ci			     vm_fault_t *fault_type);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_cistatic inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
15462306a36Sopenharmony_ci{
15562306a36Sopenharmony_ci	return sb->s_fs_info;
15662306a36Sopenharmony_ci}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci/*
15962306a36Sopenharmony_ci * shmem_file_setup pre-accounts the whole fixed size of a VM object,
16062306a36Sopenharmony_ci * for shared memory and for shared anonymous (/dev/zero) mappings
16162306a36Sopenharmony_ci * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
16262306a36Sopenharmony_ci * consistent with the pre-accounting of private mappings ...
16362306a36Sopenharmony_ci */
16462306a36Sopenharmony_cistatic inline int shmem_acct_size(unsigned long flags, loff_t size)
16562306a36Sopenharmony_ci{
16662306a36Sopenharmony_ci	return (flags & VM_NORESERVE) ?
16762306a36Sopenharmony_ci		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic inline void shmem_unacct_size(unsigned long flags, loff_t size)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	if (!(flags & VM_NORESERVE))
17362306a36Sopenharmony_ci		vm_unacct_memory(VM_ACCT(size));
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_cistatic inline int shmem_reacct_size(unsigned long flags,
17762306a36Sopenharmony_ci		loff_t oldsize, loff_t newsize)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	if (!(flags & VM_NORESERVE)) {
18062306a36Sopenharmony_ci		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
18162306a36Sopenharmony_ci			return security_vm_enough_memory_mm(current->mm,
18262306a36Sopenharmony_ci					VM_ACCT(newsize) - VM_ACCT(oldsize));
18362306a36Sopenharmony_ci		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
18462306a36Sopenharmony_ci			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci	return 0;
18762306a36Sopenharmony_ci}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci/*
19062306a36Sopenharmony_ci * ... whereas tmpfs objects are accounted incrementally as
19162306a36Sopenharmony_ci * pages are allocated, in order to allow large sparse files.
19262306a36Sopenharmony_ci * shmem_get_folio reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
19362306a36Sopenharmony_ci * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
19462306a36Sopenharmony_ci */
19562306a36Sopenharmony_cistatic inline int shmem_acct_block(unsigned long flags, long pages)
19662306a36Sopenharmony_ci{
19762306a36Sopenharmony_ci	if (!(flags & VM_NORESERVE))
19862306a36Sopenharmony_ci		return 0;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	return security_vm_enough_memory_mm(current->mm,
20162306a36Sopenharmony_ci			pages * VM_ACCT(PAGE_SIZE));
20262306a36Sopenharmony_ci}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_cistatic inline void shmem_unacct_blocks(unsigned long flags, long pages)
20562306a36Sopenharmony_ci{
20662306a36Sopenharmony_ci	if (flags & VM_NORESERVE)
20762306a36Sopenharmony_ci		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_cistatic int shmem_inode_acct_block(struct inode *inode, long pages)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
21362306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
21462306a36Sopenharmony_ci	int err = -ENOSPC;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (shmem_acct_block(info->flags, pages))
21762306a36Sopenharmony_ci		return err;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	might_sleep();	/* when quotas */
22062306a36Sopenharmony_ci	if (sbinfo->max_blocks) {
22162306a36Sopenharmony_ci		if (percpu_counter_compare(&sbinfo->used_blocks,
22262306a36Sopenharmony_ci					   sbinfo->max_blocks - pages) > 0)
22362306a36Sopenharmony_ci			goto unacct;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		err = dquot_alloc_block_nodirty(inode, pages);
22662306a36Sopenharmony_ci		if (err)
22762306a36Sopenharmony_ci			goto unacct;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci		percpu_counter_add(&sbinfo->used_blocks, pages);
23062306a36Sopenharmony_ci	} else {
23162306a36Sopenharmony_ci		err = dquot_alloc_block_nodirty(inode, pages);
23262306a36Sopenharmony_ci		if (err)
23362306a36Sopenharmony_ci			goto unacct;
23462306a36Sopenharmony_ci	}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	return 0;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ciunacct:
23962306a36Sopenharmony_ci	shmem_unacct_blocks(info->flags, pages);
24062306a36Sopenharmony_ci	return err;
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic void shmem_inode_unacct_blocks(struct inode *inode, long pages)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
24662306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	might_sleep();	/* when quotas */
24962306a36Sopenharmony_ci	dquot_free_block_nodirty(inode, pages);
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	if (sbinfo->max_blocks)
25262306a36Sopenharmony_ci		percpu_counter_sub(&sbinfo->used_blocks, pages);
25362306a36Sopenharmony_ci	shmem_unacct_blocks(info->flags, pages);
25462306a36Sopenharmony_ci}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_cistatic const struct super_operations shmem_ops;
25762306a36Sopenharmony_ciconst struct address_space_operations shmem_aops;
25862306a36Sopenharmony_cistatic const struct file_operations shmem_file_operations;
25962306a36Sopenharmony_cistatic const struct inode_operations shmem_inode_operations;
26062306a36Sopenharmony_cistatic const struct inode_operations shmem_dir_inode_operations;
26162306a36Sopenharmony_cistatic const struct inode_operations shmem_special_inode_operations;
26262306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_vm_ops;
26362306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_anon_vm_ops;
26462306a36Sopenharmony_cistatic struct file_system_type shmem_fs_type;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_cibool vma_is_anon_shmem(struct vm_area_struct *vma)
26762306a36Sopenharmony_ci{
26862306a36Sopenharmony_ci	return vma->vm_ops == &shmem_anon_vm_ops;
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_cibool vma_is_shmem(struct vm_area_struct *vma)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic LIST_HEAD(shmem_swaplist);
27762306a36Sopenharmony_cistatic DEFINE_MUTEX(shmem_swaplist_mutex);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_cistatic int shmem_enable_quotas(struct super_block *sb,
28262306a36Sopenharmony_ci			       unsigned short quota_types)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	int type, err = 0;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
28762306a36Sopenharmony_ci	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
28862306a36Sopenharmony_ci		if (!(quota_types & (1 << type)))
28962306a36Sopenharmony_ci			continue;
29062306a36Sopenharmony_ci		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
29162306a36Sopenharmony_ci					  DQUOT_USAGE_ENABLED |
29262306a36Sopenharmony_ci					  DQUOT_LIMITS_ENABLED);
29362306a36Sopenharmony_ci		if (err)
29462306a36Sopenharmony_ci			goto out_err;
29562306a36Sopenharmony_ci	}
29662306a36Sopenharmony_ci	return 0;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ciout_err:
29962306a36Sopenharmony_ci	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
30062306a36Sopenharmony_ci		type, err);
30162306a36Sopenharmony_ci	for (type--; type >= 0; type--)
30262306a36Sopenharmony_ci		dquot_quota_off(sb, type);
30362306a36Sopenharmony_ci	return err;
30462306a36Sopenharmony_ci}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_cistatic void shmem_disable_quotas(struct super_block *sb)
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci	int type;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
31162306a36Sopenharmony_ci		dquot_quota_off(sb, type);
31262306a36Sopenharmony_ci}
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_cistatic struct dquot __rcu **shmem_get_dquots(struct inode *inode)
31562306a36Sopenharmony_ci{
31662306a36Sopenharmony_ci	return SHMEM_I(inode)->i_dquot;
31762306a36Sopenharmony_ci}
31862306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci/*
32162306a36Sopenharmony_ci * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
32262306a36Sopenharmony_ci * produces a novel ino for the newly allocated inode.
32362306a36Sopenharmony_ci *
32462306a36Sopenharmony_ci * It may also be called when making a hard link to permit the space needed by
32562306a36Sopenharmony_ci * each dentry. However, in that case, no new inode number is needed since that
32662306a36Sopenharmony_ci * internally draws from another pool of inode numbers (currently global
32762306a36Sopenharmony_ci * get_next_ino()). This case is indicated by passing NULL as inop.
32862306a36Sopenharmony_ci */
32962306a36Sopenharmony_ci#define SHMEM_INO_BATCH 1024
static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	ino_t ino;

	/*
	 * Returns 0 on success, or -ENOSPC when the mount has an inode limit
	 * (max_inodes != 0) and less than BOGO_INODE_SIZE of free_ispace is
	 * left.  On success with a non-NULL inop, *inop holds the new ino.
	 * For SB_KERNMOUNT superblocks no limit is checked, and a NULL inop
	 * makes the call a no-op.
	 */
	if (!(sb->s_flags & SB_KERNMOUNT)) {
		/* Limit check and ino allocation both happen under stat_lock. */
		raw_spin_lock(&sbinfo->stat_lock);
		if (sbinfo->max_inodes) {
			if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
				raw_spin_unlock(&sbinfo->stat_lock);
				return -ENOSPC;
			}
			/* Charge the fixed per-inode cost against the limit. */
			sbinfo->free_ispace -= BOGO_INODE_SIZE;
		}
		if (inop) {
			ino = sbinfo->next_ino++;
			/* Skip a number that is_zero_ino() flags; take the next one. */
			if (unlikely(is_zero_ino(ino)))
				ino = sbinfo->next_ino++;
			if (unlikely(!sbinfo->full_inums &&
				     ino > UINT_MAX)) {
				/*
				 * Emulate get_next_ino uint wraparound for
				 * compatibility
				 */
				if (IS_ENABLED(CONFIG_64BIT))
					pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
						__func__, MINOR(sb->s_dev));
				/* Restart at 1, so the wrapped sequence never hands out 0. */
				sbinfo->next_ino = 1;
				ino = sbinfo->next_ino++;
			}
			*inop = ino;
		}
		raw_spin_unlock(&sbinfo->stat_lock);
	} else if (inop) {
		/*
		 * __shmem_file_setup, one of our callers, is lock-free: it
		 * doesn't hold stat_lock in shmem_reserve_inode since
		 * max_inodes is always 0, and is called from potentially
		 * unknown contexts. As such, use a per-cpu batched allocator
		 * which doesn't require the per-sb stat_lock unless we are at
		 * the batch boundary.
		 *
		 * We don't need to worry about inode{32,64} since SB_KERNMOUNT
		 * shmem mounts are not exposed to userspace, so we don't need
		 * to worry about things like glibc compatibility.
		 */
		ino_t *next_ino;

		/* get_cpu()/put_cpu() bracket the access to this CPU's slot. */
		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
		ino = *next_ino;
		/*
		 * A value that is a multiple of SHMEM_INO_BATCH marks a batch
		 * boundary: take the next SHMEM_INO_BATCH numbers from the
		 * shared per-sb counter.
		 */
		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
			raw_spin_lock(&sbinfo->stat_lock);
			ino = sbinfo->next_ino;
			sbinfo->next_ino += SHMEM_INO_BATCH;
			raw_spin_unlock(&sbinfo->stat_lock);
			if (unlikely(is_zero_ino(ino)))
				ino++;
		}
		*inop = ino;
		/* Remember the following number for this CPU's next call. */
		*next_ino = ++ino;
		put_cpu();
	}

	return 0;
}
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_cistatic void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
39762306a36Sopenharmony_ci{
39862306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
39962306a36Sopenharmony_ci	if (sbinfo->max_inodes) {
40062306a36Sopenharmony_ci		raw_spin_lock(&sbinfo->stat_lock);
40162306a36Sopenharmony_ci		sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
40262306a36Sopenharmony_ci		raw_spin_unlock(&sbinfo->stat_lock);
40362306a36Sopenharmony_ci	}
40462306a36Sopenharmony_ci}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci/**
40762306a36Sopenharmony_ci * shmem_recalc_inode - recalculate the block usage of an inode
40862306a36Sopenharmony_ci * @inode: inode to recalc
40962306a36Sopenharmony_ci * @alloced: the change in number of pages allocated to inode
41062306a36Sopenharmony_ci * @swapped: the change in number of pages swapped from inode
41162306a36Sopenharmony_ci *
41262306a36Sopenharmony_ci * We have to calculate the free blocks since the mm can drop
41362306a36Sopenharmony_ci * undirtied hole pages behind our back.
41462306a36Sopenharmony_ci *
41562306a36Sopenharmony_ci * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
41662306a36Sopenharmony_ci * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
41762306a36Sopenharmony_ci */
static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	/* Apply both deltas and compute 'freed' while holding info->lock. */
	spin_lock(&info->lock);
	info->alloced += alloced;
	info->swapped += swapped;
	/* Pages still counted in alloced but in neither page cache nor swap. */
	freed = info->alloced - info->swapped -
		READ_ONCE(inode->i_mapping->nrpages);
	/*
	 * Special case: whereas normally shmem_recalc_inode() is called
	 * after i_mapping->nrpages has already been adjusted (up or down),
	 * shmem_writepage() has to raise swapped before nrpages is lowered -
	 * to stop a racing shmem_recalc_inode() from thinking that a page has
	 * been freed.  Compensate here, to avoid the need for a followup call.
	 */
	if (swapped > 0)
		freed += swapped;
	/* Only a positive surplus is dropped; a negative value is ignored. */
	if (freed > 0)
		info->alloced -= freed;
	spin_unlock(&info->lock);

	/* The quota case may block */
	/* ... so the block accounting is released after info->lock is dropped. */
	if (freed > 0)
		shmem_inode_unacct_blocks(inode, freed);
}
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_cibool shmem_charge(struct inode *inode, long pages)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	if (shmem_inode_acct_block(inode, pages))
45162306a36Sopenharmony_ci		return false;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
45462306a36Sopenharmony_ci	xa_lock_irq(&mapping->i_pages);
45562306a36Sopenharmony_ci	mapping->nrpages += pages;
45662306a36Sopenharmony_ci	xa_unlock_irq(&mapping->i_pages);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	shmem_recalc_inode(inode, pages, 0);
45962306a36Sopenharmony_ci	return true;
46062306a36Sopenharmony_ci}
46162306a36Sopenharmony_ci
void shmem_uncharge(struct inode *inode, long pages)
{
	/* pages argument is currently unused: keep it to help debugging */
	/* nrpages adjustment done by __filemap_remove_folio() or caller */

	/*
	 * With zero deltas, shmem_recalc_inode() only reconciles
	 * info->alloced with the current nrpages and swapped counts.
	 */
	shmem_recalc_inode(inode, 0, 0);
}
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci/*
47162306a36Sopenharmony_ci * Replace item expected in xarray by a new item, while holding xa_lock.
47262306a36Sopenharmony_ci */
47362306a36Sopenharmony_cistatic int shmem_replace_entry(struct address_space *mapping,
47462306a36Sopenharmony_ci			pgoff_t index, void *expected, void *replacement)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	XA_STATE(xas, &mapping->i_pages, index);
47762306a36Sopenharmony_ci	void *item;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	VM_BUG_ON(!expected);
48062306a36Sopenharmony_ci	VM_BUG_ON(!replacement);
48162306a36Sopenharmony_ci	item = xas_load(&xas);
48262306a36Sopenharmony_ci	if (item != expected)
48362306a36Sopenharmony_ci		return -ENOENT;
48462306a36Sopenharmony_ci	xas_store(&xas, replacement);
48562306a36Sopenharmony_ci	return 0;
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci/*
48962306a36Sopenharmony_ci * Sometimes, before we decide whether to proceed or to fail, we must check
49062306a36Sopenharmony_ci * that an entry was not already brought back from swap by a racing thread.
49162306a36Sopenharmony_ci *
49262306a36Sopenharmony_ci * Checking page is not enough: by the time a SwapCache page is locked, it
49362306a36Sopenharmony_ci * might be reused, and again be SwapCache, using the same swap as before.
49462306a36Sopenharmony_ci */
49562306a36Sopenharmony_cistatic bool shmem_confirm_swap(struct address_space *mapping,
49662306a36Sopenharmony_ci			       pgoff_t index, swp_entry_t swap)
49762306a36Sopenharmony_ci{
49862306a36Sopenharmony_ci	return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap);
49962306a36Sopenharmony_ci}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci/*
50262306a36Sopenharmony_ci * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
50362306a36Sopenharmony_ci *
50462306a36Sopenharmony_ci * SHMEM_HUGE_NEVER:
50562306a36Sopenharmony_ci *	disables huge pages for the mount;
50662306a36Sopenharmony_ci * SHMEM_HUGE_ALWAYS:
50762306a36Sopenharmony_ci *	enables huge pages for the mount;
50862306a36Sopenharmony_ci * SHMEM_HUGE_WITHIN_SIZE:
50962306a36Sopenharmony_ci *	only allocate huge pages if the page will be fully within i_size,
51062306a36Sopenharmony_ci *	also respect fadvise()/madvise() hints;
51162306a36Sopenharmony_ci * SHMEM_HUGE_ADVISE:
51262306a36Sopenharmony_ci *	only allocate huge pages if requested with fadvise()/madvise();
51362306a36Sopenharmony_ci */
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci#define SHMEM_HUGE_NEVER	0
51662306a36Sopenharmony_ci#define SHMEM_HUGE_ALWAYS	1
51762306a36Sopenharmony_ci#define SHMEM_HUGE_WITHIN_SIZE	2
51862306a36Sopenharmony_ci#define SHMEM_HUGE_ADVISE	3
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci/*
52162306a36Sopenharmony_ci * Special values.
 * Can only be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
52362306a36Sopenharmony_ci *
52462306a36Sopenharmony_ci * SHMEM_HUGE_DENY:
52562306a36Sopenharmony_ci *	disables huge on shm_mnt and all mounts, for emergency use;
52662306a36Sopenharmony_ci * SHMEM_HUGE_FORCE:
52762306a36Sopenharmony_ci *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
52862306a36Sopenharmony_ci *
52962306a36Sopenharmony_ci */
53062306a36Sopenharmony_ci#define SHMEM_HUGE_DENY		(-1)
53162306a36Sopenharmony_ci#define SHMEM_HUGE_FORCE	(-2)
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
53462306a36Sopenharmony_ci/* ifdef here to avoid bloating shmem.o when not necessary */
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_cistatic int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_cibool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
53962306a36Sopenharmony_ci		   struct mm_struct *mm, unsigned long vm_flags)
54062306a36Sopenharmony_ci{
54162306a36Sopenharmony_ci	loff_t i_size;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
54462306a36Sopenharmony_ci		return false;
54562306a36Sopenharmony_ci	if (mm && ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &mm->flags)))
54662306a36Sopenharmony_ci		return false;
54762306a36Sopenharmony_ci	if (shmem_huge == SHMEM_HUGE_DENY)
54862306a36Sopenharmony_ci		return false;
54962306a36Sopenharmony_ci	if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE)
55062306a36Sopenharmony_ci		return true;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	switch (SHMEM_SB(inode->i_sb)->huge) {
55362306a36Sopenharmony_ci	case SHMEM_HUGE_ALWAYS:
55462306a36Sopenharmony_ci		return true;
55562306a36Sopenharmony_ci	case SHMEM_HUGE_WITHIN_SIZE:
55662306a36Sopenharmony_ci		index = round_up(index + 1, HPAGE_PMD_NR);
55762306a36Sopenharmony_ci		i_size = round_up(i_size_read(inode), PAGE_SIZE);
55862306a36Sopenharmony_ci		if (i_size >> PAGE_SHIFT >= index)
55962306a36Sopenharmony_ci			return true;
56062306a36Sopenharmony_ci		fallthrough;
56162306a36Sopenharmony_ci	case SHMEM_HUGE_ADVISE:
56262306a36Sopenharmony_ci		if (mm && (vm_flags & VM_HUGEPAGE))
56362306a36Sopenharmony_ci			return true;
56462306a36Sopenharmony_ci		fallthrough;
56562306a36Sopenharmony_ci	default:
56662306a36Sopenharmony_ci		return false;
56762306a36Sopenharmony_ci	}
56862306a36Sopenharmony_ci}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci#if defined(CONFIG_SYSFS)
57162306a36Sopenharmony_cistatic int shmem_parse_huge(const char *str)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	if (!strcmp(str, "never"))
57462306a36Sopenharmony_ci		return SHMEM_HUGE_NEVER;
57562306a36Sopenharmony_ci	if (!strcmp(str, "always"))
57662306a36Sopenharmony_ci		return SHMEM_HUGE_ALWAYS;
57762306a36Sopenharmony_ci	if (!strcmp(str, "within_size"))
57862306a36Sopenharmony_ci		return SHMEM_HUGE_WITHIN_SIZE;
57962306a36Sopenharmony_ci	if (!strcmp(str, "advise"))
58062306a36Sopenharmony_ci		return SHMEM_HUGE_ADVISE;
58162306a36Sopenharmony_ci	if (!strcmp(str, "deny"))
58262306a36Sopenharmony_ci		return SHMEM_HUGE_DENY;
58362306a36Sopenharmony_ci	if (!strcmp(str, "force"))
58462306a36Sopenharmony_ci		return SHMEM_HUGE_FORCE;
58562306a36Sopenharmony_ci	return -EINVAL;
58662306a36Sopenharmony_ci}
58762306a36Sopenharmony_ci#endif
58862306a36Sopenharmony_ci
#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
/*
 * Inverse of the keyword parser: map a SHMEM_HUGE_* value to its keyword.
 *
 * A value outside the known set trips VM_BUG_ON() and otherwise yields the
 * placeholder string "bad_val".
 */
static const char *shmem_format_huge(int huge)
{
	if (huge == SHMEM_HUGE_NEVER)
		return "never";
	if (huge == SHMEM_HUGE_ALWAYS)
		return "always";
	if (huge == SHMEM_HUGE_WITHIN_SIZE)
		return "within_size";
	if (huge == SHMEM_HUGE_ADVISE)
		return "advise";
	if (huge == SHMEM_HUGE_DENY)
		return "deny";
	if (huge == SHMEM_HUGE_FORCE)
		return "force";

	/* Not a policy value we know about. */
	VM_BUG_ON(1);
	return "bad_val";
}
#endif
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_cistatic unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
61362306a36Sopenharmony_ci		struct shrink_control *sc, unsigned long nr_to_split)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	LIST_HEAD(list), *pos, *next;
61662306a36Sopenharmony_ci	LIST_HEAD(to_remove);
61762306a36Sopenharmony_ci	struct inode *inode;
61862306a36Sopenharmony_ci	struct shmem_inode_info *info;
61962306a36Sopenharmony_ci	struct folio *folio;
62062306a36Sopenharmony_ci	unsigned long batch = sc ? sc->nr_to_scan : 128;
62162306a36Sopenharmony_ci	int split = 0;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	if (list_empty(&sbinfo->shrinklist))
62462306a36Sopenharmony_ci		return SHRINK_STOP;
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	spin_lock(&sbinfo->shrinklist_lock);
62762306a36Sopenharmony_ci	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
62862306a36Sopenharmony_ci		info = list_entry(pos, struct shmem_inode_info, shrinklist);
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci		/* pin the inode */
63162306a36Sopenharmony_ci		inode = igrab(&info->vfs_inode);
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci		/* inode is about to be evicted */
63462306a36Sopenharmony_ci		if (!inode) {
63562306a36Sopenharmony_ci			list_del_init(&info->shrinklist);
63662306a36Sopenharmony_ci			goto next;
63762306a36Sopenharmony_ci		}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci		/* Check if there's anything to gain */
64062306a36Sopenharmony_ci		if (round_up(inode->i_size, PAGE_SIZE) ==
64162306a36Sopenharmony_ci				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
64262306a36Sopenharmony_ci			list_move(&info->shrinklist, &to_remove);
64362306a36Sopenharmony_ci			goto next;
64462306a36Sopenharmony_ci		}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci		list_move(&info->shrinklist, &list);
64762306a36Sopenharmony_cinext:
64862306a36Sopenharmony_ci		sbinfo->shrinklist_len--;
64962306a36Sopenharmony_ci		if (!--batch)
65062306a36Sopenharmony_ci			break;
65162306a36Sopenharmony_ci	}
65262306a36Sopenharmony_ci	spin_unlock(&sbinfo->shrinklist_lock);
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	list_for_each_safe(pos, next, &to_remove) {
65562306a36Sopenharmony_ci		info = list_entry(pos, struct shmem_inode_info, shrinklist);
65662306a36Sopenharmony_ci		inode = &info->vfs_inode;
65762306a36Sopenharmony_ci		list_del_init(&info->shrinklist);
65862306a36Sopenharmony_ci		iput(inode);
65962306a36Sopenharmony_ci	}
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	list_for_each_safe(pos, next, &list) {
66262306a36Sopenharmony_ci		int ret;
66362306a36Sopenharmony_ci		pgoff_t index;
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci		info = list_entry(pos, struct shmem_inode_info, shrinklist);
66662306a36Sopenharmony_ci		inode = &info->vfs_inode;
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci		if (nr_to_split && split >= nr_to_split)
66962306a36Sopenharmony_ci			goto move_back;
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci		index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
67262306a36Sopenharmony_ci		folio = filemap_get_folio(inode->i_mapping, index);
67362306a36Sopenharmony_ci		if (IS_ERR(folio))
67462306a36Sopenharmony_ci			goto drop;
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci		/* No huge page at the end of the file: nothing to split */
67762306a36Sopenharmony_ci		if (!folio_test_large(folio)) {
67862306a36Sopenharmony_ci			folio_put(folio);
67962306a36Sopenharmony_ci			goto drop;
68062306a36Sopenharmony_ci		}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci		/*
68362306a36Sopenharmony_ci		 * Move the inode on the list back to shrinklist if we failed
68462306a36Sopenharmony_ci		 * to lock the page at this time.
68562306a36Sopenharmony_ci		 *
68662306a36Sopenharmony_ci		 * Waiting for the lock may lead to deadlock in the
68762306a36Sopenharmony_ci		 * reclaim path.
68862306a36Sopenharmony_ci		 */
68962306a36Sopenharmony_ci		if (!folio_trylock(folio)) {
69062306a36Sopenharmony_ci			folio_put(folio);
69162306a36Sopenharmony_ci			goto move_back;
69262306a36Sopenharmony_ci		}
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci		ret = split_folio(folio);
69562306a36Sopenharmony_ci		folio_unlock(folio);
69662306a36Sopenharmony_ci		folio_put(folio);
69762306a36Sopenharmony_ci
69862306a36Sopenharmony_ci		/* If split failed move the inode on the list back to shrinklist */
69962306a36Sopenharmony_ci		if (ret)
70062306a36Sopenharmony_ci			goto move_back;
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci		split++;
70362306a36Sopenharmony_cidrop:
70462306a36Sopenharmony_ci		list_del_init(&info->shrinklist);
70562306a36Sopenharmony_ci		goto put;
70662306a36Sopenharmony_cimove_back:
70762306a36Sopenharmony_ci		/*
70862306a36Sopenharmony_ci		 * Make sure the inode is either on the global list or deleted
70962306a36Sopenharmony_ci		 * from any local list before iput() since it could be deleted
71062306a36Sopenharmony_ci		 * in another thread once we put the inode (then the local list
71162306a36Sopenharmony_ci		 * is corrupted).
71262306a36Sopenharmony_ci		 */
71362306a36Sopenharmony_ci		spin_lock(&sbinfo->shrinklist_lock);
71462306a36Sopenharmony_ci		list_move(&info->shrinklist, &sbinfo->shrinklist);
71562306a36Sopenharmony_ci		sbinfo->shrinklist_len++;
71662306a36Sopenharmony_ci		spin_unlock(&sbinfo->shrinklist_lock);
71762306a36Sopenharmony_ciput:
71862306a36Sopenharmony_ci		iput(inode);
71962306a36Sopenharmony_ci	}
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	return split;
72262306a36Sopenharmony_ci}
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_cistatic long shmem_unused_huge_scan(struct super_block *sb,
72562306a36Sopenharmony_ci		struct shrink_control *sc)
72662306a36Sopenharmony_ci{
72762306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	if (!READ_ONCE(sbinfo->shrinklist_len))
73062306a36Sopenharmony_ci		return SHRINK_STOP;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	return shmem_unused_huge_shrink(sbinfo, sc, 0);
73362306a36Sopenharmony_ci}
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_cistatic long shmem_unused_huge_count(struct super_block *sb,
73662306a36Sopenharmony_ci		struct shrink_control *sc)
73762306a36Sopenharmony_ci{
73862306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
73962306a36Sopenharmony_ci	return READ_ONCE(sbinfo->shrinklist_len);
74062306a36Sopenharmony_ci}
74162306a36Sopenharmony_ci#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci#define shmem_huge SHMEM_HUGE_DENY
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_cibool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
74662306a36Sopenharmony_ci		   struct mm_struct *mm, unsigned long vm_flags)
74762306a36Sopenharmony_ci{
74862306a36Sopenharmony_ci	return false;
74962306a36Sopenharmony_ci}
75062306a36Sopenharmony_ci
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE variant: there is nothing to split, so
 * always report zero.
 */
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
					      struct shrink_control *sc,
					      unsigned long nr_to_split)
{
	return 0;
}
75662306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci/*
75962306a36Sopenharmony_ci * Like filemap_add_folio, but error if expected item has gone.
76062306a36Sopenharmony_ci */
76162306a36Sopenharmony_cistatic int shmem_add_to_page_cache(struct folio *folio,
76262306a36Sopenharmony_ci				   struct address_space *mapping,
76362306a36Sopenharmony_ci				   pgoff_t index, void *expected, gfp_t gfp,
76462306a36Sopenharmony_ci				   struct mm_struct *charge_mm)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
76762306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
76862306a36Sopenharmony_ci	int error;
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
77162306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
77262306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
77362306a36Sopenharmony_ci	VM_BUG_ON(expected && folio_test_large(folio));
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	folio_ref_add(folio, nr);
77662306a36Sopenharmony_ci	folio->mapping = mapping;
77762306a36Sopenharmony_ci	folio->index = index;
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	if (!folio_test_swapcache(folio)) {
78062306a36Sopenharmony_ci		error = mem_cgroup_charge(folio, charge_mm, gfp);
78162306a36Sopenharmony_ci		if (error) {
78262306a36Sopenharmony_ci			if (folio_test_pmd_mappable(folio)) {
78362306a36Sopenharmony_ci				count_vm_event(THP_FILE_FALLBACK);
78462306a36Sopenharmony_ci				count_vm_event(THP_FILE_FALLBACK_CHARGE);
78562306a36Sopenharmony_ci			}
78662306a36Sopenharmony_ci			goto error;
78762306a36Sopenharmony_ci		}
78862306a36Sopenharmony_ci	}
78962306a36Sopenharmony_ci	folio_throttle_swaprate(folio, gfp);
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	do {
79262306a36Sopenharmony_ci		xas_lock_irq(&xas);
79362306a36Sopenharmony_ci		if (expected != xas_find_conflict(&xas)) {
79462306a36Sopenharmony_ci			xas_set_err(&xas, -EEXIST);
79562306a36Sopenharmony_ci			goto unlock;
79662306a36Sopenharmony_ci		}
79762306a36Sopenharmony_ci		if (expected && xas_find_conflict(&xas)) {
79862306a36Sopenharmony_ci			xas_set_err(&xas, -EEXIST);
79962306a36Sopenharmony_ci			goto unlock;
80062306a36Sopenharmony_ci		}
80162306a36Sopenharmony_ci		xas_store(&xas, folio);
80262306a36Sopenharmony_ci		if (xas_error(&xas))
80362306a36Sopenharmony_ci			goto unlock;
80462306a36Sopenharmony_ci		if (folio_test_pmd_mappable(folio)) {
80562306a36Sopenharmony_ci			count_vm_event(THP_FILE_ALLOC);
80662306a36Sopenharmony_ci			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
80762306a36Sopenharmony_ci		}
80862306a36Sopenharmony_ci		mapping->nrpages += nr;
80962306a36Sopenharmony_ci		__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
81062306a36Sopenharmony_ci		__lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
81162306a36Sopenharmony_ciunlock:
81262306a36Sopenharmony_ci		xas_unlock_irq(&xas);
81362306a36Sopenharmony_ci	} while (xas_nomem(&xas, gfp));
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	if (xas_error(&xas)) {
81662306a36Sopenharmony_ci		error = xas_error(&xas);
81762306a36Sopenharmony_ci		goto error;
81862306a36Sopenharmony_ci	}
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	return 0;
82162306a36Sopenharmony_cierror:
82262306a36Sopenharmony_ci	folio->mapping = NULL;
82362306a36Sopenharmony_ci	folio_ref_sub(folio, nr);
82462306a36Sopenharmony_ci	return error;
82562306a36Sopenharmony_ci}
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci/*
82862306a36Sopenharmony_ci * Like delete_from_page_cache, but substitutes swap for @folio.
82962306a36Sopenharmony_ci */
83062306a36Sopenharmony_cistatic void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
83162306a36Sopenharmony_ci{
83262306a36Sopenharmony_ci	struct address_space *mapping = folio->mapping;
83362306a36Sopenharmony_ci	long nr = folio_nr_pages(folio);
83462306a36Sopenharmony_ci	int error;
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	xa_lock_irq(&mapping->i_pages);
83762306a36Sopenharmony_ci	error = shmem_replace_entry(mapping, folio->index, folio, radswap);
83862306a36Sopenharmony_ci	folio->mapping = NULL;
83962306a36Sopenharmony_ci	mapping->nrpages -= nr;
84062306a36Sopenharmony_ci	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
84162306a36Sopenharmony_ci	__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
84262306a36Sopenharmony_ci	xa_unlock_irq(&mapping->i_pages);
84362306a36Sopenharmony_ci	folio_put(folio);
84462306a36Sopenharmony_ci	BUG_ON(error);
84562306a36Sopenharmony_ci}
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci/*
84862306a36Sopenharmony_ci * Remove swap entry from page cache, free the swap and its page cache.
84962306a36Sopenharmony_ci */
85062306a36Sopenharmony_cistatic int shmem_free_swap(struct address_space *mapping,
85162306a36Sopenharmony_ci			   pgoff_t index, void *radswap)
85262306a36Sopenharmony_ci{
85362306a36Sopenharmony_ci	void *old;
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
85662306a36Sopenharmony_ci	if (old != radswap)
85762306a36Sopenharmony_ci		return -ENOENT;
85862306a36Sopenharmony_ci	free_swap_and_cache(radix_to_swp_entry(radswap));
85962306a36Sopenharmony_ci	return 0;
86062306a36Sopenharmony_ci}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci/*
86362306a36Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the
86462306a36Sopenharmony_ci * given offsets are swapped out.
86562306a36Sopenharmony_ci *
86662306a36Sopenharmony_ci * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
86762306a36Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem.
86862306a36Sopenharmony_ci */
86962306a36Sopenharmony_ciunsigned long shmem_partial_swap_usage(struct address_space *mapping,
87062306a36Sopenharmony_ci						pgoff_t start, pgoff_t end)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	XA_STATE(xas, &mapping->i_pages, start);
87362306a36Sopenharmony_ci	struct page *page;
87462306a36Sopenharmony_ci	unsigned long swapped = 0;
87562306a36Sopenharmony_ci	unsigned long max = end - 1;
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	rcu_read_lock();
87862306a36Sopenharmony_ci	xas_for_each(&xas, page, max) {
87962306a36Sopenharmony_ci		if (xas_retry(&xas, page))
88062306a36Sopenharmony_ci			continue;
88162306a36Sopenharmony_ci		if (xa_is_value(page))
88262306a36Sopenharmony_ci			swapped++;
88362306a36Sopenharmony_ci		if (xas.xa_index == max)
88462306a36Sopenharmony_ci			break;
88562306a36Sopenharmony_ci		if (need_resched()) {
88662306a36Sopenharmony_ci			xas_pause(&xas);
88762306a36Sopenharmony_ci			cond_resched_rcu();
88862306a36Sopenharmony_ci		}
88962306a36Sopenharmony_ci	}
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	rcu_read_unlock();
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	return swapped << PAGE_SHIFT;
89462306a36Sopenharmony_ci}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci/*
89762306a36Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the
89862306a36Sopenharmony_ci * given vma is swapped out.
89962306a36Sopenharmony_ci *
90062306a36Sopenharmony_ci * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
90162306a36Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem.
90262306a36Sopenharmony_ci */
90362306a36Sopenharmony_ciunsigned long shmem_swap_usage(struct vm_area_struct *vma)
90462306a36Sopenharmony_ci{
90562306a36Sopenharmony_ci	struct inode *inode = file_inode(vma->vm_file);
90662306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
90762306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
90862306a36Sopenharmony_ci	unsigned long swapped;
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci	/* Be careful as we don't hold info->lock */
91162306a36Sopenharmony_ci	swapped = READ_ONCE(info->swapped);
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	/*
91462306a36Sopenharmony_ci	 * The easier cases are when the shmem object has nothing in swap, or
91562306a36Sopenharmony_ci	 * the vma maps it whole. Then we can simply use the stats that we
91662306a36Sopenharmony_ci	 * already track.
91762306a36Sopenharmony_ci	 */
91862306a36Sopenharmony_ci	if (!swapped)
91962306a36Sopenharmony_ci		return 0;
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
92262306a36Sopenharmony_ci		return swapped << PAGE_SHIFT;
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci	/* Here comes the more involved part */
92562306a36Sopenharmony_ci	return shmem_partial_swap_usage(mapping, vma->vm_pgoff,
92662306a36Sopenharmony_ci					vma->vm_pgoff + vma_pages(vma));
92762306a36Sopenharmony_ci}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci/*
93062306a36Sopenharmony_ci * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
93162306a36Sopenharmony_ci */
93262306a36Sopenharmony_civoid shmem_unlock_mapping(struct address_space *mapping)
93362306a36Sopenharmony_ci{
93462306a36Sopenharmony_ci	struct folio_batch fbatch;
93562306a36Sopenharmony_ci	pgoff_t index = 0;
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	folio_batch_init(&fbatch);
93862306a36Sopenharmony_ci	/*
93962306a36Sopenharmony_ci	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
94062306a36Sopenharmony_ci	 */
94162306a36Sopenharmony_ci	while (!mapping_unevictable(mapping) &&
94262306a36Sopenharmony_ci	       filemap_get_folios(mapping, &index, ~0UL, &fbatch)) {
94362306a36Sopenharmony_ci		check_move_unevictable_folios(&fbatch);
94462306a36Sopenharmony_ci		folio_batch_release(&fbatch);
94562306a36Sopenharmony_ci		cond_resched();
94662306a36Sopenharmony_ci	}
94762306a36Sopenharmony_ci}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_cistatic struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
95062306a36Sopenharmony_ci{
95162306a36Sopenharmony_ci	struct folio *folio;
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_ci	/*
95462306a36Sopenharmony_ci	 * At first avoid shmem_get_folio(,,,SGP_READ): that fails
95562306a36Sopenharmony_ci	 * beyond i_size, and reports fallocated folios as holes.
95662306a36Sopenharmony_ci	 */
95762306a36Sopenharmony_ci	folio = filemap_get_entry(inode->i_mapping, index);
95862306a36Sopenharmony_ci	if (!folio)
95962306a36Sopenharmony_ci		return folio;
96062306a36Sopenharmony_ci	if (!xa_is_value(folio)) {
96162306a36Sopenharmony_ci		folio_lock(folio);
96262306a36Sopenharmony_ci		if (folio->mapping == inode->i_mapping)
96362306a36Sopenharmony_ci			return folio;
96462306a36Sopenharmony_ci		/* The folio has been swapped out */
96562306a36Sopenharmony_ci		folio_unlock(folio);
96662306a36Sopenharmony_ci		folio_put(folio);
96762306a36Sopenharmony_ci	}
96862306a36Sopenharmony_ci	/*
96962306a36Sopenharmony_ci	 * But read a folio back from swap if any of it is within i_size
97062306a36Sopenharmony_ci	 * (although in some cases this is just a waste of time).
97162306a36Sopenharmony_ci	 */
97262306a36Sopenharmony_ci	folio = NULL;
97362306a36Sopenharmony_ci	shmem_get_folio(inode, index, &folio, SGP_READ);
97462306a36Sopenharmony_ci	return folio;
97562306a36Sopenharmony_ci}
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci/*
97862306a36Sopenharmony_ci * Remove range of pages and swap entries from page cache, and free them.
97962306a36Sopenharmony_ci * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
98062306a36Sopenharmony_ci */
98162306a36Sopenharmony_cistatic void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
98262306a36Sopenharmony_ci								 bool unfalloc)
98362306a36Sopenharmony_ci{
98462306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
98562306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
98662306a36Sopenharmony_ci	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
98762306a36Sopenharmony_ci	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
98862306a36Sopenharmony_ci	struct folio_batch fbatch;
98962306a36Sopenharmony_ci	pgoff_t indices[PAGEVEC_SIZE];
99062306a36Sopenharmony_ci	struct folio *folio;
99162306a36Sopenharmony_ci	bool same_folio;
99262306a36Sopenharmony_ci	long nr_swaps_freed = 0;
99362306a36Sopenharmony_ci	pgoff_t index;
99462306a36Sopenharmony_ci	int i;
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	if (lend == -1)
99762306a36Sopenharmony_ci		end = -1;	/* unsigned, so actually very big */
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
100062306a36Sopenharmony_ci		info->fallocend = start;
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci	folio_batch_init(&fbatch);
100362306a36Sopenharmony_ci	index = start;
100462306a36Sopenharmony_ci	while (index < end && find_lock_entries(mapping, &index, end - 1,
100562306a36Sopenharmony_ci			&fbatch, indices)) {
100662306a36Sopenharmony_ci		for (i = 0; i < folio_batch_count(&fbatch); i++) {
100762306a36Sopenharmony_ci			folio = fbatch.folios[i];
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci			if (xa_is_value(folio)) {
101062306a36Sopenharmony_ci				if (unfalloc)
101162306a36Sopenharmony_ci					continue;
101262306a36Sopenharmony_ci				nr_swaps_freed += !shmem_free_swap(mapping,
101362306a36Sopenharmony_ci							indices[i], folio);
101462306a36Sopenharmony_ci				continue;
101562306a36Sopenharmony_ci			}
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci			if (!unfalloc || !folio_test_uptodate(folio))
101862306a36Sopenharmony_ci				truncate_inode_folio(mapping, folio);
101962306a36Sopenharmony_ci			folio_unlock(folio);
102062306a36Sopenharmony_ci		}
102162306a36Sopenharmony_ci		folio_batch_remove_exceptionals(&fbatch);
102262306a36Sopenharmony_ci		folio_batch_release(&fbatch);
102362306a36Sopenharmony_ci		cond_resched();
102462306a36Sopenharmony_ci	}
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	/*
102762306a36Sopenharmony_ci	 * When undoing a failed fallocate, we want none of the partial folio
102862306a36Sopenharmony_ci	 * zeroing and splitting below, but shall want to truncate the whole
102962306a36Sopenharmony_ci	 * folio when !uptodate indicates that it was added by this fallocate,
103062306a36Sopenharmony_ci	 * even when [lstart, lend] covers only a part of the folio.
103162306a36Sopenharmony_ci	 */
103262306a36Sopenharmony_ci	if (unfalloc)
103362306a36Sopenharmony_ci		goto whole_folios;
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
103662306a36Sopenharmony_ci	folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
103762306a36Sopenharmony_ci	if (folio) {
103862306a36Sopenharmony_ci		same_folio = lend < folio_pos(folio) + folio_size(folio);
103962306a36Sopenharmony_ci		folio_mark_dirty(folio);
104062306a36Sopenharmony_ci		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
104162306a36Sopenharmony_ci			start = folio_next_index(folio);
104262306a36Sopenharmony_ci			if (same_folio)
104362306a36Sopenharmony_ci				end = folio->index;
104462306a36Sopenharmony_ci		}
104562306a36Sopenharmony_ci		folio_unlock(folio);
104662306a36Sopenharmony_ci		folio_put(folio);
104762306a36Sopenharmony_ci		folio = NULL;
104862306a36Sopenharmony_ci	}
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci	if (!same_folio)
105162306a36Sopenharmony_ci		folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
105262306a36Sopenharmony_ci	if (folio) {
105362306a36Sopenharmony_ci		folio_mark_dirty(folio);
105462306a36Sopenharmony_ci		if (!truncate_inode_partial_folio(folio, lstart, lend))
105562306a36Sopenharmony_ci			end = folio->index;
105662306a36Sopenharmony_ci		folio_unlock(folio);
105762306a36Sopenharmony_ci		folio_put(folio);
105862306a36Sopenharmony_ci	}
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ciwhole_folios:
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci	index = start;
106362306a36Sopenharmony_ci	while (index < end) {
106462306a36Sopenharmony_ci		cond_resched();
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
106762306a36Sopenharmony_ci				indices)) {
106862306a36Sopenharmony_ci			/* If all gone or hole-punch or unfalloc, we're done */
106962306a36Sopenharmony_ci			if (index == start || end != -1)
107062306a36Sopenharmony_ci				break;
107162306a36Sopenharmony_ci			/* But if truncating, restart to make sure all gone */
107262306a36Sopenharmony_ci			index = start;
107362306a36Sopenharmony_ci			continue;
107462306a36Sopenharmony_ci		}
107562306a36Sopenharmony_ci		for (i = 0; i < folio_batch_count(&fbatch); i++) {
107662306a36Sopenharmony_ci			folio = fbatch.folios[i];
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci			if (xa_is_value(folio)) {
107962306a36Sopenharmony_ci				if (unfalloc)
108062306a36Sopenharmony_ci					continue;
108162306a36Sopenharmony_ci				if (shmem_free_swap(mapping, indices[i], folio)) {
108262306a36Sopenharmony_ci					/* Swap was replaced by page: retry */
108362306a36Sopenharmony_ci					index = indices[i];
108462306a36Sopenharmony_ci					break;
108562306a36Sopenharmony_ci				}
108662306a36Sopenharmony_ci				nr_swaps_freed++;
108762306a36Sopenharmony_ci				continue;
108862306a36Sopenharmony_ci			}
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci			folio_lock(folio);
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci			if (!unfalloc || !folio_test_uptodate(folio)) {
109362306a36Sopenharmony_ci				if (folio_mapping(folio) != mapping) {
109462306a36Sopenharmony_ci					/* Page was replaced by swap: retry */
109562306a36Sopenharmony_ci					folio_unlock(folio);
109662306a36Sopenharmony_ci					index = indices[i];
109762306a36Sopenharmony_ci					break;
109862306a36Sopenharmony_ci				}
109962306a36Sopenharmony_ci				VM_BUG_ON_FOLIO(folio_test_writeback(folio),
110062306a36Sopenharmony_ci						folio);
110162306a36Sopenharmony_ci
110262306a36Sopenharmony_ci				if (!folio_test_large(folio)) {
110362306a36Sopenharmony_ci					truncate_inode_folio(mapping, folio);
110462306a36Sopenharmony_ci				} else if (truncate_inode_partial_folio(folio, lstart, lend)) {
110562306a36Sopenharmony_ci					/*
110662306a36Sopenharmony_ci					 * If we split a page, reset the loop so
110762306a36Sopenharmony_ci					 * that we pick up the new sub pages.
110862306a36Sopenharmony_ci					 * Otherwise the THP was entirely
110962306a36Sopenharmony_ci					 * dropped or the target range was
111062306a36Sopenharmony_ci					 * zeroed, so just continue the loop as
111162306a36Sopenharmony_ci					 * is.
111262306a36Sopenharmony_ci					 */
111362306a36Sopenharmony_ci					if (!folio_test_large(folio)) {
111462306a36Sopenharmony_ci						folio_unlock(folio);
111562306a36Sopenharmony_ci						index = start;
111662306a36Sopenharmony_ci						break;
111762306a36Sopenharmony_ci					}
111862306a36Sopenharmony_ci				}
111962306a36Sopenharmony_ci			}
112062306a36Sopenharmony_ci			folio_unlock(folio);
112162306a36Sopenharmony_ci		}
112262306a36Sopenharmony_ci		folio_batch_remove_exceptionals(&fbatch);
112362306a36Sopenharmony_ci		folio_batch_release(&fbatch);
112462306a36Sopenharmony_ci	}
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	shmem_recalc_inode(inode, 0, -nr_swaps_freed);
112762306a36Sopenharmony_ci}
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
113062306a36Sopenharmony_ci{
113162306a36Sopenharmony_ci	shmem_undo_range(inode, lstart, lend, false);
113262306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
113362306a36Sopenharmony_ci	inode_inc_iversion(inode);
113462306a36Sopenharmony_ci}
113562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range);
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_cistatic int shmem_getattr(struct mnt_idmap *idmap,
113862306a36Sopenharmony_ci			 const struct path *path, struct kstat *stat,
113962306a36Sopenharmony_ci			 u32 request_mask, unsigned int query_flags)
114062306a36Sopenharmony_ci{
114162306a36Sopenharmony_ci	struct inode *inode = path->dentry->d_inode;
114262306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci	if (info->alloced - info->swapped != inode->i_mapping->nrpages)
114562306a36Sopenharmony_ci		shmem_recalc_inode(inode, 0, 0);
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	if (info->fsflags & FS_APPEND_FL)
114862306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_APPEND;
114962306a36Sopenharmony_ci	if (info->fsflags & FS_IMMUTABLE_FL)
115062306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_IMMUTABLE;
115162306a36Sopenharmony_ci	if (info->fsflags & FS_NODUMP_FL)
115262306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_NODUMP;
115362306a36Sopenharmony_ci	stat->attributes_mask |= (STATX_ATTR_APPEND |
115462306a36Sopenharmony_ci			STATX_ATTR_IMMUTABLE |
115562306a36Sopenharmony_ci			STATX_ATTR_NODUMP);
115662306a36Sopenharmony_ci	generic_fillattr(idmap, request_mask, inode, stat);
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	if (shmem_is_huge(inode, 0, false, NULL, 0))
115962306a36Sopenharmony_ci		stat->blksize = HPAGE_PMD_SIZE;
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	if (request_mask & STATX_BTIME) {
116262306a36Sopenharmony_ci		stat->result_mask |= STATX_BTIME;
116362306a36Sopenharmony_ci		stat->btime.tv_sec = info->i_crtime.tv_sec;
116462306a36Sopenharmony_ci		stat->btime.tv_nsec = info->i_crtime.tv_nsec;
116562306a36Sopenharmony_ci	}
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	return 0;
116862306a36Sopenharmony_ci}
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_cistatic int shmem_setattr(struct mnt_idmap *idmap,
117162306a36Sopenharmony_ci			 struct dentry *dentry, struct iattr *attr)
117262306a36Sopenharmony_ci{
117362306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
117462306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
117562306a36Sopenharmony_ci	int error;
117662306a36Sopenharmony_ci	bool update_mtime = false;
117762306a36Sopenharmony_ci	bool update_ctime = true;
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci	error = setattr_prepare(idmap, dentry, attr);
118062306a36Sopenharmony_ci	if (error)
118162306a36Sopenharmony_ci		return error;
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci	if ((info->seals & F_SEAL_EXEC) && (attr->ia_valid & ATTR_MODE)) {
118462306a36Sopenharmony_ci		if ((inode->i_mode ^ attr->ia_mode) & 0111) {
118562306a36Sopenharmony_ci			return -EPERM;
118662306a36Sopenharmony_ci		}
118762306a36Sopenharmony_ci	}
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
119062306a36Sopenharmony_ci		loff_t oldsize = inode->i_size;
119162306a36Sopenharmony_ci		loff_t newsize = attr->ia_size;
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_ci		/* protected by i_rwsem */
119462306a36Sopenharmony_ci		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
119562306a36Sopenharmony_ci		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
119662306a36Sopenharmony_ci			return -EPERM;
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci		if (newsize != oldsize) {
119962306a36Sopenharmony_ci			error = shmem_reacct_size(SHMEM_I(inode)->flags,
120062306a36Sopenharmony_ci					oldsize, newsize);
120162306a36Sopenharmony_ci			if (error)
120262306a36Sopenharmony_ci				return error;
120362306a36Sopenharmony_ci			i_size_write(inode, newsize);
120462306a36Sopenharmony_ci			update_mtime = true;
120562306a36Sopenharmony_ci		} else {
120662306a36Sopenharmony_ci			update_ctime = false;
120762306a36Sopenharmony_ci		}
120862306a36Sopenharmony_ci		if (newsize <= oldsize) {
120962306a36Sopenharmony_ci			loff_t holebegin = round_up(newsize, PAGE_SIZE);
121062306a36Sopenharmony_ci			if (oldsize > holebegin)
121162306a36Sopenharmony_ci				unmap_mapping_range(inode->i_mapping,
121262306a36Sopenharmony_ci							holebegin, 0, 1);
121362306a36Sopenharmony_ci			if (info->alloced)
121462306a36Sopenharmony_ci				shmem_truncate_range(inode,
121562306a36Sopenharmony_ci							newsize, (loff_t)-1);
121662306a36Sopenharmony_ci			/* unmap again to remove racily COWed private pages */
121762306a36Sopenharmony_ci			if (oldsize > holebegin)
121862306a36Sopenharmony_ci				unmap_mapping_range(inode->i_mapping,
121962306a36Sopenharmony_ci							holebegin, 0, 1);
122062306a36Sopenharmony_ci		}
122162306a36Sopenharmony_ci	}
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	if (is_quota_modification(idmap, inode, attr)) {
122462306a36Sopenharmony_ci		error = dquot_initialize(inode);
122562306a36Sopenharmony_ci		if (error)
122662306a36Sopenharmony_ci			return error;
122762306a36Sopenharmony_ci	}
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci	/* Transfer quota accounting */
123062306a36Sopenharmony_ci	if (i_uid_needs_update(idmap, attr, inode) ||
123162306a36Sopenharmony_ci	    i_gid_needs_update(idmap, attr, inode)) {
123262306a36Sopenharmony_ci		error = dquot_transfer(idmap, inode, attr);
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci		if (error)
123562306a36Sopenharmony_ci			return error;
123662306a36Sopenharmony_ci	}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci	setattr_copy(idmap, inode, attr);
123962306a36Sopenharmony_ci	if (attr->ia_valid & ATTR_MODE)
124062306a36Sopenharmony_ci		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
124162306a36Sopenharmony_ci	if (!error && update_ctime) {
124262306a36Sopenharmony_ci		inode_set_ctime_current(inode);
124362306a36Sopenharmony_ci		if (update_mtime)
124462306a36Sopenharmony_ci			inode->i_mtime = inode_get_ctime(inode);
124562306a36Sopenharmony_ci		inode_inc_iversion(inode);
124662306a36Sopenharmony_ci	}
124762306a36Sopenharmony_ci	return error;
124862306a36Sopenharmony_ci}
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_cistatic void shmem_evict_inode(struct inode *inode)
125162306a36Sopenharmony_ci{
125262306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
125362306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
125462306a36Sopenharmony_ci	size_t freed = 0;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	if (shmem_mapping(inode->i_mapping)) {
125762306a36Sopenharmony_ci		shmem_unacct_size(info->flags, inode->i_size);
125862306a36Sopenharmony_ci		inode->i_size = 0;
125962306a36Sopenharmony_ci		mapping_set_exiting(inode->i_mapping);
126062306a36Sopenharmony_ci		shmem_truncate_range(inode, 0, (loff_t)-1);
126162306a36Sopenharmony_ci		if (!list_empty(&info->shrinklist)) {
126262306a36Sopenharmony_ci			spin_lock(&sbinfo->shrinklist_lock);
126362306a36Sopenharmony_ci			if (!list_empty(&info->shrinklist)) {
126462306a36Sopenharmony_ci				list_del_init(&info->shrinklist);
126562306a36Sopenharmony_ci				sbinfo->shrinklist_len--;
126662306a36Sopenharmony_ci			}
126762306a36Sopenharmony_ci			spin_unlock(&sbinfo->shrinklist_lock);
126862306a36Sopenharmony_ci		}
126962306a36Sopenharmony_ci		while (!list_empty(&info->swaplist)) {
127062306a36Sopenharmony_ci			/* Wait while shmem_unuse() is scanning this inode... */
127162306a36Sopenharmony_ci			wait_var_event(&info->stop_eviction,
127262306a36Sopenharmony_ci				       !atomic_read(&info->stop_eviction));
127362306a36Sopenharmony_ci			mutex_lock(&shmem_swaplist_mutex);
127462306a36Sopenharmony_ci			/* ...but beware of the race if we peeked too early */
127562306a36Sopenharmony_ci			if (!atomic_read(&info->stop_eviction))
127662306a36Sopenharmony_ci				list_del_init(&info->swaplist);
127762306a36Sopenharmony_ci			mutex_unlock(&shmem_swaplist_mutex);
127862306a36Sopenharmony_ci		}
127962306a36Sopenharmony_ci	}
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
128262306a36Sopenharmony_ci	shmem_free_inode(inode->i_sb, freed);
128362306a36Sopenharmony_ci	WARN_ON(inode->i_blocks);
128462306a36Sopenharmony_ci	clear_inode(inode);
128562306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
128662306a36Sopenharmony_ci	dquot_free_inode(inode);
128762306a36Sopenharmony_ci	dquot_drop(inode);
128862306a36Sopenharmony_ci#endif
128962306a36Sopenharmony_ci}
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_cistatic int shmem_find_swap_entries(struct address_space *mapping,
129262306a36Sopenharmony_ci				   pgoff_t start, struct folio_batch *fbatch,
129362306a36Sopenharmony_ci				   pgoff_t *indices, unsigned int type)
129462306a36Sopenharmony_ci{
129562306a36Sopenharmony_ci	XA_STATE(xas, &mapping->i_pages, start);
129662306a36Sopenharmony_ci	struct folio *folio;
129762306a36Sopenharmony_ci	swp_entry_t entry;
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci	rcu_read_lock();
130062306a36Sopenharmony_ci	xas_for_each(&xas, folio, ULONG_MAX) {
130162306a36Sopenharmony_ci		if (xas_retry(&xas, folio))
130262306a36Sopenharmony_ci			continue;
130362306a36Sopenharmony_ci
130462306a36Sopenharmony_ci		if (!xa_is_value(folio))
130562306a36Sopenharmony_ci			continue;
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_ci		entry = radix_to_swp_entry(folio);
130862306a36Sopenharmony_ci		/*
130962306a36Sopenharmony_ci		 * swapin error entries can be found in the mapping. But they're
131062306a36Sopenharmony_ci		 * deliberately ignored here as we've done everything we can do.
131162306a36Sopenharmony_ci		 */
131262306a36Sopenharmony_ci		if (swp_type(entry) != type)
131362306a36Sopenharmony_ci			continue;
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci		indices[folio_batch_count(fbatch)] = xas.xa_index;
131662306a36Sopenharmony_ci		if (!folio_batch_add(fbatch, folio))
131762306a36Sopenharmony_ci			break;
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci		if (need_resched()) {
132062306a36Sopenharmony_ci			xas_pause(&xas);
132162306a36Sopenharmony_ci			cond_resched_rcu();
132262306a36Sopenharmony_ci		}
132362306a36Sopenharmony_ci	}
132462306a36Sopenharmony_ci	rcu_read_unlock();
132562306a36Sopenharmony_ci
132662306a36Sopenharmony_ci	return xas.xa_index;
132762306a36Sopenharmony_ci}
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci/*
133062306a36Sopenharmony_ci * Move the swapped pages for an inode to page cache. Returns the count
133162306a36Sopenharmony_ci * of pages swapped in, or the error in case of failure.
133262306a36Sopenharmony_ci */
133362306a36Sopenharmony_cistatic int shmem_unuse_swap_entries(struct inode *inode,
133462306a36Sopenharmony_ci		struct folio_batch *fbatch, pgoff_t *indices)
133562306a36Sopenharmony_ci{
133662306a36Sopenharmony_ci	int i = 0;
133762306a36Sopenharmony_ci	int ret = 0;
133862306a36Sopenharmony_ci	int error = 0;
133962306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	for (i = 0; i < folio_batch_count(fbatch); i++) {
134262306a36Sopenharmony_ci		struct folio *folio = fbatch->folios[i];
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci		if (!xa_is_value(folio))
134562306a36Sopenharmony_ci			continue;
134662306a36Sopenharmony_ci		error = shmem_swapin_folio(inode, indices[i],
134762306a36Sopenharmony_ci					  &folio, SGP_CACHE,
134862306a36Sopenharmony_ci					  mapping_gfp_mask(mapping),
134962306a36Sopenharmony_ci					  NULL, NULL);
135062306a36Sopenharmony_ci		if (error == 0) {
135162306a36Sopenharmony_ci			folio_unlock(folio);
135262306a36Sopenharmony_ci			folio_put(folio);
135362306a36Sopenharmony_ci			ret++;
135462306a36Sopenharmony_ci		}
135562306a36Sopenharmony_ci		if (error == -ENOMEM)
135662306a36Sopenharmony_ci			break;
135762306a36Sopenharmony_ci		error = 0;
135862306a36Sopenharmony_ci	}
135962306a36Sopenharmony_ci	return error ? error : ret;
136062306a36Sopenharmony_ci}
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci/*
136362306a36Sopenharmony_ci * If swap found in inode, free it and move page from swapcache to filecache.
136462306a36Sopenharmony_ci */
136562306a36Sopenharmony_cistatic int shmem_unuse_inode(struct inode *inode, unsigned int type)
136662306a36Sopenharmony_ci{
136762306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
136862306a36Sopenharmony_ci	pgoff_t start = 0;
136962306a36Sopenharmony_ci	struct folio_batch fbatch;
137062306a36Sopenharmony_ci	pgoff_t indices[PAGEVEC_SIZE];
137162306a36Sopenharmony_ci	int ret = 0;
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci	do {
137462306a36Sopenharmony_ci		folio_batch_init(&fbatch);
137562306a36Sopenharmony_ci		shmem_find_swap_entries(mapping, start, &fbatch, indices, type);
137662306a36Sopenharmony_ci		if (folio_batch_count(&fbatch) == 0) {
137762306a36Sopenharmony_ci			ret = 0;
137862306a36Sopenharmony_ci			break;
137962306a36Sopenharmony_ci		}
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci		ret = shmem_unuse_swap_entries(inode, &fbatch, indices);
138262306a36Sopenharmony_ci		if (ret < 0)
138362306a36Sopenharmony_ci			break;
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci		start = indices[folio_batch_count(&fbatch) - 1];
138662306a36Sopenharmony_ci	} while (true);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	return ret;
138962306a36Sopenharmony_ci}
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci/*
139262306a36Sopenharmony_ci * Read all the shared memory data that resides in the swap
139362306a36Sopenharmony_ci * device 'type' back into memory, so the swap device can be
139462306a36Sopenharmony_ci * unused.
139562306a36Sopenharmony_ci */
139662306a36Sopenharmony_ciint shmem_unuse(unsigned int type)
139762306a36Sopenharmony_ci{
139862306a36Sopenharmony_ci	struct shmem_inode_info *info, *next;
139962306a36Sopenharmony_ci	int error = 0;
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	if (list_empty(&shmem_swaplist))
140262306a36Sopenharmony_ci		return 0;
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	mutex_lock(&shmem_swaplist_mutex);
140562306a36Sopenharmony_ci	list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
140662306a36Sopenharmony_ci		if (!info->swapped) {
140762306a36Sopenharmony_ci			list_del_init(&info->swaplist);
140862306a36Sopenharmony_ci			continue;
140962306a36Sopenharmony_ci		}
141062306a36Sopenharmony_ci		/*
141162306a36Sopenharmony_ci		 * Drop the swaplist mutex while searching the inode for swap;
141262306a36Sopenharmony_ci		 * but before doing so, make sure shmem_evict_inode() will not
141362306a36Sopenharmony_ci		 * remove placeholder inode from swaplist, nor let it be freed
141462306a36Sopenharmony_ci		 * (igrab() would protect from unlink, but not from unmount).
141562306a36Sopenharmony_ci		 */
141662306a36Sopenharmony_ci		atomic_inc(&info->stop_eviction);
141762306a36Sopenharmony_ci		mutex_unlock(&shmem_swaplist_mutex);
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci		error = shmem_unuse_inode(&info->vfs_inode, type);
142062306a36Sopenharmony_ci		cond_resched();
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_ci		mutex_lock(&shmem_swaplist_mutex);
142362306a36Sopenharmony_ci		next = list_next_entry(info, swaplist);
142462306a36Sopenharmony_ci		if (!info->swapped)
142562306a36Sopenharmony_ci			list_del_init(&info->swaplist);
142662306a36Sopenharmony_ci		if (atomic_dec_and_test(&info->stop_eviction))
142762306a36Sopenharmony_ci			wake_up_var(&info->stop_eviction);
142862306a36Sopenharmony_ci		if (error)
142962306a36Sopenharmony_ci			break;
143062306a36Sopenharmony_ci	}
143162306a36Sopenharmony_ci	mutex_unlock(&shmem_swaplist_mutex);
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci	return error;
143462306a36Sopenharmony_ci}
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci/*
143762306a36Sopenharmony_ci * Move the page from the page cache to the swap cache.
143862306a36Sopenharmony_ci */
143962306a36Sopenharmony_cistatic int shmem_writepage(struct page *page, struct writeback_control *wbc)
144062306a36Sopenharmony_ci{
144162306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
144262306a36Sopenharmony_ci	struct address_space *mapping = folio->mapping;
144362306a36Sopenharmony_ci	struct inode *inode = mapping->host;
144462306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
144562306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
144662306a36Sopenharmony_ci	swp_entry_t swap;
144762306a36Sopenharmony_ci	pgoff_t index;
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_ci	/*
145062306a36Sopenharmony_ci	 * Our capabilities prevent regular writeback or sync from ever calling
145162306a36Sopenharmony_ci	 * shmem_writepage; but a stacking filesystem might use ->writepage of
145262306a36Sopenharmony_ci	 * its underlying filesystem, in which case tmpfs should write out to
145362306a36Sopenharmony_ci	 * swap only in response to memory pressure, and not for the writeback
145462306a36Sopenharmony_ci	 * threads or sync.
145562306a36Sopenharmony_ci	 */
145662306a36Sopenharmony_ci	if (WARN_ON_ONCE(!wbc->for_reclaim))
145762306a36Sopenharmony_ci		goto redirty;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap))
146062306a36Sopenharmony_ci		goto redirty;
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci	if (!total_swap_pages)
146362306a36Sopenharmony_ci		goto redirty;
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci	/*
146662306a36Sopenharmony_ci	 * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
146762306a36Sopenharmony_ci	 * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
146862306a36Sopenharmony_ci	 * and its shmem_writeback() needs them to be split when swapping.
146962306a36Sopenharmony_ci	 */
147062306a36Sopenharmony_ci	if (folio_test_large(folio)) {
147162306a36Sopenharmony_ci		/* Ensure the subpages are still dirty */
147262306a36Sopenharmony_ci		folio_test_set_dirty(folio);
147362306a36Sopenharmony_ci		if (split_huge_page(page) < 0)
147462306a36Sopenharmony_ci			goto redirty;
147562306a36Sopenharmony_ci		folio = page_folio(page);
147662306a36Sopenharmony_ci		folio_clear_dirty(folio);
147762306a36Sopenharmony_ci	}
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	index = folio->index;
148062306a36Sopenharmony_ci
148162306a36Sopenharmony_ci	/*
148262306a36Sopenharmony_ci	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
148362306a36Sopenharmony_ci	 * value into swapfile.c, the only way we can correctly account for a
148462306a36Sopenharmony_ci	 * fallocated folio arriving here is now to initialize it and write it.
148562306a36Sopenharmony_ci	 *
148662306a36Sopenharmony_ci	 * That's okay for a folio already fallocated earlier, but if we have
148762306a36Sopenharmony_ci	 * not yet completed the fallocation, then (a) we want to keep track
148862306a36Sopenharmony_ci	 * of this folio in case we have to undo it, and (b) it may not be a
148962306a36Sopenharmony_ci	 * good idea to continue anyway, once we're pushing into swap.  So
149062306a36Sopenharmony_ci	 * reactivate the folio, and let shmem_fallocate() quit when too many.
149162306a36Sopenharmony_ci	 */
149262306a36Sopenharmony_ci	if (!folio_test_uptodate(folio)) {
149362306a36Sopenharmony_ci		if (inode->i_private) {
149462306a36Sopenharmony_ci			struct shmem_falloc *shmem_falloc;
149562306a36Sopenharmony_ci			spin_lock(&inode->i_lock);
149662306a36Sopenharmony_ci			shmem_falloc = inode->i_private;
149762306a36Sopenharmony_ci			if (shmem_falloc &&
149862306a36Sopenharmony_ci			    !shmem_falloc->waitq &&
149962306a36Sopenharmony_ci			    index >= shmem_falloc->start &&
150062306a36Sopenharmony_ci			    index < shmem_falloc->next)
150162306a36Sopenharmony_ci				shmem_falloc->nr_unswapped++;
150262306a36Sopenharmony_ci			else
150362306a36Sopenharmony_ci				shmem_falloc = NULL;
150462306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
150562306a36Sopenharmony_ci			if (shmem_falloc)
150662306a36Sopenharmony_ci				goto redirty;
150762306a36Sopenharmony_ci		}
150862306a36Sopenharmony_ci		folio_zero_range(folio, 0, folio_size(folio));
150962306a36Sopenharmony_ci		flush_dcache_folio(folio);
151062306a36Sopenharmony_ci		folio_mark_uptodate(folio);
151162306a36Sopenharmony_ci	}
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	swap = folio_alloc_swap(folio);
151462306a36Sopenharmony_ci	if (!swap.val)
151562306a36Sopenharmony_ci		goto redirty;
151662306a36Sopenharmony_ci
151762306a36Sopenharmony_ci	/*
151862306a36Sopenharmony_ci	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
151962306a36Sopenharmony_ci	 * if it's not already there.  Do it now before the folio is
152062306a36Sopenharmony_ci	 * moved to swap cache, when its pagelock no longer protects
152162306a36Sopenharmony_ci	 * the inode from eviction.  But don't unlock the mutex until
152262306a36Sopenharmony_ci	 * we've incremented swapped, because shmem_unuse_inode() will
152362306a36Sopenharmony_ci	 * prune a !swapped inode from the swaplist under this mutex.
152462306a36Sopenharmony_ci	 */
152562306a36Sopenharmony_ci	mutex_lock(&shmem_swaplist_mutex);
152662306a36Sopenharmony_ci	if (list_empty(&info->swaplist))
152762306a36Sopenharmony_ci		list_add(&info->swaplist, &shmem_swaplist);
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	if (add_to_swap_cache(folio, swap,
153062306a36Sopenharmony_ci			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
153162306a36Sopenharmony_ci			NULL) == 0) {
153262306a36Sopenharmony_ci		shmem_recalc_inode(inode, 0, 1);
153362306a36Sopenharmony_ci		swap_shmem_alloc(swap);
153462306a36Sopenharmony_ci		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ci		mutex_unlock(&shmem_swaplist_mutex);
153762306a36Sopenharmony_ci		BUG_ON(folio_mapped(folio));
153862306a36Sopenharmony_ci		swap_writepage(&folio->page, wbc);
153962306a36Sopenharmony_ci		return 0;
154062306a36Sopenharmony_ci	}
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	mutex_unlock(&shmem_swaplist_mutex);
154362306a36Sopenharmony_ci	put_swap_folio(folio, swap);
154462306a36Sopenharmony_ciredirty:
154562306a36Sopenharmony_ci	folio_mark_dirty(folio);
154662306a36Sopenharmony_ci	if (wbc->for_reclaim)
154762306a36Sopenharmony_ci		return AOP_WRITEPAGE_ACTIVATE;	/* Return with folio locked */
154862306a36Sopenharmony_ci	folio_unlock(folio);
154962306a36Sopenharmony_ci	return 0;
155062306a36Sopenharmony_ci}
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
155362306a36Sopenharmony_cistatic void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
155462306a36Sopenharmony_ci{
155562306a36Sopenharmony_ci	char buffer[64];
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ci	if (!mpol || mpol->mode == MPOL_DEFAULT)
155862306a36Sopenharmony_ci		return;		/* show nothing */
155962306a36Sopenharmony_ci
156062306a36Sopenharmony_ci	mpol_to_str(buffer, sizeof(buffer), mpol);
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	seq_printf(seq, ",mpol=%s", buffer);
156362306a36Sopenharmony_ci}
156462306a36Sopenharmony_ci
156562306a36Sopenharmony_cistatic struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
156662306a36Sopenharmony_ci{
156762306a36Sopenharmony_ci	struct mempolicy *mpol = NULL;
156862306a36Sopenharmony_ci	if (sbinfo->mpol) {
156962306a36Sopenharmony_ci		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
157062306a36Sopenharmony_ci		mpol = sbinfo->mpol;
157162306a36Sopenharmony_ci		mpol_get(mpol);
157262306a36Sopenharmony_ci		raw_spin_unlock(&sbinfo->stat_lock);
157362306a36Sopenharmony_ci	}
157462306a36Sopenharmony_ci	return mpol;
157562306a36Sopenharmony_ci}
157662306a36Sopenharmony_ci#else /* !CONFIG_NUMA || !CONFIG_TMPFS */
/* Variant for builds without CONFIG_NUMA or CONFIG_TMPFS: shows nothing. */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
158062306a36Sopenharmony_cistatic inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
158162306a36Sopenharmony_ci{
158262306a36Sopenharmony_ci	return NULL;
158362306a36Sopenharmony_ci}
158462306a36Sopenharmony_ci#endif /* CONFIG_NUMA && CONFIG_TMPFS */
158562306a36Sopenharmony_ci#ifndef CONFIG_NUMA
158662306a36Sopenharmony_ci#define vm_policy vm_private_data
158762306a36Sopenharmony_ci#endif
158862306a36Sopenharmony_ci
158962306a36Sopenharmony_cistatic void shmem_pseudo_vma_init(struct vm_area_struct *vma,
159062306a36Sopenharmony_ci		struct shmem_inode_info *info, pgoff_t index)
159162306a36Sopenharmony_ci{
159262306a36Sopenharmony_ci	/* Create a pseudo vma that just contains the policy */
159362306a36Sopenharmony_ci	vma_init(vma, NULL);
159462306a36Sopenharmony_ci	/* Bias interleave by inode number to distribute better across nodes */
159562306a36Sopenharmony_ci	vma->vm_pgoff = index + info->vfs_inode.i_ino;
159662306a36Sopenharmony_ci	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
159762306a36Sopenharmony_ci}
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_cistatic void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
160062306a36Sopenharmony_ci{
160162306a36Sopenharmony_ci	/* Drop reference taken by mpol_shared_policy_lookup() */
160262306a36Sopenharmony_ci	mpol_cond_put(vma->vm_policy);
160362306a36Sopenharmony_ci}
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_cistatic struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp,
160662306a36Sopenharmony_ci			struct shmem_inode_info *info, pgoff_t index)
160762306a36Sopenharmony_ci{
160862306a36Sopenharmony_ci	struct vm_area_struct pvma;
160962306a36Sopenharmony_ci	struct page *page;
161062306a36Sopenharmony_ci	struct vm_fault vmf = {
161162306a36Sopenharmony_ci		.vma = &pvma,
161262306a36Sopenharmony_ci	};
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci	shmem_pseudo_vma_init(&pvma, info, index);
161562306a36Sopenharmony_ci	page = swap_cluster_readahead(swap, gfp, &vmf);
161662306a36Sopenharmony_ci	shmem_pseudo_vma_destroy(&pvma);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	if (!page)
161962306a36Sopenharmony_ci		return NULL;
162062306a36Sopenharmony_ci	return page_folio(page);
162162306a36Sopenharmony_ci}
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci/*
162462306a36Sopenharmony_ci * Make sure huge_gfp is always more limited than limit_gfp.
162562306a36Sopenharmony_ci * Some of the flags set permissions, while others set limitations.
162662306a36Sopenharmony_ci */
162762306a36Sopenharmony_cistatic gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
162862306a36Sopenharmony_ci{
162962306a36Sopenharmony_ci	gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
163062306a36Sopenharmony_ci	gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
163162306a36Sopenharmony_ci	gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
163262306a36Sopenharmony_ci	gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_ci	/* Allow allocations only from the originally specified zones. */
163562306a36Sopenharmony_ci	result |= zoneflags;
163662306a36Sopenharmony_ci
163762306a36Sopenharmony_ci	/*
163862306a36Sopenharmony_ci	 * Minimize the result gfp by taking the union with the deny flags,
163962306a36Sopenharmony_ci	 * and the intersection of the allow flags.
164062306a36Sopenharmony_ci	 */
164162306a36Sopenharmony_ci	result |= (limit_gfp & denyflags);
164262306a36Sopenharmony_ci	result |= (huge_gfp & limit_gfp) & allowflags;
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_ci	return result;
164562306a36Sopenharmony_ci}
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_cistatic struct folio *shmem_alloc_hugefolio(gfp_t gfp,
164862306a36Sopenharmony_ci		struct shmem_inode_info *info, pgoff_t index)
164962306a36Sopenharmony_ci{
165062306a36Sopenharmony_ci	struct vm_area_struct pvma;
165162306a36Sopenharmony_ci	struct address_space *mapping = info->vfs_inode.i_mapping;
165262306a36Sopenharmony_ci	pgoff_t hindex;
165362306a36Sopenharmony_ci	struct folio *folio;
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci	hindex = round_down(index, HPAGE_PMD_NR);
165662306a36Sopenharmony_ci	if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
165762306a36Sopenharmony_ci								XA_PRESENT))
165862306a36Sopenharmony_ci		return NULL;
165962306a36Sopenharmony_ci
166062306a36Sopenharmony_ci	shmem_pseudo_vma_init(&pvma, info, hindex);
166162306a36Sopenharmony_ci	folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
166262306a36Sopenharmony_ci	shmem_pseudo_vma_destroy(&pvma);
166362306a36Sopenharmony_ci	if (!folio)
166462306a36Sopenharmony_ci		count_vm_event(THP_FILE_FALLBACK);
166562306a36Sopenharmony_ci	return folio;
166662306a36Sopenharmony_ci}
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_cistatic struct folio *shmem_alloc_folio(gfp_t gfp,
166962306a36Sopenharmony_ci			struct shmem_inode_info *info, pgoff_t index)
167062306a36Sopenharmony_ci{
167162306a36Sopenharmony_ci	struct vm_area_struct pvma;
167262306a36Sopenharmony_ci	struct folio *folio;
167362306a36Sopenharmony_ci
167462306a36Sopenharmony_ci	shmem_pseudo_vma_init(&pvma, info, index);
167562306a36Sopenharmony_ci	folio = vma_alloc_folio(gfp, 0, &pvma, 0, false);
167662306a36Sopenharmony_ci	shmem_pseudo_vma_destroy(&pvma);
167762306a36Sopenharmony_ci
167862306a36Sopenharmony_ci	return folio;
167962306a36Sopenharmony_ci}
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_cistatic struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
168262306a36Sopenharmony_ci		pgoff_t index, bool huge)
168362306a36Sopenharmony_ci{
168462306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
168562306a36Sopenharmony_ci	struct folio *folio;
168662306a36Sopenharmony_ci	int nr;
168762306a36Sopenharmony_ci	int err;
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_ci	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
169062306a36Sopenharmony_ci		huge = false;
169162306a36Sopenharmony_ci	nr = huge ? HPAGE_PMD_NR : 1;
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	err = shmem_inode_acct_block(inode, nr);
169462306a36Sopenharmony_ci	if (err)
169562306a36Sopenharmony_ci		goto failed;
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ci	if (huge)
169862306a36Sopenharmony_ci		folio = shmem_alloc_hugefolio(gfp, info, index);
169962306a36Sopenharmony_ci	else
170062306a36Sopenharmony_ci		folio = shmem_alloc_folio(gfp, info, index);
170162306a36Sopenharmony_ci	if (folio) {
170262306a36Sopenharmony_ci		__folio_set_locked(folio);
170362306a36Sopenharmony_ci		__folio_set_swapbacked(folio);
170462306a36Sopenharmony_ci		return folio;
170562306a36Sopenharmony_ci	}
170662306a36Sopenharmony_ci
170762306a36Sopenharmony_ci	err = -ENOMEM;
170862306a36Sopenharmony_ci	shmem_inode_unacct_blocks(inode, nr);
170962306a36Sopenharmony_cifailed:
171062306a36Sopenharmony_ci	return ERR_PTR(err);
171162306a36Sopenharmony_ci}
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci/*
171462306a36Sopenharmony_ci * When a page is moved from swapcache to shmem filecache (either by the
171562306a36Sopenharmony_ci * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of
171662306a36Sopenharmony_ci * shmem_unuse_inode()), it may have been read in earlier from swap, in
171762306a36Sopenharmony_ci * ignorance of the mapping it belongs to.  If that mapping has special
171862306a36Sopenharmony_ci * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
171962306a36Sopenharmony_ci * we may need to copy to a suitable page before moving to filecache.
172062306a36Sopenharmony_ci *
172162306a36Sopenharmony_ci * In a future release, this may well be extended to respect cpuset and
172262306a36Sopenharmony_ci * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
172362306a36Sopenharmony_ci * but for now it is a simple matter of zone.
172462306a36Sopenharmony_ci */
172562306a36Sopenharmony_cistatic bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
172662306a36Sopenharmony_ci{
172762306a36Sopenharmony_ci	return folio_zonenum(folio) > gfp_zone(gfp);
172862306a36Sopenharmony_ci}
172962306a36Sopenharmony_ci
173062306a36Sopenharmony_cistatic int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
173162306a36Sopenharmony_ci				struct shmem_inode_info *info, pgoff_t index)
173262306a36Sopenharmony_ci{
173362306a36Sopenharmony_ci	struct folio *old, *new;
173462306a36Sopenharmony_ci	struct address_space *swap_mapping;
173562306a36Sopenharmony_ci	swp_entry_t entry;
173662306a36Sopenharmony_ci	pgoff_t swap_index;
173762306a36Sopenharmony_ci	int error;
173862306a36Sopenharmony_ci
173962306a36Sopenharmony_ci	old = *foliop;
174062306a36Sopenharmony_ci	entry = old->swap;
174162306a36Sopenharmony_ci	swap_index = swp_offset(entry);
174262306a36Sopenharmony_ci	swap_mapping = swap_address_space(entry);
174362306a36Sopenharmony_ci
174462306a36Sopenharmony_ci	/*
174562306a36Sopenharmony_ci	 * We have arrived here because our zones are constrained, so don't
174662306a36Sopenharmony_ci	 * limit chance of success by further cpuset and node constraints.
174762306a36Sopenharmony_ci	 */
174862306a36Sopenharmony_ci	gfp &= ~GFP_CONSTRAINT_MASK;
174962306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(folio_test_large(old), old);
175062306a36Sopenharmony_ci	new = shmem_alloc_folio(gfp, info, index);
175162306a36Sopenharmony_ci	if (!new)
175262306a36Sopenharmony_ci		return -ENOMEM;
175362306a36Sopenharmony_ci
175462306a36Sopenharmony_ci	folio_get(new);
175562306a36Sopenharmony_ci	folio_copy(new, old);
175662306a36Sopenharmony_ci	flush_dcache_folio(new);
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_ci	__folio_set_locked(new);
175962306a36Sopenharmony_ci	__folio_set_swapbacked(new);
176062306a36Sopenharmony_ci	folio_mark_uptodate(new);
176162306a36Sopenharmony_ci	new->swap = entry;
176262306a36Sopenharmony_ci	folio_set_swapcache(new);
176362306a36Sopenharmony_ci
176462306a36Sopenharmony_ci	/*
176562306a36Sopenharmony_ci	 * Our caller will very soon move newpage out of swapcache, but it's
176662306a36Sopenharmony_ci	 * a nice clean interface for us to replace oldpage by newpage there.
176762306a36Sopenharmony_ci	 */
176862306a36Sopenharmony_ci	xa_lock_irq(&swap_mapping->i_pages);
176962306a36Sopenharmony_ci	error = shmem_replace_entry(swap_mapping, swap_index, old, new);
177062306a36Sopenharmony_ci	if (!error) {
177162306a36Sopenharmony_ci		mem_cgroup_migrate(old, new);
177262306a36Sopenharmony_ci		__lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1);
177362306a36Sopenharmony_ci		__lruvec_stat_mod_folio(new, NR_SHMEM, 1);
177462306a36Sopenharmony_ci		__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1);
177562306a36Sopenharmony_ci		__lruvec_stat_mod_folio(old, NR_SHMEM, -1);
177662306a36Sopenharmony_ci	}
177762306a36Sopenharmony_ci	xa_unlock_irq(&swap_mapping->i_pages);
177862306a36Sopenharmony_ci
177962306a36Sopenharmony_ci	if (unlikely(error)) {
178062306a36Sopenharmony_ci		/*
178162306a36Sopenharmony_ci		 * Is this possible?  I think not, now that our callers check
178262306a36Sopenharmony_ci		 * both PageSwapCache and page_private after getting page lock;
178362306a36Sopenharmony_ci		 * but be defensive.  Reverse old to newpage for clear and free.
178462306a36Sopenharmony_ci		 */
178562306a36Sopenharmony_ci		old = new;
178662306a36Sopenharmony_ci	} else {
178762306a36Sopenharmony_ci		folio_add_lru(new);
178862306a36Sopenharmony_ci		*foliop = new;
178962306a36Sopenharmony_ci	}
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci	folio_clear_swapcache(old);
179262306a36Sopenharmony_ci	old->private = NULL;
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci	folio_unlock(old);
179562306a36Sopenharmony_ci	folio_put_refs(old, 2);
179662306a36Sopenharmony_ci	return error;
179762306a36Sopenharmony_ci}
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_cistatic void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
180062306a36Sopenharmony_ci					 struct folio *folio, swp_entry_t swap)
180162306a36Sopenharmony_ci{
180262306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
180362306a36Sopenharmony_ci	swp_entry_t swapin_error;
180462306a36Sopenharmony_ci	void *old;
180562306a36Sopenharmony_ci
180662306a36Sopenharmony_ci	swapin_error = make_poisoned_swp_entry();
180762306a36Sopenharmony_ci	old = xa_cmpxchg_irq(&mapping->i_pages, index,
180862306a36Sopenharmony_ci			     swp_to_radix_entry(swap),
180962306a36Sopenharmony_ci			     swp_to_radix_entry(swapin_error), 0);
181062306a36Sopenharmony_ci	if (old != swp_to_radix_entry(swap))
181162306a36Sopenharmony_ci		return;
181262306a36Sopenharmony_ci
181362306a36Sopenharmony_ci	folio_wait_writeback(folio);
181462306a36Sopenharmony_ci	delete_from_swap_cache(folio);
181562306a36Sopenharmony_ci	/*
181662306a36Sopenharmony_ci	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
181762306a36Sopenharmony_ci	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
181862306a36Sopenharmony_ci	 * in shmem_evict_inode().
181962306a36Sopenharmony_ci	 */
182062306a36Sopenharmony_ci	shmem_recalc_inode(inode, -1, -1);
182162306a36Sopenharmony_ci	swap_free(swap);
182262306a36Sopenharmony_ci}
182362306a36Sopenharmony_ci
182462306a36Sopenharmony_ci/*
182562306a36Sopenharmony_ci * Swap in the folio pointed to by *foliop.
182662306a36Sopenharmony_ci * Caller has to make sure that *foliop contains a valid swapped folio.
182762306a36Sopenharmony_ci * Returns 0 and the folio in foliop if success. On failure, returns the
182862306a36Sopenharmony_ci * error code and NULL in *foliop.
182962306a36Sopenharmony_ci */
183062306a36Sopenharmony_cistatic int shmem_swapin_folio(struct inode *inode, pgoff_t index,
183162306a36Sopenharmony_ci			     struct folio **foliop, enum sgp_type sgp,
183262306a36Sopenharmony_ci			     gfp_t gfp, struct vm_area_struct *vma,
183362306a36Sopenharmony_ci			     vm_fault_t *fault_type)
183462306a36Sopenharmony_ci{
183562306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
183662306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
183762306a36Sopenharmony_ci	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
183862306a36Sopenharmony_ci	struct swap_info_struct *si;
183962306a36Sopenharmony_ci	struct folio *folio = NULL;
184062306a36Sopenharmony_ci	swp_entry_t swap;
184162306a36Sopenharmony_ci	int error;
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_ci	VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
184462306a36Sopenharmony_ci	swap = radix_to_swp_entry(*foliop);
184562306a36Sopenharmony_ci	*foliop = NULL;
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	if (is_poisoned_swp_entry(swap))
184862306a36Sopenharmony_ci		return -EIO;
184962306a36Sopenharmony_ci
185062306a36Sopenharmony_ci	si = get_swap_device(swap);
185162306a36Sopenharmony_ci	if (!si) {
185262306a36Sopenharmony_ci		if (!shmem_confirm_swap(mapping, index, swap))
185362306a36Sopenharmony_ci			return -EEXIST;
185462306a36Sopenharmony_ci		else
185562306a36Sopenharmony_ci			return -EINVAL;
185662306a36Sopenharmony_ci	}
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci	/* Look it up and read it in.. */
185962306a36Sopenharmony_ci	folio = swap_cache_get_folio(swap, NULL, 0);
186062306a36Sopenharmony_ci	if (!folio) {
186162306a36Sopenharmony_ci		/* Or update major stats only when swapin succeeds?? */
186262306a36Sopenharmony_ci		if (fault_type) {
186362306a36Sopenharmony_ci			*fault_type |= VM_FAULT_MAJOR;
186462306a36Sopenharmony_ci			count_vm_event(PGMAJFAULT);
186562306a36Sopenharmony_ci			count_memcg_event_mm(charge_mm, PGMAJFAULT);
186662306a36Sopenharmony_ci		}
186762306a36Sopenharmony_ci		/* Here we actually start the io */
186862306a36Sopenharmony_ci		folio = shmem_swapin(swap, gfp, info, index);
186962306a36Sopenharmony_ci		if (!folio) {
187062306a36Sopenharmony_ci			error = -ENOMEM;
187162306a36Sopenharmony_ci			goto failed;
187262306a36Sopenharmony_ci		}
187362306a36Sopenharmony_ci	}
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci	/* We have to do this with folio locked to prevent races */
187662306a36Sopenharmony_ci	folio_lock(folio);
187762306a36Sopenharmony_ci	if (!folio_test_swapcache(folio) ||
187862306a36Sopenharmony_ci	    folio->swap.val != swap.val ||
187962306a36Sopenharmony_ci	    !shmem_confirm_swap(mapping, index, swap)) {
188062306a36Sopenharmony_ci		error = -EEXIST;
188162306a36Sopenharmony_ci		goto unlock;
188262306a36Sopenharmony_ci	}
188362306a36Sopenharmony_ci	if (!folio_test_uptodate(folio)) {
188462306a36Sopenharmony_ci		error = -EIO;
188562306a36Sopenharmony_ci		goto failed;
188662306a36Sopenharmony_ci	}
188762306a36Sopenharmony_ci	folio_wait_writeback(folio);
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_ci	/*
189062306a36Sopenharmony_ci	 * Some architectures may have to restore extra metadata to the
189162306a36Sopenharmony_ci	 * folio after reading from swap.
189262306a36Sopenharmony_ci	 */
189362306a36Sopenharmony_ci	arch_swap_restore(swap, folio);
189462306a36Sopenharmony_ci
189562306a36Sopenharmony_ci	if (shmem_should_replace_folio(folio, gfp)) {
189662306a36Sopenharmony_ci		error = shmem_replace_folio(&folio, gfp, info, index);
189762306a36Sopenharmony_ci		if (error)
189862306a36Sopenharmony_ci			goto failed;
189962306a36Sopenharmony_ci	}
190062306a36Sopenharmony_ci
190162306a36Sopenharmony_ci	error = shmem_add_to_page_cache(folio, mapping, index,
190262306a36Sopenharmony_ci					swp_to_radix_entry(swap), gfp,
190362306a36Sopenharmony_ci					charge_mm);
190462306a36Sopenharmony_ci	if (error)
190562306a36Sopenharmony_ci		goto failed;
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci	shmem_recalc_inode(inode, 0, -1);
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci	if (sgp == SGP_WRITE)
191062306a36Sopenharmony_ci		folio_mark_accessed(folio);
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	delete_from_swap_cache(folio);
191362306a36Sopenharmony_ci	folio_mark_dirty(folio);
191462306a36Sopenharmony_ci	swap_free(swap);
191562306a36Sopenharmony_ci	put_swap_device(si);
191662306a36Sopenharmony_ci
191762306a36Sopenharmony_ci	*foliop = folio;
191862306a36Sopenharmony_ci	return 0;
191962306a36Sopenharmony_cifailed:
192062306a36Sopenharmony_ci	if (!shmem_confirm_swap(mapping, index, swap))
192162306a36Sopenharmony_ci		error = -EEXIST;
192262306a36Sopenharmony_ci	if (error == -EIO)
192362306a36Sopenharmony_ci		shmem_set_folio_swapin_error(inode, index, folio, swap);
192462306a36Sopenharmony_ciunlock:
192562306a36Sopenharmony_ci	if (folio) {
192662306a36Sopenharmony_ci		folio_unlock(folio);
192762306a36Sopenharmony_ci		folio_put(folio);
192862306a36Sopenharmony_ci	}
192962306a36Sopenharmony_ci	put_swap_device(si);
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_ci	return error;
193262306a36Sopenharmony_ci}
193362306a36Sopenharmony_ci
/*
 * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate
 *
 * @inode:	shmem inode whose mapping is searched
 * @index:	page offset within the file
 * @foliop:	where the resulting folio (or NULL) is stored
 * @sgp:	lookup mode; the code below distinguishes SGP_READ,
 *		SGP_NOALLOC, SGP_CACHE, SGP_WRITE and SGP_FALLOC
 * @gfp:	mask used for swapin, for allocating a new folio and for
 *		inserting it into the page cache
 * @vma:	faulting vma, or NULL
 * @vmf:	fault descriptor, or NULL
 * @fault_type:	where a handle_userfault() result is stored, or NULL
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache.
 *
 * vma, vmf, and fault_type are only supplied by shmem_fault:
 * otherwise they are NULL.
 *
 * Returns 0 with the folio in *foliop on success.  Also returns 0 with
 * *foliop == NULL for SGP_READ on a hole, and 0 after the fault has been
 * handed to handle_userfault() (its result is stored in *fault_type; on the
 * minor-fault path *foliop is not written).  Returns -EFBIG if @index exceeds
 * MAX_LFS_FILESIZE >> PAGE_SHIFT, -EINVAL if sgp <= SGP_CACHE and @index
 * lies at or beyond i_size, -ENOENT for SGP_NOALLOC on a hole, or the error
 * produced by swapin, allocation or page cache insertion.
 */
static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
		struct vm_area_struct *vma, struct vm_fault *vmf,
		vm_fault_t *fault_type)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo;
	struct mm_struct *charge_mm;
	struct folio *folio;
	pgoff_t hindex;
	gfp_t huge_gfp;
	int error;
	/* Limits the -ENOSPC restart at the bottom to a single attempt. */
	int once = 0;
	/* Set once this call has inserted a freshly allocated folio. */
	int alloced = 0;

	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
		return -EFBIG;
repeat:
	if (sgp <= SGP_CACHE &&
	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
		return -EINVAL;
	}

	sbinfo = SHMEM_SB(inode->i_sb);
	charge_mm = vma ? vma->vm_mm : NULL;

	/* May return a folio (with a reference), a swap value entry, or NULL. */
	folio = filemap_get_entry(mapping, index);
	if (folio && vma && userfaultfd_minor(vma)) {
		/* Only a real folio carries a reference that must be dropped. */
		if (!xa_is_value(folio))
			folio_put(folio);
		*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
		return 0;
	}

	if (xa_is_value(folio)) {
		error = shmem_swapin_folio(inode, index, &folio,
					  sgp, gfp, vma, fault_type);
		/* -EEXIST: the slot changed under us, so look it up again. */
		if (error == -EEXIST)
			goto repeat;

		*foliop = folio;
		return error;
	}

	if (folio) {
		folio_lock(folio);

		/* Has the folio been truncated or swapped out? */
		if (unlikely(folio->mapping != mapping)) {
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
		if (sgp == SGP_WRITE)
			folio_mark_accessed(folio);
		/* Cache hit: the folio is returned still locked. */
		if (folio_test_uptodate(folio))
			goto out;
		/* fallocated folio */
		if (sgp != SGP_READ)
			goto clear;
		/* SGP_READ treats a not-uptodate folio like a hole. */
		folio_unlock(folio);
		folio_put(folio);
	}

	/*
	 * SGP_READ: succeed on hole, with NULL folio, letting caller zero.
	 * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail.
	 */
	*foliop = NULL;
	if (sgp == SGP_READ)
		return 0;
	if (sgp == SGP_NOALLOC)
		return -ENOENT;

	/*
	 * Fast cache lookup and swap lookup did not find it: allocate.
	 */

	if (vma && userfaultfd_missing(vma)) {
		*fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
		return 0;
	}

	if (!shmem_is_huge(inode, index, false,
			   vma ? vma->vm_mm : NULL, vma ? vma->vm_flags : 0))
		goto alloc_nohuge;

	huge_gfp = vma_thp_gfp_mask(vma);
	huge_gfp = limit_gfp_mask(huge_gfp, gfp);
	folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true);
	if (IS_ERR(folio)) {
		/*
		 * Small-folio allocation.  Reached by falling through when the
		 * huge attempt failed, and by goto from three other places:
		 * huge not wanted, space reclaimed by the shrinker, and a large
		 * folio that could not be inserted into the page cache.
		 */
alloc_nohuge:
		folio = shmem_alloc_and_acct_folio(gfp, inode, index, false);
	}
	if (IS_ERR(folio)) {
		int retry = 5;

		error = PTR_ERR(folio);
		/* Nothing to unlock or put at the unlock label. */
		folio = NULL;
		if (error != -ENOSPC)
			goto unlock;
		/*
		 * Try to reclaim some space by splitting a large folio
		 * beyond i_size on the filesystem.
		 */
		while (retry--) {
			int ret;

			ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
			if (ret == SHRINK_STOP)
				break;
			if (ret)
				goto alloc_nohuge;
		}
		goto unlock;
	}

	/* A large folio is inserted at its naturally aligned start index. */
	hindex = round_down(index, folio_nr_pages(folio));

	if (sgp == SGP_WRITE)
		__folio_set_referenced(folio);

	error = shmem_add_to_page_cache(folio, mapping, hindex,
					NULL, gfp & GFP_RECLAIM_MASK,
					charge_mm);
	if (error)
		goto unacct;

	folio_add_lru(folio);
	shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
	alloced = true;

	if (folio_test_pmd_mappable(folio) &&
	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
					folio_next_index(folio) - 1) {
		/*
		 * Part of the large folio is beyond i_size: subject
		 * to shrink under memory pressure.
		 */
		spin_lock(&sbinfo->shrinklist_lock);
		/*
		 * _careful to defend against unlocked access to
		 * ->shrink_list in shmem_unused_huge_shrink()
		 */
		if (list_empty_careful(&info->shrinklist)) {
			list_add_tail(&info->shrinklist,
				      &sbinfo->shrinklist);
			sbinfo->shrinklist_len++;
		}
		spin_unlock(&sbinfo->shrinklist_lock);
	}

	/*
	 * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio.
	 */
	if (sgp == SGP_FALLOC)
		sgp = SGP_WRITE;
clear:
	/*
	 * Let SGP_WRITE caller clear ends if write does not fill folio;
	 * but SGP_FALLOC on a folio fallocated earlier must initialize
	 * it now, lest undo on failure cancel our earlier guarantee.
	 */
	if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) {
		long i, n = folio_nr_pages(folio);

		/* Zero every page of the folio before marking it uptodate. */
		for (i = 0; i < n; i++)
			clear_highpage(folio_page(folio, i));
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}

	/* Perhaps the file has been truncated since we checked */
	if (sgp <= SGP_CACHE &&
	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
		/* Only a folio this call inserted is removed again. */
		if (alloced) {
			folio_clear_dirty(folio);
			filemap_remove_folio(folio);
			shmem_recalc_inode(inode, 0, 0);
		}
		error = -EINVAL;
		goto unlock;
	}
out:
	*foliop = folio;
	return 0;

	/*
	 * Error recovery.
	 */
unacct:
	/*
	 * Page cache insertion failed: give back the blocks accounted for
	 * this folio (presumably charged by shmem_alloc_and_acct_folio()).
	 */
	shmem_inode_unacct_blocks(inode, folio_nr_pages(folio));

	/* A large folio is dropped and the allocation retried small. */
	if (folio_test_large(folio)) {
		folio_unlock(folio);
		folio_put(folio);
		goto alloc_nohuge;
	}
unlock:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	/* One restart from the top after recalculating the inode on -ENOSPC. */
	if (error == -ENOSPC && !once++) {
		shmem_recalc_inode(inode, 0, 0);
		goto repeat;
	}
	/* -EEXIST is always retried from the lookup. */
	if (error == -EEXIST)
		goto repeat;
	return error;
}
215662306a36Sopenharmony_ci
215762306a36Sopenharmony_ciint shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
215862306a36Sopenharmony_ci		enum sgp_type sgp)
215962306a36Sopenharmony_ci{
216062306a36Sopenharmony_ci	return shmem_get_folio_gfp(inode, index, foliop, sgp,
216162306a36Sopenharmony_ci			mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
216262306a36Sopenharmony_ci}
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci/*
216562306a36Sopenharmony_ci * This is like autoremove_wake_function, but it removes the wait queue
216662306a36Sopenharmony_ci * entry unconditionally - even if something else had already woken the
216762306a36Sopenharmony_ci * target.
216862306a36Sopenharmony_ci */
216962306a36Sopenharmony_cistatic int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
217062306a36Sopenharmony_ci{
217162306a36Sopenharmony_ci	int ret = default_wake_function(wait, mode, sync, key);
217262306a36Sopenharmony_ci	list_del_init(&wait->entry);
217362306a36Sopenharmony_ci	return ret;
217462306a36Sopenharmony_ci}
217562306a36Sopenharmony_ci
/*
 * Handle a page fault on a shmem mapping: resolve vmf->pgoff of the file
 * behind vmf->vma to a folio through shmem_get_folio_gfp(SGP_CACHE) and
 * publish the matching page in vmf->page.
 *
 * If inode->i_private points at a shmem_falloc with a wait queue whose
 * [start, next) range covers vmf->pgoff (a hole-punch in progress), the page
 * cache is not touched: the task sleeps on that wait queue and then returns
 * VM_FAULT_NOPAGE, or VM_FAULT_RETRY when maybe_unlock_mmap_for_io() handed
 * back a pinned file.
 *
 * Otherwise returns vmf_error(err) if shmem_get_folio_gfp() failed, else
 * ret, which starts out as VM_FAULT_LOCKED and may be modified by
 * shmem_get_folio_gfp() through its fault_type argument.
 */
static vm_fault_t shmem_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = file_inode(vma->vm_file);
	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
	struct folio *folio = NULL;
	int err;
	vm_fault_t ret = VM_FAULT_LOCKED;

	/*
	 * Trinity finds that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: which in turn
	 * locks writers out with its hold on i_rwsem.  So refrain from
	 * faulting pages into the hole while it's being punched.  Although
	 * shmem_undo_range() does remove the additions, it may be unable to
	 * keep up, as each new page needs its own unmap_mapping_range() call,
	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
	 *
	 * It does not matter if we sometimes reach this check just before the
	 * hole-punch begins, so that one fault then races with the punch:
	 * we just need to make racing faults a rare case.
	 *
	 * The implementation below would be much simpler if we just used a
	 * standard mutex or completion: but we cannot take i_rwsem in fault,
	 * and bloating every shmem inode for this unlikely case would be sad.
	 */
	/* Unlocked peek; i_private is read again under i_lock below. */
	if (unlikely(inode->i_private)) {
		struct shmem_falloc *shmem_falloc;

		spin_lock(&inode->i_lock);
		shmem_falloc = inode->i_private;
		if (shmem_falloc &&
		    shmem_falloc->waitq &&
		    vmf->pgoff >= shmem_falloc->start &&
		    vmf->pgoff < shmem_falloc->next) {
			struct file *fpin;
			wait_queue_head_t *shmem_falloc_waitq;
			DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);

			/* Default outcome; upgraded to RETRY if a file was pinned. */
			ret = VM_FAULT_NOPAGE;
			fpin = maybe_unlock_mmap_for_io(vmf, NULL);
			if (fpin)
				ret = VM_FAULT_RETRY;

			/* Copy the waitq pointer while i_lock is still held. */
			shmem_falloc_waitq = shmem_falloc->waitq;
			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock(&inode->i_lock);
			schedule();

			/*
			 * shmem_falloc_waitq points into the shmem_fallocate()
			 * stack of the hole-punching task: shmem_falloc_waitq
			 * is usually invalid by the time we reach here, but
			 * finish_wait() does not dereference it in that case;
			 * though i_lock needed lest racing with wake_up_all().
			 */
			spin_lock(&inode->i_lock);
			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
			spin_unlock(&inode->i_lock);

			/* Drop the file reference taken for the retry, if any. */
			if (fpin)
				fput(fpin);
			return ret;
		}
		spin_unlock(&inode->i_lock);
	}

	err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
				  gfp, vma, vmf, &ret);
	if (err)
		return vmf_error(err);
	/* folio stays NULL when the lookup returned 0 without a folio. */
	if (folio)
		vmf->page = folio_file_page(folio, vmf->pgoff);
	return ret;
}
225262306a36Sopenharmony_ci
225362306a36Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file,
225462306a36Sopenharmony_ci				      unsigned long uaddr, unsigned long len,
225562306a36Sopenharmony_ci				      unsigned long pgoff, unsigned long flags)
225662306a36Sopenharmony_ci{
225762306a36Sopenharmony_ci	unsigned long (*get_area)(struct file *,
225862306a36Sopenharmony_ci		unsigned long, unsigned long, unsigned long, unsigned long);
225962306a36Sopenharmony_ci	unsigned long addr;
226062306a36Sopenharmony_ci	unsigned long offset;
226162306a36Sopenharmony_ci	unsigned long inflated_len;
226262306a36Sopenharmony_ci	unsigned long inflated_addr;
226362306a36Sopenharmony_ci	unsigned long inflated_offset;
226462306a36Sopenharmony_ci
226562306a36Sopenharmony_ci	if (len > TASK_SIZE)
226662306a36Sopenharmony_ci		return -ENOMEM;
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci	get_area = current->mm->get_unmapped_area;
226962306a36Sopenharmony_ci	addr = get_area(file, uaddr, len, pgoff, flags);
227062306a36Sopenharmony_ci
227162306a36Sopenharmony_ci	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
227262306a36Sopenharmony_ci		return addr;
227362306a36Sopenharmony_ci	if (IS_ERR_VALUE(addr))
227462306a36Sopenharmony_ci		return addr;
227562306a36Sopenharmony_ci	if (addr & ~PAGE_MASK)
227662306a36Sopenharmony_ci		return addr;
227762306a36Sopenharmony_ci	if (addr > TASK_SIZE - len)
227862306a36Sopenharmony_ci		return addr;
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ci	if (shmem_huge == SHMEM_HUGE_DENY)
228162306a36Sopenharmony_ci		return addr;
228262306a36Sopenharmony_ci	if (len < HPAGE_PMD_SIZE)
228362306a36Sopenharmony_ci		return addr;
228462306a36Sopenharmony_ci	if (flags & MAP_FIXED)
228562306a36Sopenharmony_ci		return addr;
228662306a36Sopenharmony_ci	/*
228762306a36Sopenharmony_ci	 * Our priority is to support MAP_SHARED mapped hugely;
228862306a36Sopenharmony_ci	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
228962306a36Sopenharmony_ci	 * But if caller specified an address hint and we allocated area there
229062306a36Sopenharmony_ci	 * successfully, respect that as before.
229162306a36Sopenharmony_ci	 */
229262306a36Sopenharmony_ci	if (uaddr == addr)
229362306a36Sopenharmony_ci		return addr;
229462306a36Sopenharmony_ci
229562306a36Sopenharmony_ci	if (shmem_huge != SHMEM_HUGE_FORCE) {
229662306a36Sopenharmony_ci		struct super_block *sb;
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci		if (file) {
229962306a36Sopenharmony_ci			VM_BUG_ON(file->f_op != &shmem_file_operations);
230062306a36Sopenharmony_ci			sb = file_inode(file)->i_sb;
230162306a36Sopenharmony_ci		} else {
230262306a36Sopenharmony_ci			/*
230362306a36Sopenharmony_ci			 * Called directly from mm/mmap.c, or drivers/char/mem.c
230462306a36Sopenharmony_ci			 * for "/dev/zero", to create a shared anonymous object.
230562306a36Sopenharmony_ci			 */
230662306a36Sopenharmony_ci			if (IS_ERR(shm_mnt))
230762306a36Sopenharmony_ci				return addr;
230862306a36Sopenharmony_ci			sb = shm_mnt->mnt_sb;
230962306a36Sopenharmony_ci		}
231062306a36Sopenharmony_ci		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
231162306a36Sopenharmony_ci			return addr;
231262306a36Sopenharmony_ci	}
231362306a36Sopenharmony_ci
231462306a36Sopenharmony_ci	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
231562306a36Sopenharmony_ci	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
231662306a36Sopenharmony_ci		return addr;
231762306a36Sopenharmony_ci	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
231862306a36Sopenharmony_ci		return addr;
231962306a36Sopenharmony_ci
232062306a36Sopenharmony_ci	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
232162306a36Sopenharmony_ci	if (inflated_len > TASK_SIZE)
232262306a36Sopenharmony_ci		return addr;
232362306a36Sopenharmony_ci	if (inflated_len < len)
232462306a36Sopenharmony_ci		return addr;
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_ci	inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
232762306a36Sopenharmony_ci	if (IS_ERR_VALUE(inflated_addr))
232862306a36Sopenharmony_ci		return addr;
232962306a36Sopenharmony_ci	if (inflated_addr & ~PAGE_MASK)
233062306a36Sopenharmony_ci		return addr;
233162306a36Sopenharmony_ci
233262306a36Sopenharmony_ci	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
233362306a36Sopenharmony_ci	inflated_addr += offset - inflated_offset;
233462306a36Sopenharmony_ci	if (inflated_offset > offset)
233562306a36Sopenharmony_ci		inflated_addr += HPAGE_PMD_SIZE;
233662306a36Sopenharmony_ci
233762306a36Sopenharmony_ci	if (inflated_addr > TASK_SIZE - len)
233862306a36Sopenharmony_ci		return addr;
233962306a36Sopenharmony_ci	return inflated_addr;
234062306a36Sopenharmony_ci}
234162306a36Sopenharmony_ci
234262306a36Sopenharmony_ci#ifdef CONFIG_NUMA
234362306a36Sopenharmony_cistatic int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
234462306a36Sopenharmony_ci{
234562306a36Sopenharmony_ci	struct inode *inode = file_inode(vma->vm_file);
234662306a36Sopenharmony_ci	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
234762306a36Sopenharmony_ci}
234862306a36Sopenharmony_ci
234962306a36Sopenharmony_cistatic struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
235062306a36Sopenharmony_ci					  unsigned long addr)
235162306a36Sopenharmony_ci{
235262306a36Sopenharmony_ci	struct inode *inode = file_inode(vma->vm_file);
235362306a36Sopenharmony_ci	pgoff_t index;
235462306a36Sopenharmony_ci
235562306a36Sopenharmony_ci	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
235662306a36Sopenharmony_ci	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
235762306a36Sopenharmony_ci}
235862306a36Sopenharmony_ci#endif
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ciint shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
236162306a36Sopenharmony_ci{
236262306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
236362306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
236462306a36Sopenharmony_ci	int retval = -ENOMEM;
236562306a36Sopenharmony_ci
236662306a36Sopenharmony_ci	/*
236762306a36Sopenharmony_ci	 * What serializes the accesses to info->flags?
236862306a36Sopenharmony_ci	 * ipc_lock_object() when called from shmctl_do_lock(),
236962306a36Sopenharmony_ci	 * no serialization needed when called from shm_destroy().
237062306a36Sopenharmony_ci	 */
237162306a36Sopenharmony_ci	if (lock && !(info->flags & VM_LOCKED)) {
237262306a36Sopenharmony_ci		if (!user_shm_lock(inode->i_size, ucounts))
237362306a36Sopenharmony_ci			goto out_nomem;
237462306a36Sopenharmony_ci		info->flags |= VM_LOCKED;
237562306a36Sopenharmony_ci		mapping_set_unevictable(file->f_mapping);
237662306a36Sopenharmony_ci	}
237762306a36Sopenharmony_ci	if (!lock && (info->flags & VM_LOCKED) && ucounts) {
237862306a36Sopenharmony_ci		user_shm_unlock(inode->i_size, ucounts);
237962306a36Sopenharmony_ci		info->flags &= ~VM_LOCKED;
238062306a36Sopenharmony_ci		mapping_clear_unevictable(file->f_mapping);
238162306a36Sopenharmony_ci	}
238262306a36Sopenharmony_ci	retval = 0;
238362306a36Sopenharmony_ci
238462306a36Sopenharmony_ciout_nomem:
238562306a36Sopenharmony_ci	return retval;
238662306a36Sopenharmony_ci}
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_cistatic int shmem_mmap(struct file *file, struct vm_area_struct *vma)
238962306a36Sopenharmony_ci{
239062306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
239162306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
239262306a36Sopenharmony_ci	int ret;
239362306a36Sopenharmony_ci
239462306a36Sopenharmony_ci	ret = seal_check_future_write(info->seals, vma);
239562306a36Sopenharmony_ci	if (ret)
239662306a36Sopenharmony_ci		return ret;
239762306a36Sopenharmony_ci
239862306a36Sopenharmony_ci	/* arm64 - allow memory tagging on RAM-based files */
239962306a36Sopenharmony_ci	vm_flags_set(vma, VM_MTE_ALLOWED);
240062306a36Sopenharmony_ci
240162306a36Sopenharmony_ci	file_accessed(file);
240262306a36Sopenharmony_ci	/* This is anonymous shared memory if it is unlinked at the time of mmap */
240362306a36Sopenharmony_ci	if (inode->i_nlink)
240462306a36Sopenharmony_ci		vma->vm_ops = &shmem_vm_ops;
240562306a36Sopenharmony_ci	else
240662306a36Sopenharmony_ci		vma->vm_ops = &shmem_anon_vm_ops;
240762306a36Sopenharmony_ci	return 0;
240862306a36Sopenharmony_ci}
240962306a36Sopenharmony_ci
241062306a36Sopenharmony_cistatic int shmem_file_open(struct inode *inode, struct file *file)
241162306a36Sopenharmony_ci{
241262306a36Sopenharmony_ci	file->f_mode |= FMODE_CAN_ODIRECT;
241362306a36Sopenharmony_ci	return generic_file_open(inode, file);
241462306a36Sopenharmony_ci}
241562306a36Sopenharmony_ci
241662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR
241762306a36Sopenharmony_cistatic int shmem_initxattrs(struct inode *, const struct xattr *, void *);
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci/*
242062306a36Sopenharmony_ci * chattr's fsflags are unrelated to extended attributes,
242162306a36Sopenharmony_ci * but tmpfs has chosen to enable them under the same config option.
242262306a36Sopenharmony_ci */
242362306a36Sopenharmony_cistatic void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
242462306a36Sopenharmony_ci{
242562306a36Sopenharmony_ci	unsigned int i_flags = 0;
242662306a36Sopenharmony_ci
242762306a36Sopenharmony_ci	if (fsflags & FS_NOATIME_FL)
242862306a36Sopenharmony_ci		i_flags |= S_NOATIME;
242962306a36Sopenharmony_ci	if (fsflags & FS_APPEND_FL)
243062306a36Sopenharmony_ci		i_flags |= S_APPEND;
243162306a36Sopenharmony_ci	if (fsflags & FS_IMMUTABLE_FL)
243262306a36Sopenharmony_ci		i_flags |= S_IMMUTABLE;
243362306a36Sopenharmony_ci	/*
243462306a36Sopenharmony_ci	 * But FS_NODUMP_FL does not require any action in i_flags.
243562306a36Sopenharmony_ci	 */
243662306a36Sopenharmony_ci	inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE);
243762306a36Sopenharmony_ci}
243862306a36Sopenharmony_ci#else
/*
 * Variant built when CONFIG_TMPFS_XATTR is not set: deliberately a no-op,
 * so @fsflags are never reflected into @inode.
 */
static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
{
}
244262306a36Sopenharmony_ci#define shmem_initxattrs NULL
244362306a36Sopenharmony_ci#endif
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_cistatic struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
244662306a36Sopenharmony_ci{
244762306a36Sopenharmony_ci	return &SHMEM_I(inode)->dir_offsets;
244862306a36Sopenharmony_ci}
244962306a36Sopenharmony_ci
245062306a36Sopenharmony_cistatic struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
245162306a36Sopenharmony_ci					     struct super_block *sb,
245262306a36Sopenharmony_ci					     struct inode *dir, umode_t mode,
245362306a36Sopenharmony_ci					     dev_t dev, unsigned long flags)
245462306a36Sopenharmony_ci{
245562306a36Sopenharmony_ci	struct inode *inode;
245662306a36Sopenharmony_ci	struct shmem_inode_info *info;
245762306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
245862306a36Sopenharmony_ci	ino_t ino;
245962306a36Sopenharmony_ci	int err;
246062306a36Sopenharmony_ci
246162306a36Sopenharmony_ci	err = shmem_reserve_inode(sb, &ino);
246262306a36Sopenharmony_ci	if (err)
246362306a36Sopenharmony_ci		return ERR_PTR(err);
246462306a36Sopenharmony_ci
246562306a36Sopenharmony_ci
246662306a36Sopenharmony_ci	inode = new_inode(sb);
246762306a36Sopenharmony_ci	if (!inode) {
246862306a36Sopenharmony_ci		shmem_free_inode(sb, 0);
246962306a36Sopenharmony_ci		return ERR_PTR(-ENOSPC);
247062306a36Sopenharmony_ci	}
247162306a36Sopenharmony_ci
247262306a36Sopenharmony_ci	inode->i_ino = ino;
247362306a36Sopenharmony_ci	inode_init_owner(idmap, inode, dir, mode);
247462306a36Sopenharmony_ci	inode->i_blocks = 0;
247562306a36Sopenharmony_ci	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
247662306a36Sopenharmony_ci	inode->i_generation = get_random_u32();
247762306a36Sopenharmony_ci	info = SHMEM_I(inode);
247862306a36Sopenharmony_ci	memset(info, 0, (char *)inode - (char *)info);
247962306a36Sopenharmony_ci	spin_lock_init(&info->lock);
248062306a36Sopenharmony_ci	atomic_set(&info->stop_eviction, 0);
248162306a36Sopenharmony_ci	info->seals = F_SEAL_SEAL;
248262306a36Sopenharmony_ci	info->flags = flags & VM_NORESERVE;
248362306a36Sopenharmony_ci	info->i_crtime = inode->i_mtime;
248462306a36Sopenharmony_ci	info->fsflags = (dir == NULL) ? 0 :
248562306a36Sopenharmony_ci		SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
248662306a36Sopenharmony_ci	if (info->fsflags)
248762306a36Sopenharmony_ci		shmem_set_inode_flags(inode, info->fsflags);
248862306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->shrinklist);
248962306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->swaplist);
249062306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->swaplist);
249162306a36Sopenharmony_ci	if (sbinfo->noswap)
249262306a36Sopenharmony_ci		mapping_set_unevictable(inode->i_mapping);
249362306a36Sopenharmony_ci	simple_xattrs_init(&info->xattrs);
249462306a36Sopenharmony_ci	cache_no_acl(inode);
249562306a36Sopenharmony_ci	mapping_set_large_folios(inode->i_mapping);
249662306a36Sopenharmony_ci
249762306a36Sopenharmony_ci	switch (mode & S_IFMT) {
249862306a36Sopenharmony_ci	default:
249962306a36Sopenharmony_ci		inode->i_op = &shmem_special_inode_operations;
250062306a36Sopenharmony_ci		init_special_inode(inode, mode, dev);
250162306a36Sopenharmony_ci		break;
250262306a36Sopenharmony_ci	case S_IFREG:
250362306a36Sopenharmony_ci		inode->i_mapping->a_ops = &shmem_aops;
250462306a36Sopenharmony_ci		inode->i_op = &shmem_inode_operations;
250562306a36Sopenharmony_ci		inode->i_fop = &shmem_file_operations;
250662306a36Sopenharmony_ci		mpol_shared_policy_init(&info->policy,
250762306a36Sopenharmony_ci					 shmem_get_sbmpol(sbinfo));
250862306a36Sopenharmony_ci		break;
250962306a36Sopenharmony_ci	case S_IFDIR:
251062306a36Sopenharmony_ci		inc_nlink(inode);
251162306a36Sopenharmony_ci		/* Some things misbehave if size == 0 on a directory */
251262306a36Sopenharmony_ci		inode->i_size = 2 * BOGO_DIRENT_SIZE;
251362306a36Sopenharmony_ci		inode->i_op = &shmem_dir_inode_operations;
251462306a36Sopenharmony_ci		inode->i_fop = &simple_offset_dir_operations;
251562306a36Sopenharmony_ci		simple_offset_init(shmem_get_offset_ctx(inode));
251662306a36Sopenharmony_ci		break;
251762306a36Sopenharmony_ci	case S_IFLNK:
251862306a36Sopenharmony_ci		/*
251962306a36Sopenharmony_ci		 * Must not load anything in the rbtree,
252062306a36Sopenharmony_ci		 * mpol_free_shared_policy will not be called.
252162306a36Sopenharmony_ci		 */
252262306a36Sopenharmony_ci		mpol_shared_policy_init(&info->policy, NULL);
252362306a36Sopenharmony_ci		break;
252462306a36Sopenharmony_ci	}
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci	lockdep_annotate_inode_mutex_key(inode);
252762306a36Sopenharmony_ci	return inode;
252862306a36Sopenharmony_ci}
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
253162306a36Sopenharmony_cistatic struct inode *shmem_get_inode(struct mnt_idmap *idmap,
253262306a36Sopenharmony_ci				     struct super_block *sb, struct inode *dir,
253362306a36Sopenharmony_ci				     umode_t mode, dev_t dev, unsigned long flags)
253462306a36Sopenharmony_ci{
253562306a36Sopenharmony_ci	int err;
253662306a36Sopenharmony_ci	struct inode *inode;
253762306a36Sopenharmony_ci
253862306a36Sopenharmony_ci	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
253962306a36Sopenharmony_ci	if (IS_ERR(inode))
254062306a36Sopenharmony_ci		return inode;
254162306a36Sopenharmony_ci
254262306a36Sopenharmony_ci	err = dquot_initialize(inode);
254362306a36Sopenharmony_ci	if (err)
254462306a36Sopenharmony_ci		goto errout;
254562306a36Sopenharmony_ci
254662306a36Sopenharmony_ci	err = dquot_alloc_inode(inode);
254762306a36Sopenharmony_ci	if (err) {
254862306a36Sopenharmony_ci		dquot_drop(inode);
254962306a36Sopenharmony_ci		goto errout;
255062306a36Sopenharmony_ci	}
255162306a36Sopenharmony_ci	return inode;
255262306a36Sopenharmony_ci
255362306a36Sopenharmony_cierrout:
255462306a36Sopenharmony_ci	inode->i_flags |= S_NOQUOTA;
255562306a36Sopenharmony_ci	iput(inode);
255662306a36Sopenharmony_ci	return ERR_PTR(err);
255762306a36Sopenharmony_ci}
255862306a36Sopenharmony_ci#else
255962306a36Sopenharmony_cistatic inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
256062306a36Sopenharmony_ci				     struct super_block *sb, struct inode *dir,
256162306a36Sopenharmony_ci				     umode_t mode, dev_t dev, unsigned long flags)
256262306a36Sopenharmony_ci{
256362306a36Sopenharmony_ci	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
256462306a36Sopenharmony_ci}
256562306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */
256662306a36Sopenharmony_ci
256762306a36Sopenharmony_ci#ifdef CONFIG_USERFAULTFD
/*
 * shmem_mfill_atomic_pte - fill one page of a shmem mapping for userfaultfd
 * @dst_pmd:  passed through to mfill_atomic_install_pte()
 * @dst_vma:  destination VMA; its vm_file supplies the shmem inode
 * @dst_addr: destination user address; determines the page offset in the file
 * @src_addr: user address PAGE_SIZE bytes are copied from (MFILL_ATOMIC_COPY)
 * @flags:    uffd flags; the mode selects copying versus zero-filling
 * @foliop:   in/out: folio handed back by an earlier -ENOENT return, or NULL
 *
 * One block is accounted to the inode up front and unaccounted again on
 * every failure path after that.
 *
 * When *@foliop is NULL a folio is allocated and either filled from
 * @src_addr (with page faults disabled) or cleared.  If the copy is
 * incomplete, the folio is stored in *@foliop and -ENOENT is returned so
 * the caller can retry; the folio is not freed in that case.  When
 * *@foliop is non-NULL that folio is used as is and *@foliop is reset
 * to NULL.
 *
 * The folio is then added to the page cache at the computed offset and
 * mapped with mfill_atomic_install_pte().
 *
 * Returns 0 on success, -ENOMEM if accounting or allocation fails, -ENOENT
 * as described above, -EFAULT if the offset lies at or beyond the page
 * count derived from i_size, or the error from shmem_add_to_page_cache()
 * or mfill_atomic_install_pte().
 */
int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
			   struct vm_area_struct *dst_vma,
			   unsigned long dst_addr,
			   unsigned long src_addr,
			   uffd_flags_t flags,
			   struct folio **foliop)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfp = mapping_gfp_mask(mapping);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	void *page_kaddr;
	struct folio *folio;
	int ret;
	pgoff_t max_off;

	if (shmem_inode_acct_block(inode, 1)) {
		/*
		 * We may have got a page, returned -ENOENT triggering a retry,
		 * and now we find ourselves with -ENOMEM. Release the page, to
		 * avoid a BUG_ON in our caller.
		 */
		if (unlikely(*foliop)) {
			folio_put(*foliop);
			*foliop = NULL;
		}
		return -ENOMEM;
	}

	if (!*foliop) {
		/* First attempt: no folio was carried over from a retry. */
		ret = -ENOMEM;
		folio = shmem_alloc_folio(gfp, info, pgoff);
		if (!folio)
			goto out_unacct_blocks;

		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) {
			page_kaddr = kmap_local_folio(folio, 0);
			/*
			 * The read mmap_lock is held here.  Despite the
			 * mmap_lock being read recursive a deadlock is still
			 * possible if a writer has taken a lock.  For example:
			 *
			 * process A thread 1 takes read lock on own mmap_lock
			 * process A thread 2 calls mmap, blocks taking write lock
			 * process B thread 1 takes page fault, read lock on own mmap lock
			 * process B thread 2 calls mmap, blocks taking write lock
			 * process A thread 1 blocks taking read lock on process B
			 * process B thread 1 blocks taking read lock on process A
			 *
			 * Disable page faults to prevent potential deadlock
			 * and retry the copy outside the mmap_lock.
			 */
			pagefault_disable();
			ret = copy_from_user(page_kaddr,
					     (const void __user *)src_addr,
					     PAGE_SIZE);
			pagefault_enable();
			kunmap_local(page_kaddr);

			/* fallback to copy_from_user outside mmap_lock */
			if (unlikely(ret)) {
				/* Hand the folio back so the retry can reuse it. */
				*foliop = folio;
				ret = -ENOENT;
				/* don't free the page */
				goto out_unacct_blocks;
			}

			flush_dcache_folio(folio);
		} else {		/* ZEROPAGE */
			clear_user_highpage(&folio->page, dst_addr);
		}
	} else {
		/* Retry: take over the folio stored by the earlier call. */
		folio = *foliop;
		VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
		*foliop = NULL;
	}

	VM_BUG_ON(folio_test_locked(folio));
	VM_BUG_ON(folio_test_swapbacked(folio));
	__folio_set_locked(folio);
	__folio_set_swapbacked(folio);
	__folio_mark_uptodate(folio);

	/* Refuse offsets at or beyond the number of pages covered by i_size. */
	ret = -EFAULT;
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(pgoff >= max_off))
		goto out_release;

	ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL,
				      gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm);
	if (ret)
		goto out_release;

	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
				       &folio->page, true, flags);
	if (ret)
		goto out_delete_from_cache;

	shmem_recalc_inode(inode, 1, 0);
	folio_unlock(folio);
	return 0;
	/* Error unwinding: each label undoes one more step, then falls through. */
out_delete_from_cache:
	filemap_remove_folio(folio);
out_release:
	folio_unlock(folio);
	folio_put(folio);
out_unacct_blocks:
	shmem_inode_unacct_blocks(inode, 1);
	return ret;
}
267962306a36Sopenharmony_ci#endif /* CONFIG_USERFAULTFD */
268062306a36Sopenharmony_ci
268162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS
268262306a36Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations;
268362306a36Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations;
268462306a36Sopenharmony_ci
268562306a36Sopenharmony_cistatic int
268662306a36Sopenharmony_cishmem_write_begin(struct file *file, struct address_space *mapping,
268762306a36Sopenharmony_ci			loff_t pos, unsigned len,
268862306a36Sopenharmony_ci			struct page **pagep, void **fsdata)
268962306a36Sopenharmony_ci{
269062306a36Sopenharmony_ci	struct inode *inode = mapping->host;
269162306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
269262306a36Sopenharmony_ci	pgoff_t index = pos >> PAGE_SHIFT;
269362306a36Sopenharmony_ci	struct folio *folio;
269462306a36Sopenharmony_ci	int ret = 0;
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_ci	/* i_rwsem is held by caller */
269762306a36Sopenharmony_ci	if (unlikely(info->seals & (F_SEAL_GROW |
269862306a36Sopenharmony_ci				   F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
269962306a36Sopenharmony_ci		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
270062306a36Sopenharmony_ci			return -EPERM;
270162306a36Sopenharmony_ci		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
270262306a36Sopenharmony_ci			return -EPERM;
270362306a36Sopenharmony_ci	}
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci	ret = shmem_get_folio(inode, index, &folio, SGP_WRITE);
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	if (ret)
270862306a36Sopenharmony_ci		return ret;
270962306a36Sopenharmony_ci
271062306a36Sopenharmony_ci	*pagep = folio_file_page(folio, index);
271162306a36Sopenharmony_ci	if (PageHWPoison(*pagep)) {
271262306a36Sopenharmony_ci		folio_unlock(folio);
271362306a36Sopenharmony_ci		folio_put(folio);
271462306a36Sopenharmony_ci		*pagep = NULL;
271562306a36Sopenharmony_ci		return -EIO;
271662306a36Sopenharmony_ci	}
271762306a36Sopenharmony_ci
271862306a36Sopenharmony_ci	return 0;
271962306a36Sopenharmony_ci}
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_cistatic int
272262306a36Sopenharmony_cishmem_write_end(struct file *file, struct address_space *mapping,
272362306a36Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
272462306a36Sopenharmony_ci			struct page *page, void *fsdata)
272562306a36Sopenharmony_ci{
272662306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
272762306a36Sopenharmony_ci	struct inode *inode = mapping->host;
272862306a36Sopenharmony_ci
272962306a36Sopenharmony_ci	if (pos + copied > inode->i_size)
273062306a36Sopenharmony_ci		i_size_write(inode, pos + copied);
273162306a36Sopenharmony_ci
273262306a36Sopenharmony_ci	if (!folio_test_uptodate(folio)) {
273362306a36Sopenharmony_ci		if (copied < folio_size(folio)) {
273462306a36Sopenharmony_ci			size_t from = offset_in_folio(folio, pos);
273562306a36Sopenharmony_ci			folio_zero_segments(folio, 0, from,
273662306a36Sopenharmony_ci					from + copied, folio_size(folio));
273762306a36Sopenharmony_ci		}
273862306a36Sopenharmony_ci		folio_mark_uptodate(folio);
273962306a36Sopenharmony_ci	}
274062306a36Sopenharmony_ci	folio_mark_dirty(folio);
274162306a36Sopenharmony_ci	folio_unlock(folio);
274262306a36Sopenharmony_ci	folio_put(folio);
274362306a36Sopenharmony_ci
274462306a36Sopenharmony_ci	return copied;
274562306a36Sopenharmony_ci}
274662306a36Sopenharmony_ci
/*
 * Read from a shmem file into @to, starting at iocb->ki_pos.
 *
 * The loop handles at most one page per iteration.  When shmem_get_folio()
 * succeeds without returning a folio, zeroes are delivered instead of page
 * contents.  The loop stops at end of file, when @to is full, on a short
 * copy (-EFAULT), on a hardware-poisoned page (-EIO) or on a
 * shmem_get_folio() error.
 *
 * iocb->ki_pos is advanced to the position reached and the file is marked
 * accessed.  Returns the number of bytes read if any, otherwise the error
 * recorded (0 when nothing went wrong).
 */
static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	/* Split the file position into page index and offset within the page. */
	index = *ppos >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct folio *folio = NULL;
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);

		/* Stop early if the position is already at or past i_size. */
		end_index = i_size >> PAGE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset)
				break;
		}

		error = shmem_get_folio(inode, index, &folio, SGP_READ);
		if (error) {
			/* -EINVAL ends the read without reporting an error. */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			page = folio_file_page(folio, index);
			if (PageHWPoison(page)) {
				folio_put(folio);
				error = -EIO;
				break;
			}
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_rwsem protection against truncate
		 */
		nr = PAGE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset) {
				if (folio)
					folio_put(folio);
				break;
			}
		}
		/* nr becomes the number of bytes to deliver from this page. */
		nr -= offset;

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the beginning.
			 */
			if (!offset)
				folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so
			 * now we can copy it to user space...
			 */
			ret = copy_page_to_iter(page, offset, nr, to);
			folio_put(folio);

		} else if (user_backed_iter(to)) {
			/*
			 * Copy to user tends to be so well optimized, but
			 * clear_user() not so much, that it is noticeably
			 * faster to copy the zero page instead of clearing.
			 */
			ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to);
		} else {
			/*
			 * But submitting the same page twice in a row to
			 * splice() - or others? - can result in confusion:
			 * so don't attempt that optimization on pipes etc.
			 */
			ret = iov_iter_zero(nr, to);
		}

		/* Advance by what was copied, carrying whole pages into index. */
		retval += ret;
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;

		if (!iov_iter_count(to))
			break;
		/* Short copy although the destination still has room. */
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	/* Write the position reached back to iocb->ki_pos. */
	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
	file_accessed(file);
	return retval ? retval : error;
}
286562306a36Sopenharmony_ci
286662306a36Sopenharmony_cistatic ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
286762306a36Sopenharmony_ci{
286862306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
286962306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
287062306a36Sopenharmony_ci	ssize_t ret;
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_ci	inode_lock(inode);
287362306a36Sopenharmony_ci	ret = generic_write_checks(iocb, from);
287462306a36Sopenharmony_ci	if (ret <= 0)
287562306a36Sopenharmony_ci		goto unlock;
287662306a36Sopenharmony_ci	ret = file_remove_privs(file);
287762306a36Sopenharmony_ci	if (ret)
287862306a36Sopenharmony_ci		goto unlock;
287962306a36Sopenharmony_ci	ret = file_update_time(file);
288062306a36Sopenharmony_ci	if (ret)
288162306a36Sopenharmony_ci		goto unlock;
288262306a36Sopenharmony_ci	ret = generic_perform_write(iocb, from);
288362306a36Sopenharmony_ciunlock:
288462306a36Sopenharmony_ci	inode_unlock(inode);
288562306a36Sopenharmony_ci	return ret;
288662306a36Sopenharmony_ci}
288762306a36Sopenharmony_ci
/*
 * ->get callback of zero_pipe_buf_ops: always reports success and touches
 * neither @pipe nor @buf.
 */
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	return true;
}
289362306a36Sopenharmony_ci
/* ->release callback of zero_pipe_buf_ops: intentionally does nothing. */
static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
}
289862306a36Sopenharmony_ci
/* ->try_steal callback of zero_pipe_buf_ops: always refuses (returns false). */
static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	return false;
}
290462306a36Sopenharmony_ci
/*
 * Pipe buffer operations for buffers whose page is ZERO_PAGE(0), as queued
 * by splice_zeropage_into_pipe().
 */
static const struct pipe_buf_operations zero_pipe_buf_ops = {
	.release	= zero_pipe_buf_release,
	.try_steal	= zero_pipe_buf_try_steal,
	.get		= zero_pipe_buf_get,
};
291062306a36Sopenharmony_ci
291162306a36Sopenharmony_cistatic size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
291262306a36Sopenharmony_ci					loff_t fpos, size_t size)
291362306a36Sopenharmony_ci{
291462306a36Sopenharmony_ci	size_t offset = fpos & ~PAGE_MASK;
291562306a36Sopenharmony_ci
291662306a36Sopenharmony_ci	size = min_t(size_t, size, PAGE_SIZE - offset);
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci	if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
291962306a36Sopenharmony_ci		struct pipe_buffer *buf = pipe_head_buf(pipe);
292062306a36Sopenharmony_ci
292162306a36Sopenharmony_ci		*buf = (struct pipe_buffer) {
292262306a36Sopenharmony_ci			.ops	= &zero_pipe_buf_ops,
292362306a36Sopenharmony_ci			.page	= ZERO_PAGE(0),
292462306a36Sopenharmony_ci			.offset	= offset,
292562306a36Sopenharmony_ci			.len	= size,
292662306a36Sopenharmony_ci		};
292762306a36Sopenharmony_ci		pipe->head++;
292862306a36Sopenharmony_ci	}
292962306a36Sopenharmony_ci
293062306a36Sopenharmony_ci	return size;
293162306a36Sopenharmony_ci}
293262306a36Sopenharmony_ci
/*
 * Splice up to @len bytes of the shmem file @in, starting at *@ppos, into
 * @pipe.
 *
 * @len is first limited to the room left in the pipe.  Each iteration looks
 * up the folio at the current position with shmem_get_folio(SGP_READ); when
 * that succeeds without returning a folio, a zero-page buffer is queued
 * instead.  The loop ends at end of file, when nothing more could be
 * queued, when the pipe is full, on a hardware-poisoned folio (-EIO) or on
 * a shmem_get_folio() error.
 *
 * *@ppos and in->f_ra.prev_pos are advanced by the amount spliced and the
 * file is marked accessed.  Returns the number of bytes spliced if any,
 * otherwise the error recorded (0 when nothing went wrong).  @flags is not
 * used.
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio = NULL;
	size_t total_spliced = 0, used, npages, n, part;
	loff_t isize;
	int error = 0;

	/* Work out how much data we can actually add into the pipe */
	used = pipe_occupancy(pipe->head, pipe->tail);
	npages = max_t(ssize_t, pipe->max_usage - used, 0);
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		if (*ppos >= i_size_read(inode))
			break;

		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
					SGP_READ);
		if (error) {
			/* -EINVAL ends the splice without reporting an error. */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			/* The folio reference is dropped after the loop. */
			if (folio_test_hwpoison(folio) ||
			    (folio_test_large(folio) &&
			     folio_test_has_hwpoisoned(folio))) {
				error = -EIO;
				break;
			}
		}

		/*
		 * i_size must be checked after we know the pages are Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		/* Never splice beyond i_size or beyond what is still wanted. */
		part = min_t(loff_t, isize - *ppos, len);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_folio(folio);
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can
			 * now splice it into the pipe.
			 */
			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
			folio_put(folio);
			/* Cleared so the folio_put() after the loop skips it. */
			folio = NULL;
		} else {
			n = splice_zeropage_into_pipe(pipe, *ppos, part);
		}

		if (!n)
			break;
		len -= n;
		total_spliced += n;
		*ppos += n;
		in->f_ra.prev_pos = *ppos;
		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
			break;

		cond_resched();
	} while (len);

	/* Drop the reference left over from a break taken while holding a folio. */
	if (folio)
		folio_put(folio);

	file_accessed(in);
	return total_spliced ? total_spliced : error;
}
302262306a36Sopenharmony_ci
302362306a36Sopenharmony_cistatic loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
302462306a36Sopenharmony_ci{
302562306a36Sopenharmony_ci	struct address_space *mapping = file->f_mapping;
302662306a36Sopenharmony_ci	struct inode *inode = mapping->host;
302762306a36Sopenharmony_ci
302862306a36Sopenharmony_ci	if (whence != SEEK_DATA && whence != SEEK_HOLE)
302962306a36Sopenharmony_ci		return generic_file_llseek_size(file, offset, whence,
303062306a36Sopenharmony_ci					MAX_LFS_FILESIZE, i_size_read(inode));
303162306a36Sopenharmony_ci	if (offset < 0)
303262306a36Sopenharmony_ci		return -ENXIO;
303362306a36Sopenharmony_ci
303462306a36Sopenharmony_ci	inode_lock(inode);
303562306a36Sopenharmony_ci	/* We're holding i_rwsem so we can access i_size directly */
303662306a36Sopenharmony_ci	offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
303762306a36Sopenharmony_ci	if (offset >= 0)
303862306a36Sopenharmony_ci		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
303962306a36Sopenharmony_ci	inode_unlock(inode);
304062306a36Sopenharmony_ci	return offset;
304162306a36Sopenharmony_ci}
304262306a36Sopenharmony_ci
304362306a36Sopenharmony_cistatic long shmem_fallocate(struct file *file, int mode, loff_t offset,
304462306a36Sopenharmony_ci							 loff_t len)
304562306a36Sopenharmony_ci{
304662306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
304762306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
304862306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
304962306a36Sopenharmony_ci	struct shmem_falloc shmem_falloc;
305062306a36Sopenharmony_ci	pgoff_t start, index, end, undo_fallocend;
305162306a36Sopenharmony_ci	int error;
305262306a36Sopenharmony_ci
305362306a36Sopenharmony_ci	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
305462306a36Sopenharmony_ci		return -EOPNOTSUPP;
305562306a36Sopenharmony_ci
305662306a36Sopenharmony_ci	inode_lock(inode);
305762306a36Sopenharmony_ci
305862306a36Sopenharmony_ci	if (mode & FALLOC_FL_PUNCH_HOLE) {
305962306a36Sopenharmony_ci		struct address_space *mapping = file->f_mapping;
306062306a36Sopenharmony_ci		loff_t unmap_start = round_up(offset, PAGE_SIZE);
306162306a36Sopenharmony_ci		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
306262306a36Sopenharmony_ci		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
306362306a36Sopenharmony_ci
306462306a36Sopenharmony_ci		/* protected by i_rwsem */
306562306a36Sopenharmony_ci		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
306662306a36Sopenharmony_ci			error = -EPERM;
306762306a36Sopenharmony_ci			goto out;
306862306a36Sopenharmony_ci		}
306962306a36Sopenharmony_ci
307062306a36Sopenharmony_ci		shmem_falloc.waitq = &shmem_falloc_waitq;
307162306a36Sopenharmony_ci		shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
307262306a36Sopenharmony_ci		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
307362306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
307462306a36Sopenharmony_ci		inode->i_private = &shmem_falloc;
307562306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
307662306a36Sopenharmony_ci
307762306a36Sopenharmony_ci		if ((u64)unmap_end > (u64)unmap_start)
307862306a36Sopenharmony_ci			unmap_mapping_range(mapping, unmap_start,
307962306a36Sopenharmony_ci					    1 + unmap_end - unmap_start, 0);
308062306a36Sopenharmony_ci		shmem_truncate_range(inode, offset, offset + len - 1);
308162306a36Sopenharmony_ci		/* No need to unmap again: hole-punching leaves COWed pages */
308262306a36Sopenharmony_ci
308362306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
308462306a36Sopenharmony_ci		inode->i_private = NULL;
308562306a36Sopenharmony_ci		wake_up_all(&shmem_falloc_waitq);
308662306a36Sopenharmony_ci		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
308762306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
308862306a36Sopenharmony_ci		error = 0;
308962306a36Sopenharmony_ci		goto out;
309062306a36Sopenharmony_ci	}
309162306a36Sopenharmony_ci
309262306a36Sopenharmony_ci	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
309362306a36Sopenharmony_ci	error = inode_newsize_ok(inode, offset + len);
309462306a36Sopenharmony_ci	if (error)
309562306a36Sopenharmony_ci		goto out;
309662306a36Sopenharmony_ci
309762306a36Sopenharmony_ci	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
309862306a36Sopenharmony_ci		error = -EPERM;
309962306a36Sopenharmony_ci		goto out;
310062306a36Sopenharmony_ci	}
310162306a36Sopenharmony_ci
310262306a36Sopenharmony_ci	start = offset >> PAGE_SHIFT;
310362306a36Sopenharmony_ci	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
310462306a36Sopenharmony_ci	/* Try to avoid a swapstorm if len is impossible to satisfy */
310562306a36Sopenharmony_ci	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
310662306a36Sopenharmony_ci		error = -ENOSPC;
310762306a36Sopenharmony_ci		goto out;
310862306a36Sopenharmony_ci	}
310962306a36Sopenharmony_ci
311062306a36Sopenharmony_ci	shmem_falloc.waitq = NULL;
311162306a36Sopenharmony_ci	shmem_falloc.start = start;
311262306a36Sopenharmony_ci	shmem_falloc.next  = start;
311362306a36Sopenharmony_ci	shmem_falloc.nr_falloced = 0;
311462306a36Sopenharmony_ci	shmem_falloc.nr_unswapped = 0;
311562306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
311662306a36Sopenharmony_ci	inode->i_private = &shmem_falloc;
311762306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci	/*
312062306a36Sopenharmony_ci	 * info->fallocend is only relevant when huge pages might be
312162306a36Sopenharmony_ci	 * involved: to prevent split_huge_page() freeing fallocated
312262306a36Sopenharmony_ci	 * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size.
312362306a36Sopenharmony_ci	 */
312462306a36Sopenharmony_ci	undo_fallocend = info->fallocend;
312562306a36Sopenharmony_ci	if (info->fallocend < end)
312662306a36Sopenharmony_ci		info->fallocend = end;
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_ci	for (index = start; index < end; ) {
312962306a36Sopenharmony_ci		struct folio *folio;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci		/*
313262306a36Sopenharmony_ci		 * Good, the fallocate(2) manpage permits EINTR: we may have
313362306a36Sopenharmony_ci		 * been interrupted because we are using up too much memory.
313462306a36Sopenharmony_ci		 */
313562306a36Sopenharmony_ci		if (signal_pending(current))
313662306a36Sopenharmony_ci			error = -EINTR;
313762306a36Sopenharmony_ci		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
313862306a36Sopenharmony_ci			error = -ENOMEM;
313962306a36Sopenharmony_ci		else
314062306a36Sopenharmony_ci			error = shmem_get_folio(inode, index, &folio,
314162306a36Sopenharmony_ci						SGP_FALLOC);
314262306a36Sopenharmony_ci		if (error) {
314362306a36Sopenharmony_ci			info->fallocend = undo_fallocend;
314462306a36Sopenharmony_ci			/* Remove the !uptodate folios we added */
314562306a36Sopenharmony_ci			if (index > start) {
314662306a36Sopenharmony_ci				shmem_undo_range(inode,
314762306a36Sopenharmony_ci				    (loff_t)start << PAGE_SHIFT,
314862306a36Sopenharmony_ci				    ((loff_t)index << PAGE_SHIFT) - 1, true);
314962306a36Sopenharmony_ci			}
315062306a36Sopenharmony_ci			goto undone;
315162306a36Sopenharmony_ci		}
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_ci		/*
315462306a36Sopenharmony_ci		 * Here is a more important optimization than it appears:
315562306a36Sopenharmony_ci		 * a second SGP_FALLOC on the same large folio will clear it,
315662306a36Sopenharmony_ci		 * making it uptodate and un-undoable if we fail later.
315762306a36Sopenharmony_ci		 */
315862306a36Sopenharmony_ci		index = folio_next_index(folio);
315962306a36Sopenharmony_ci		/* Beware 32-bit wraparound */
316062306a36Sopenharmony_ci		if (!index)
316162306a36Sopenharmony_ci			index--;
316262306a36Sopenharmony_ci
316362306a36Sopenharmony_ci		/*
316462306a36Sopenharmony_ci		 * Inform shmem_writepage() how far we have reached.
316562306a36Sopenharmony_ci		 * No need for lock or barrier: we have the page lock.
316662306a36Sopenharmony_ci		 */
316762306a36Sopenharmony_ci		if (!folio_test_uptodate(folio))
316862306a36Sopenharmony_ci			shmem_falloc.nr_falloced += index - shmem_falloc.next;
316962306a36Sopenharmony_ci		shmem_falloc.next = index;
317062306a36Sopenharmony_ci
317162306a36Sopenharmony_ci		/*
317262306a36Sopenharmony_ci		 * If !uptodate, leave it that way so that freeable folios
317362306a36Sopenharmony_ci		 * can be recognized if we need to rollback on error later.
317462306a36Sopenharmony_ci		 * But mark it dirty so that memory pressure will swap rather
317562306a36Sopenharmony_ci		 * than free the folios we are allocating (and SGP_CACHE folios
317662306a36Sopenharmony_ci		 * might still be clean: we now need to mark those dirty too).
317762306a36Sopenharmony_ci		 */
317862306a36Sopenharmony_ci		folio_mark_dirty(folio);
317962306a36Sopenharmony_ci		folio_unlock(folio);
318062306a36Sopenharmony_ci		folio_put(folio);
318162306a36Sopenharmony_ci		cond_resched();
318262306a36Sopenharmony_ci	}
318362306a36Sopenharmony_ci
318462306a36Sopenharmony_ci	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
318562306a36Sopenharmony_ci		i_size_write(inode, offset + len);
318662306a36Sopenharmony_ciundone:
318762306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
318862306a36Sopenharmony_ci	inode->i_private = NULL;
318962306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
319062306a36Sopenharmony_ciout:
319162306a36Sopenharmony_ci	if (!error)
319262306a36Sopenharmony_ci		file_modified(file);
319362306a36Sopenharmony_ci	inode_unlock(inode);
319462306a36Sopenharmony_ci	return error;
319562306a36Sopenharmony_ci}
319662306a36Sopenharmony_ci
319762306a36Sopenharmony_cistatic int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
319862306a36Sopenharmony_ci{
319962306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
320062306a36Sopenharmony_ci
320162306a36Sopenharmony_ci	buf->f_type = TMPFS_MAGIC;
320262306a36Sopenharmony_ci	buf->f_bsize = PAGE_SIZE;
320362306a36Sopenharmony_ci	buf->f_namelen = NAME_MAX;
320462306a36Sopenharmony_ci	if (sbinfo->max_blocks) {
320562306a36Sopenharmony_ci		buf->f_blocks = sbinfo->max_blocks;
320662306a36Sopenharmony_ci		buf->f_bavail =
320762306a36Sopenharmony_ci		buf->f_bfree  = sbinfo->max_blocks -
320862306a36Sopenharmony_ci				percpu_counter_sum(&sbinfo->used_blocks);
320962306a36Sopenharmony_ci	}
321062306a36Sopenharmony_ci	if (sbinfo->max_inodes) {
321162306a36Sopenharmony_ci		buf->f_files = sbinfo->max_inodes;
321262306a36Sopenharmony_ci		buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
321362306a36Sopenharmony_ci	}
321462306a36Sopenharmony_ci	/* else leave those fields 0 like simple_statfs */
321562306a36Sopenharmony_ci
321662306a36Sopenharmony_ci	buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b);
321762306a36Sopenharmony_ci
321862306a36Sopenharmony_ci	return 0;
321962306a36Sopenharmony_ci}
322062306a36Sopenharmony_ci
322162306a36Sopenharmony_ci/*
322262306a36Sopenharmony_ci * File creation. Allocate an inode, and we're done..
322362306a36Sopenharmony_ci */
322462306a36Sopenharmony_cistatic int
322562306a36Sopenharmony_cishmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
322662306a36Sopenharmony_ci	    struct dentry *dentry, umode_t mode, dev_t dev)
322762306a36Sopenharmony_ci{
322862306a36Sopenharmony_ci	struct inode *inode;
322962306a36Sopenharmony_ci	int error;
323062306a36Sopenharmony_ci
323162306a36Sopenharmony_ci	inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
323262306a36Sopenharmony_ci	if (IS_ERR(inode))
323362306a36Sopenharmony_ci		return PTR_ERR(inode);
323462306a36Sopenharmony_ci
323562306a36Sopenharmony_ci	error = simple_acl_create(dir, inode);
323662306a36Sopenharmony_ci	if (error)
323762306a36Sopenharmony_ci		goto out_iput;
323862306a36Sopenharmony_ci	error = security_inode_init_security(inode, dir,
323962306a36Sopenharmony_ci					     &dentry->d_name,
324062306a36Sopenharmony_ci					     shmem_initxattrs, NULL);
324162306a36Sopenharmony_ci	if (error && error != -EOPNOTSUPP)
324262306a36Sopenharmony_ci		goto out_iput;
324362306a36Sopenharmony_ci
324462306a36Sopenharmony_ci	error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
324562306a36Sopenharmony_ci	if (error)
324662306a36Sopenharmony_ci		goto out_iput;
324762306a36Sopenharmony_ci
324862306a36Sopenharmony_ci	dir->i_size += BOGO_DIRENT_SIZE;
324962306a36Sopenharmony_ci	dir->i_mtime = inode_set_ctime_current(dir);
325062306a36Sopenharmony_ci	inode_inc_iversion(dir);
325162306a36Sopenharmony_ci	d_instantiate(dentry, inode);
325262306a36Sopenharmony_ci	dget(dentry); /* Extra count - pin the dentry in core */
325362306a36Sopenharmony_ci	return error;
325462306a36Sopenharmony_ci
325562306a36Sopenharmony_ciout_iput:
325662306a36Sopenharmony_ci	iput(inode);
325762306a36Sopenharmony_ci	return error;
325862306a36Sopenharmony_ci}
325962306a36Sopenharmony_ci
326062306a36Sopenharmony_cistatic int
326162306a36Sopenharmony_cishmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
326262306a36Sopenharmony_ci	      struct file *file, umode_t mode)
326362306a36Sopenharmony_ci{
326462306a36Sopenharmony_ci	struct inode *inode;
326562306a36Sopenharmony_ci	int error;
326662306a36Sopenharmony_ci
326762306a36Sopenharmony_ci	inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
326862306a36Sopenharmony_ci
326962306a36Sopenharmony_ci	if (IS_ERR(inode)) {
327062306a36Sopenharmony_ci		error = PTR_ERR(inode);
327162306a36Sopenharmony_ci		goto err_out;
327262306a36Sopenharmony_ci	}
327362306a36Sopenharmony_ci
327462306a36Sopenharmony_ci	error = security_inode_init_security(inode, dir,
327562306a36Sopenharmony_ci					     NULL,
327662306a36Sopenharmony_ci					     shmem_initxattrs, NULL);
327762306a36Sopenharmony_ci	if (error && error != -EOPNOTSUPP)
327862306a36Sopenharmony_ci		goto out_iput;
327962306a36Sopenharmony_ci	error = simple_acl_create(dir, inode);
328062306a36Sopenharmony_ci	if (error)
328162306a36Sopenharmony_ci		goto out_iput;
328262306a36Sopenharmony_ci	d_tmpfile(file, inode);
328362306a36Sopenharmony_ci
328462306a36Sopenharmony_cierr_out:
328562306a36Sopenharmony_ci	return finish_open_simple(file, error);
328662306a36Sopenharmony_ciout_iput:
328762306a36Sopenharmony_ci	iput(inode);
328862306a36Sopenharmony_ci	return error;
328962306a36Sopenharmony_ci}
329062306a36Sopenharmony_ci
329162306a36Sopenharmony_cistatic int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir,
329262306a36Sopenharmony_ci		       struct dentry *dentry, umode_t mode)
329362306a36Sopenharmony_ci{
329462306a36Sopenharmony_ci	int error;
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci	error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0);
329762306a36Sopenharmony_ci	if (error)
329862306a36Sopenharmony_ci		return error;
329962306a36Sopenharmony_ci	inc_nlink(dir);
330062306a36Sopenharmony_ci	return 0;
330162306a36Sopenharmony_ci}
330262306a36Sopenharmony_ci
330362306a36Sopenharmony_cistatic int shmem_create(struct mnt_idmap *idmap, struct inode *dir,
330462306a36Sopenharmony_ci			struct dentry *dentry, umode_t mode, bool excl)
330562306a36Sopenharmony_ci{
330662306a36Sopenharmony_ci	return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0);
330762306a36Sopenharmony_ci}
330862306a36Sopenharmony_ci
330962306a36Sopenharmony_ci/*
331062306a36Sopenharmony_ci * Link a file..
331162306a36Sopenharmony_ci */
331262306a36Sopenharmony_cistatic int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
331362306a36Sopenharmony_ci{
331462306a36Sopenharmony_ci	struct inode *inode = d_inode(old_dentry);
331562306a36Sopenharmony_ci	int ret = 0;
331662306a36Sopenharmony_ci
331762306a36Sopenharmony_ci	/*
331862306a36Sopenharmony_ci	 * No ordinary (disk based) filesystem counts links as inodes;
331962306a36Sopenharmony_ci	 * but each new link needs a new dentry, pinning lowmem, and
332062306a36Sopenharmony_ci	 * tmpfs dentries cannot be pruned until they are unlinked.
332162306a36Sopenharmony_ci	 * But if an O_TMPFILE file is linked into the tmpfs, the
332262306a36Sopenharmony_ci	 * first link must skip that, to get the accounting right.
332362306a36Sopenharmony_ci	 */
332462306a36Sopenharmony_ci	if (inode->i_nlink) {
332562306a36Sopenharmony_ci		ret = shmem_reserve_inode(inode->i_sb, NULL);
332662306a36Sopenharmony_ci		if (ret)
332762306a36Sopenharmony_ci			goto out;
332862306a36Sopenharmony_ci	}
332962306a36Sopenharmony_ci
333062306a36Sopenharmony_ci	ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
333162306a36Sopenharmony_ci	if (ret) {
333262306a36Sopenharmony_ci		if (inode->i_nlink)
333362306a36Sopenharmony_ci			shmem_free_inode(inode->i_sb, 0);
333462306a36Sopenharmony_ci		goto out;
333562306a36Sopenharmony_ci	}
333662306a36Sopenharmony_ci
333762306a36Sopenharmony_ci	dir->i_size += BOGO_DIRENT_SIZE;
333862306a36Sopenharmony_ci	dir->i_mtime = inode_set_ctime_to_ts(dir,
333962306a36Sopenharmony_ci					     inode_set_ctime_current(inode));
334062306a36Sopenharmony_ci	inode_inc_iversion(dir);
334162306a36Sopenharmony_ci	inc_nlink(inode);
334262306a36Sopenharmony_ci	ihold(inode);	/* New dentry reference */
334362306a36Sopenharmony_ci	dget(dentry);		/* Extra pinning count for the created dentry */
334462306a36Sopenharmony_ci	d_instantiate(dentry, inode);
334562306a36Sopenharmony_ciout:
334662306a36Sopenharmony_ci	return ret;
334762306a36Sopenharmony_ci}
334862306a36Sopenharmony_ci
334962306a36Sopenharmony_cistatic int shmem_unlink(struct inode *dir, struct dentry *dentry)
335062306a36Sopenharmony_ci{
335162306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
335262306a36Sopenharmony_ci
335362306a36Sopenharmony_ci	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
335462306a36Sopenharmony_ci		shmem_free_inode(inode->i_sb, 0);
335562306a36Sopenharmony_ci
335662306a36Sopenharmony_ci	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
335762306a36Sopenharmony_ci
335862306a36Sopenharmony_ci	dir->i_size -= BOGO_DIRENT_SIZE;
335962306a36Sopenharmony_ci	dir->i_mtime = inode_set_ctime_to_ts(dir,
336062306a36Sopenharmony_ci					     inode_set_ctime_current(inode));
336162306a36Sopenharmony_ci	inode_inc_iversion(dir);
336262306a36Sopenharmony_ci	drop_nlink(inode);
336362306a36Sopenharmony_ci	dput(dentry);	/* Undo the count from "create" - this does all the work */
336462306a36Sopenharmony_ci	return 0;
336562306a36Sopenharmony_ci}
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_cistatic int shmem_rmdir(struct inode *dir, struct dentry *dentry)
336862306a36Sopenharmony_ci{
336962306a36Sopenharmony_ci	if (!simple_empty(dentry))
337062306a36Sopenharmony_ci		return -ENOTEMPTY;
337162306a36Sopenharmony_ci
337262306a36Sopenharmony_ci	drop_nlink(d_inode(dentry));
337362306a36Sopenharmony_ci	drop_nlink(dir);
337462306a36Sopenharmony_ci	return shmem_unlink(dir, dentry);
337562306a36Sopenharmony_ci}
337662306a36Sopenharmony_ci
337762306a36Sopenharmony_cistatic int shmem_whiteout(struct mnt_idmap *idmap,
337862306a36Sopenharmony_ci			  struct inode *old_dir, struct dentry *old_dentry)
337962306a36Sopenharmony_ci{
338062306a36Sopenharmony_ci	struct dentry *whiteout;
338162306a36Sopenharmony_ci	int error;
338262306a36Sopenharmony_ci
338362306a36Sopenharmony_ci	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
338462306a36Sopenharmony_ci	if (!whiteout)
338562306a36Sopenharmony_ci		return -ENOMEM;
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci	error = shmem_mknod(idmap, old_dir, whiteout,
338862306a36Sopenharmony_ci			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
338962306a36Sopenharmony_ci	dput(whiteout);
339062306a36Sopenharmony_ci	if (error)
339162306a36Sopenharmony_ci		return error;
339262306a36Sopenharmony_ci
339362306a36Sopenharmony_ci	/*
339462306a36Sopenharmony_ci	 * Cheat and hash the whiteout while the old dentry is still in
339562306a36Sopenharmony_ci	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
339662306a36Sopenharmony_ci	 *
339762306a36Sopenharmony_ci	 * d_lookup() will consistently find one of them at this point,
339862306a36Sopenharmony_ci	 * not sure which one, but that isn't even important.
339962306a36Sopenharmony_ci	 */
340062306a36Sopenharmony_ci	d_rehash(whiteout);
340162306a36Sopenharmony_ci	return 0;
340262306a36Sopenharmony_ci}
340362306a36Sopenharmony_ci
340462306a36Sopenharmony_ci/*
340562306a36Sopenharmony_ci * The VFS layer already does all the dentry stuff for rename,
340662306a36Sopenharmony_ci * we just have to decrement the usage count for the target if
340762306a36Sopenharmony_ci * it exists so that the VFS layer correctly free's it when it
340862306a36Sopenharmony_ci * gets overwritten.
340962306a36Sopenharmony_ci */
341062306a36Sopenharmony_cistatic int shmem_rename2(struct mnt_idmap *idmap,
341162306a36Sopenharmony_ci			 struct inode *old_dir, struct dentry *old_dentry,
341262306a36Sopenharmony_ci			 struct inode *new_dir, struct dentry *new_dentry,
341362306a36Sopenharmony_ci			 unsigned int flags)
341462306a36Sopenharmony_ci{
341562306a36Sopenharmony_ci	struct inode *inode = d_inode(old_dentry);
341662306a36Sopenharmony_ci	int they_are_dirs = S_ISDIR(inode->i_mode);
341762306a36Sopenharmony_ci	int error;
341862306a36Sopenharmony_ci
341962306a36Sopenharmony_ci	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
342062306a36Sopenharmony_ci		return -EINVAL;
342162306a36Sopenharmony_ci
342262306a36Sopenharmony_ci	if (flags & RENAME_EXCHANGE)
342362306a36Sopenharmony_ci		return simple_offset_rename_exchange(old_dir, old_dentry,
342462306a36Sopenharmony_ci						     new_dir, new_dentry);
342562306a36Sopenharmony_ci
342662306a36Sopenharmony_ci	if (!simple_empty(new_dentry))
342762306a36Sopenharmony_ci		return -ENOTEMPTY;
342862306a36Sopenharmony_ci
342962306a36Sopenharmony_ci	if (flags & RENAME_WHITEOUT) {
343062306a36Sopenharmony_ci		error = shmem_whiteout(idmap, old_dir, old_dentry);
343162306a36Sopenharmony_ci		if (error)
343262306a36Sopenharmony_ci			return error;
343362306a36Sopenharmony_ci	}
343462306a36Sopenharmony_ci
343562306a36Sopenharmony_ci	simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
343662306a36Sopenharmony_ci	error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
343762306a36Sopenharmony_ci	if (error)
343862306a36Sopenharmony_ci		return error;
343962306a36Sopenharmony_ci
344062306a36Sopenharmony_ci	if (d_really_is_positive(new_dentry)) {
344162306a36Sopenharmony_ci		(void) shmem_unlink(new_dir, new_dentry);
344262306a36Sopenharmony_ci		if (they_are_dirs) {
344362306a36Sopenharmony_ci			drop_nlink(d_inode(new_dentry));
344462306a36Sopenharmony_ci			drop_nlink(old_dir);
344562306a36Sopenharmony_ci		}
344662306a36Sopenharmony_ci	} else if (they_are_dirs) {
344762306a36Sopenharmony_ci		drop_nlink(old_dir);
344862306a36Sopenharmony_ci		inc_nlink(new_dir);
344962306a36Sopenharmony_ci	}
345062306a36Sopenharmony_ci
345162306a36Sopenharmony_ci	old_dir->i_size -= BOGO_DIRENT_SIZE;
345262306a36Sopenharmony_ci	new_dir->i_size += BOGO_DIRENT_SIZE;
345362306a36Sopenharmony_ci	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
345462306a36Sopenharmony_ci	inode_inc_iversion(old_dir);
345562306a36Sopenharmony_ci	inode_inc_iversion(new_dir);
345662306a36Sopenharmony_ci	return 0;
345762306a36Sopenharmony_ci}
345862306a36Sopenharmony_ci
345962306a36Sopenharmony_cistatic int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
346062306a36Sopenharmony_ci			 struct dentry *dentry, const char *symname)
346162306a36Sopenharmony_ci{
346262306a36Sopenharmony_ci	int error;
346362306a36Sopenharmony_ci	int len;
346462306a36Sopenharmony_ci	struct inode *inode;
346562306a36Sopenharmony_ci	struct folio *folio;
346662306a36Sopenharmony_ci
346762306a36Sopenharmony_ci	len = strlen(symname) + 1;
346862306a36Sopenharmony_ci	if (len > PAGE_SIZE)
346962306a36Sopenharmony_ci		return -ENAMETOOLONG;
347062306a36Sopenharmony_ci
347162306a36Sopenharmony_ci	inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
347262306a36Sopenharmony_ci				VM_NORESERVE);
347362306a36Sopenharmony_ci
347462306a36Sopenharmony_ci	if (IS_ERR(inode))
347562306a36Sopenharmony_ci		return PTR_ERR(inode);
347662306a36Sopenharmony_ci
347762306a36Sopenharmony_ci	error = security_inode_init_security(inode, dir, &dentry->d_name,
347862306a36Sopenharmony_ci					     shmem_initxattrs, NULL);
347962306a36Sopenharmony_ci	if (error && error != -EOPNOTSUPP)
348062306a36Sopenharmony_ci		goto out_iput;
348162306a36Sopenharmony_ci
348262306a36Sopenharmony_ci	error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
348362306a36Sopenharmony_ci	if (error)
348462306a36Sopenharmony_ci		goto out_iput;
348562306a36Sopenharmony_ci
348662306a36Sopenharmony_ci	inode->i_size = len-1;
348762306a36Sopenharmony_ci	if (len <= SHORT_SYMLINK_LEN) {
348862306a36Sopenharmony_ci		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
348962306a36Sopenharmony_ci		if (!inode->i_link) {
349062306a36Sopenharmony_ci			error = -ENOMEM;
349162306a36Sopenharmony_ci			goto out_remove_offset;
349262306a36Sopenharmony_ci		}
349362306a36Sopenharmony_ci		inode->i_op = &shmem_short_symlink_operations;
349462306a36Sopenharmony_ci	} else {
349562306a36Sopenharmony_ci		inode_nohighmem(inode);
349662306a36Sopenharmony_ci		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
349762306a36Sopenharmony_ci		if (error)
349862306a36Sopenharmony_ci			goto out_remove_offset;
349962306a36Sopenharmony_ci		inode->i_mapping->a_ops = &shmem_aops;
350062306a36Sopenharmony_ci		inode->i_op = &shmem_symlink_inode_operations;
350162306a36Sopenharmony_ci		memcpy(folio_address(folio), symname, len);
350262306a36Sopenharmony_ci		folio_mark_uptodate(folio);
350362306a36Sopenharmony_ci		folio_mark_dirty(folio);
350462306a36Sopenharmony_ci		folio_unlock(folio);
350562306a36Sopenharmony_ci		folio_put(folio);
350662306a36Sopenharmony_ci	}
350762306a36Sopenharmony_ci	dir->i_size += BOGO_DIRENT_SIZE;
350862306a36Sopenharmony_ci	dir->i_mtime = inode_set_ctime_current(dir);
350962306a36Sopenharmony_ci	inode_inc_iversion(dir);
351062306a36Sopenharmony_ci	d_instantiate(dentry, inode);
351162306a36Sopenharmony_ci	dget(dentry);
351262306a36Sopenharmony_ci	return 0;
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ciout_remove_offset:
351562306a36Sopenharmony_ci	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
351662306a36Sopenharmony_ciout_iput:
351762306a36Sopenharmony_ci	iput(inode);
351862306a36Sopenharmony_ci	return error;
351962306a36Sopenharmony_ci}
352062306a36Sopenharmony_ci
/*
 * Delayed-call callback installed by shmem_get_link(): @arg is the folio
 * holding the link target.  Mark it accessed and drop the reference.
 */
static void shmem_put_link(void *arg)
{
	struct folio *folio = arg;

	folio_mark_accessed(folio);
	folio_put(folio);
}
352662306a36Sopenharmony_ci
352762306a36Sopenharmony_cistatic const char *shmem_get_link(struct dentry *dentry,
352862306a36Sopenharmony_ci				  struct inode *inode,
352962306a36Sopenharmony_ci				  struct delayed_call *done)
353062306a36Sopenharmony_ci{
353162306a36Sopenharmony_ci	struct folio *folio = NULL;
353262306a36Sopenharmony_ci	int error;
353362306a36Sopenharmony_ci
353462306a36Sopenharmony_ci	if (!dentry) {
353562306a36Sopenharmony_ci		folio = filemap_get_folio(inode->i_mapping, 0);
353662306a36Sopenharmony_ci		if (IS_ERR(folio))
353762306a36Sopenharmony_ci			return ERR_PTR(-ECHILD);
353862306a36Sopenharmony_ci		if (PageHWPoison(folio_page(folio, 0)) ||
353962306a36Sopenharmony_ci		    !folio_test_uptodate(folio)) {
354062306a36Sopenharmony_ci			folio_put(folio);
354162306a36Sopenharmony_ci			return ERR_PTR(-ECHILD);
354262306a36Sopenharmony_ci		}
354362306a36Sopenharmony_ci	} else {
354462306a36Sopenharmony_ci		error = shmem_get_folio(inode, 0, &folio, SGP_READ);
354562306a36Sopenharmony_ci		if (error)
354662306a36Sopenharmony_ci			return ERR_PTR(error);
354762306a36Sopenharmony_ci		if (!folio)
354862306a36Sopenharmony_ci			return ERR_PTR(-ECHILD);
354962306a36Sopenharmony_ci		if (PageHWPoison(folio_page(folio, 0))) {
355062306a36Sopenharmony_ci			folio_unlock(folio);
355162306a36Sopenharmony_ci			folio_put(folio);
355262306a36Sopenharmony_ci			return ERR_PTR(-ECHILD);
355362306a36Sopenharmony_ci		}
355462306a36Sopenharmony_ci		folio_unlock(folio);
355562306a36Sopenharmony_ci	}
355662306a36Sopenharmony_ci	set_delayed_call(done, shmem_put_link, folio);
355762306a36Sopenharmony_ci	return folio_address(folio);
355862306a36Sopenharmony_ci}
355962306a36Sopenharmony_ci
356062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR
356162306a36Sopenharmony_ci
356262306a36Sopenharmony_cistatic int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
356362306a36Sopenharmony_ci{
356462306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
356562306a36Sopenharmony_ci
356662306a36Sopenharmony_ci	fileattr_fill_flags(fa, info->fsflags & SHMEM_FL_USER_VISIBLE);
356762306a36Sopenharmony_ci
356862306a36Sopenharmony_ci	return 0;
356962306a36Sopenharmony_ci}
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_cistatic int shmem_fileattr_set(struct mnt_idmap *idmap,
357262306a36Sopenharmony_ci			      struct dentry *dentry, struct fileattr *fa)
357362306a36Sopenharmony_ci{
357462306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
357562306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
357662306a36Sopenharmony_ci
357762306a36Sopenharmony_ci	if (fileattr_has_fsx(fa))
357862306a36Sopenharmony_ci		return -EOPNOTSUPP;
357962306a36Sopenharmony_ci	if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE)
358062306a36Sopenharmony_ci		return -EOPNOTSUPP;
358162306a36Sopenharmony_ci
358262306a36Sopenharmony_ci	info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
358362306a36Sopenharmony_ci		(fa->flags & SHMEM_FL_USER_MODIFIABLE);
358462306a36Sopenharmony_ci
358562306a36Sopenharmony_ci	shmem_set_inode_flags(inode, info->fsflags);
358662306a36Sopenharmony_ci	inode_set_ctime_current(inode);
358762306a36Sopenharmony_ci	inode_inc_iversion(inode);
358862306a36Sopenharmony_ci	return 0;
358962306a36Sopenharmony_ci}
359062306a36Sopenharmony_ci
359162306a36Sopenharmony_ci/*
359262306a36Sopenharmony_ci * Superblocks without xattr inode operations may get some security.* xattr
359362306a36Sopenharmony_ci * support from the LSM "for free". As soon as we have any other xattrs
359462306a36Sopenharmony_ci * like ACLs, we also need to implement the security.* handlers at
359562306a36Sopenharmony_ci * filesystem level, though.
359662306a36Sopenharmony_ci */
359762306a36Sopenharmony_ci
359862306a36Sopenharmony_ci/*
359962306a36Sopenharmony_ci * Callback for security_inode_init_security() for acquiring xattrs.
360062306a36Sopenharmony_ci */
360162306a36Sopenharmony_cistatic int shmem_initxattrs(struct inode *inode,
360262306a36Sopenharmony_ci			    const struct xattr *xattr_array,
360362306a36Sopenharmony_ci			    void *fs_info)
360462306a36Sopenharmony_ci{
360562306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
360662306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
360762306a36Sopenharmony_ci	const struct xattr *xattr;
360862306a36Sopenharmony_ci	struct simple_xattr *new_xattr;
360962306a36Sopenharmony_ci	size_t ispace = 0;
361062306a36Sopenharmony_ci	size_t len;
361162306a36Sopenharmony_ci
361262306a36Sopenharmony_ci	if (sbinfo->max_inodes) {
361362306a36Sopenharmony_ci		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
361462306a36Sopenharmony_ci			ispace += simple_xattr_space(xattr->name,
361562306a36Sopenharmony_ci				xattr->value_len + XATTR_SECURITY_PREFIX_LEN);
361662306a36Sopenharmony_ci		}
361762306a36Sopenharmony_ci		if (ispace) {
361862306a36Sopenharmony_ci			raw_spin_lock(&sbinfo->stat_lock);
361962306a36Sopenharmony_ci			if (sbinfo->free_ispace < ispace)
362062306a36Sopenharmony_ci				ispace = 0;
362162306a36Sopenharmony_ci			else
362262306a36Sopenharmony_ci				sbinfo->free_ispace -= ispace;
362362306a36Sopenharmony_ci			raw_spin_unlock(&sbinfo->stat_lock);
362462306a36Sopenharmony_ci			if (!ispace)
362562306a36Sopenharmony_ci				return -ENOSPC;
362662306a36Sopenharmony_ci		}
362762306a36Sopenharmony_ci	}
362862306a36Sopenharmony_ci
362962306a36Sopenharmony_ci	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
363062306a36Sopenharmony_ci		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
363162306a36Sopenharmony_ci		if (!new_xattr)
363262306a36Sopenharmony_ci			break;
363362306a36Sopenharmony_ci
363462306a36Sopenharmony_ci		len = strlen(xattr->name) + 1;
363562306a36Sopenharmony_ci		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
363662306a36Sopenharmony_ci					  GFP_KERNEL_ACCOUNT);
363762306a36Sopenharmony_ci		if (!new_xattr->name) {
363862306a36Sopenharmony_ci			kvfree(new_xattr);
363962306a36Sopenharmony_ci			break;
364062306a36Sopenharmony_ci		}
364162306a36Sopenharmony_ci
364262306a36Sopenharmony_ci		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
364362306a36Sopenharmony_ci		       XATTR_SECURITY_PREFIX_LEN);
364462306a36Sopenharmony_ci		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
364562306a36Sopenharmony_ci		       xattr->name, len);
364662306a36Sopenharmony_ci
364762306a36Sopenharmony_ci		simple_xattr_add(&info->xattrs, new_xattr);
364862306a36Sopenharmony_ci	}
364962306a36Sopenharmony_ci
365062306a36Sopenharmony_ci	if (xattr->name != NULL) {
365162306a36Sopenharmony_ci		if (ispace) {
365262306a36Sopenharmony_ci			raw_spin_lock(&sbinfo->stat_lock);
365362306a36Sopenharmony_ci			sbinfo->free_ispace += ispace;
365462306a36Sopenharmony_ci			raw_spin_unlock(&sbinfo->stat_lock);
365562306a36Sopenharmony_ci		}
365662306a36Sopenharmony_ci		simple_xattrs_free(&info->xattrs, NULL);
365762306a36Sopenharmony_ci		return -ENOMEM;
365862306a36Sopenharmony_ci	}
365962306a36Sopenharmony_ci
366062306a36Sopenharmony_ci	return 0;
366162306a36Sopenharmony_ci}
366262306a36Sopenharmony_ci
366362306a36Sopenharmony_cistatic int shmem_xattr_handler_get(const struct xattr_handler *handler,
366462306a36Sopenharmony_ci				   struct dentry *unused, struct inode *inode,
366562306a36Sopenharmony_ci				   const char *name, void *buffer, size_t size)
366662306a36Sopenharmony_ci{
366762306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
366862306a36Sopenharmony_ci
366962306a36Sopenharmony_ci	name = xattr_full_name(handler, name);
367062306a36Sopenharmony_ci	return simple_xattr_get(&info->xattrs, name, buffer, size);
367162306a36Sopenharmony_ci}
367262306a36Sopenharmony_ci
367362306a36Sopenharmony_cistatic int shmem_xattr_handler_set(const struct xattr_handler *handler,
367462306a36Sopenharmony_ci				   struct mnt_idmap *idmap,
367562306a36Sopenharmony_ci				   struct dentry *unused, struct inode *inode,
367662306a36Sopenharmony_ci				   const char *name, const void *value,
367762306a36Sopenharmony_ci				   size_t size, int flags)
367862306a36Sopenharmony_ci{
367962306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(inode);
368062306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
368162306a36Sopenharmony_ci	struct simple_xattr *old_xattr;
368262306a36Sopenharmony_ci	size_t ispace = 0;
368362306a36Sopenharmony_ci
368462306a36Sopenharmony_ci	name = xattr_full_name(handler, name);
368562306a36Sopenharmony_ci	if (value && sbinfo->max_inodes) {
368662306a36Sopenharmony_ci		ispace = simple_xattr_space(name, size);
368762306a36Sopenharmony_ci		raw_spin_lock(&sbinfo->stat_lock);
368862306a36Sopenharmony_ci		if (sbinfo->free_ispace < ispace)
368962306a36Sopenharmony_ci			ispace = 0;
369062306a36Sopenharmony_ci		else
369162306a36Sopenharmony_ci			sbinfo->free_ispace -= ispace;
369262306a36Sopenharmony_ci		raw_spin_unlock(&sbinfo->stat_lock);
369362306a36Sopenharmony_ci		if (!ispace)
369462306a36Sopenharmony_ci			return -ENOSPC;
369562306a36Sopenharmony_ci	}
369662306a36Sopenharmony_ci
369762306a36Sopenharmony_ci	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
369862306a36Sopenharmony_ci	if (!IS_ERR(old_xattr)) {
369962306a36Sopenharmony_ci		ispace = 0;
370062306a36Sopenharmony_ci		if (old_xattr && sbinfo->max_inodes)
370162306a36Sopenharmony_ci			ispace = simple_xattr_space(old_xattr->name,
370262306a36Sopenharmony_ci						    old_xattr->size);
370362306a36Sopenharmony_ci		simple_xattr_free(old_xattr);
370462306a36Sopenharmony_ci		old_xattr = NULL;
370562306a36Sopenharmony_ci		inode_set_ctime_current(inode);
370662306a36Sopenharmony_ci		inode_inc_iversion(inode);
370762306a36Sopenharmony_ci	}
370862306a36Sopenharmony_ci	if (ispace) {
370962306a36Sopenharmony_ci		raw_spin_lock(&sbinfo->stat_lock);
371062306a36Sopenharmony_ci		sbinfo->free_ispace += ispace;
371162306a36Sopenharmony_ci		raw_spin_unlock(&sbinfo->stat_lock);
371262306a36Sopenharmony_ci	}
371362306a36Sopenharmony_ci	return PTR_ERR(old_xattr);
371462306a36Sopenharmony_ci}
371562306a36Sopenharmony_ci
371662306a36Sopenharmony_cistatic const struct xattr_handler shmem_security_xattr_handler = {
371762306a36Sopenharmony_ci	.prefix = XATTR_SECURITY_PREFIX,
371862306a36Sopenharmony_ci	.get = shmem_xattr_handler_get,
371962306a36Sopenharmony_ci	.set = shmem_xattr_handler_set,
372062306a36Sopenharmony_ci};
372162306a36Sopenharmony_ci
372262306a36Sopenharmony_cistatic const struct xattr_handler shmem_trusted_xattr_handler = {
372362306a36Sopenharmony_ci	.prefix = XATTR_TRUSTED_PREFIX,
372462306a36Sopenharmony_ci	.get = shmem_xattr_handler_get,
372562306a36Sopenharmony_ci	.set = shmem_xattr_handler_set,
372662306a36Sopenharmony_ci};
372762306a36Sopenharmony_ci
372862306a36Sopenharmony_cistatic const struct xattr_handler shmem_user_xattr_handler = {
372962306a36Sopenharmony_ci	.prefix = XATTR_USER_PREFIX,
373062306a36Sopenharmony_ci	.get = shmem_xattr_handler_get,
373162306a36Sopenharmony_ci	.set = shmem_xattr_handler_set,
373262306a36Sopenharmony_ci};
373362306a36Sopenharmony_ci
373462306a36Sopenharmony_cistatic const struct xattr_handler *shmem_xattr_handlers[] = {
373562306a36Sopenharmony_ci	&shmem_security_xattr_handler,
373662306a36Sopenharmony_ci	&shmem_trusted_xattr_handler,
373762306a36Sopenharmony_ci	&shmem_user_xattr_handler,
373862306a36Sopenharmony_ci	NULL
373962306a36Sopenharmony_ci};
374062306a36Sopenharmony_ci
374162306a36Sopenharmony_cistatic ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
374262306a36Sopenharmony_ci{
374362306a36Sopenharmony_ci	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
374462306a36Sopenharmony_ci	return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
374562306a36Sopenharmony_ci}
374662306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_XATTR */
374762306a36Sopenharmony_ci
374862306a36Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations = {
374962306a36Sopenharmony_ci	.getattr	= shmem_getattr,
375062306a36Sopenharmony_ci	.setattr	= shmem_setattr,
375162306a36Sopenharmony_ci	.get_link	= simple_get_link,
375262306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR
375362306a36Sopenharmony_ci	.listxattr	= shmem_listxattr,
375462306a36Sopenharmony_ci#endif
375562306a36Sopenharmony_ci};
375662306a36Sopenharmony_ci
375762306a36Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations = {
375862306a36Sopenharmony_ci	.getattr	= shmem_getattr,
375962306a36Sopenharmony_ci	.setattr	= shmem_setattr,
376062306a36Sopenharmony_ci	.get_link	= shmem_get_link,
376162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR
376262306a36Sopenharmony_ci	.listxattr	= shmem_listxattr,
376362306a36Sopenharmony_ci#endif
376462306a36Sopenharmony_ci};
376562306a36Sopenharmony_ci
376662306a36Sopenharmony_cistatic struct dentry *shmem_get_parent(struct dentry *child)
376762306a36Sopenharmony_ci{
376862306a36Sopenharmony_ci	return ERR_PTR(-ESTALE);
376962306a36Sopenharmony_ci}
377062306a36Sopenharmony_ci
377162306a36Sopenharmony_cistatic int shmem_match(struct inode *ino, void *vfh)
377262306a36Sopenharmony_ci{
377362306a36Sopenharmony_ci	__u32 *fh = vfh;
377462306a36Sopenharmony_ci	__u64 inum = fh[2];
377562306a36Sopenharmony_ci	inum = (inum << 32) | fh[1];
377662306a36Sopenharmony_ci	return ino->i_ino == inum && fh[0] == ino->i_generation;
377762306a36Sopenharmony_ci}
377862306a36Sopenharmony_ci
/*
 * Return an alias of @inode: the one d_find_alias() picks (a hashed
 * alias is preferred) or, failing that, whatever d_find_any_alias() finds.
 */
static struct dentry *shmem_find_alias(struct inode *inode)
{
	struct dentry *alias = d_find_alias(inode);

	if (alias)
		return alias;
	return d_find_any_alias(inode);
}
378662306a36Sopenharmony_ci
378762306a36Sopenharmony_ci
378862306a36Sopenharmony_cistatic struct dentry *shmem_fh_to_dentry(struct super_block *sb,
378962306a36Sopenharmony_ci		struct fid *fid, int fh_len, int fh_type)
379062306a36Sopenharmony_ci{
379162306a36Sopenharmony_ci	struct inode *inode;
379262306a36Sopenharmony_ci	struct dentry *dentry = NULL;
379362306a36Sopenharmony_ci	u64 inum;
379462306a36Sopenharmony_ci
379562306a36Sopenharmony_ci	if (fh_len < 3)
379662306a36Sopenharmony_ci		return NULL;
379762306a36Sopenharmony_ci
379862306a36Sopenharmony_ci	inum = fid->raw[2];
379962306a36Sopenharmony_ci	inum = (inum << 32) | fid->raw[1];
380062306a36Sopenharmony_ci
380162306a36Sopenharmony_ci	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
380262306a36Sopenharmony_ci			shmem_match, fid->raw);
380362306a36Sopenharmony_ci	if (inode) {
380462306a36Sopenharmony_ci		dentry = shmem_find_alias(inode);
380562306a36Sopenharmony_ci		iput(inode);
380662306a36Sopenharmony_ci	}
380762306a36Sopenharmony_ci
380862306a36Sopenharmony_ci	return dentry;
380962306a36Sopenharmony_ci}
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_cistatic int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
381262306a36Sopenharmony_ci				struct inode *parent)
381362306a36Sopenharmony_ci{
381462306a36Sopenharmony_ci	if (*len < 3) {
381562306a36Sopenharmony_ci		*len = 3;
381662306a36Sopenharmony_ci		return FILEID_INVALID;
381762306a36Sopenharmony_ci	}
381862306a36Sopenharmony_ci
381962306a36Sopenharmony_ci	if (inode_unhashed(inode)) {
382062306a36Sopenharmony_ci		/* Unfortunately insert_inode_hash is not idempotent,
382162306a36Sopenharmony_ci		 * so as we hash inodes here rather than at creation
382262306a36Sopenharmony_ci		 * time, we need a lock to ensure we only try
382362306a36Sopenharmony_ci		 * to do it once
382462306a36Sopenharmony_ci		 */
382562306a36Sopenharmony_ci		static DEFINE_SPINLOCK(lock);
382662306a36Sopenharmony_ci		spin_lock(&lock);
382762306a36Sopenharmony_ci		if (inode_unhashed(inode))
382862306a36Sopenharmony_ci			__insert_inode_hash(inode,
382962306a36Sopenharmony_ci					    inode->i_ino + inode->i_generation);
383062306a36Sopenharmony_ci		spin_unlock(&lock);
383162306a36Sopenharmony_ci	}
383262306a36Sopenharmony_ci
383362306a36Sopenharmony_ci	fh[0] = inode->i_generation;
383462306a36Sopenharmony_ci	fh[1] = inode->i_ino;
383562306a36Sopenharmony_ci	fh[2] = ((__u64)inode->i_ino) >> 32;
383662306a36Sopenharmony_ci
383762306a36Sopenharmony_ci	*len = 3;
383862306a36Sopenharmony_ci	return 1;
383962306a36Sopenharmony_ci}
384062306a36Sopenharmony_ci
384162306a36Sopenharmony_cistatic const struct export_operations shmem_export_ops = {
384262306a36Sopenharmony_ci	.get_parent     = shmem_get_parent,
384362306a36Sopenharmony_ci	.encode_fh      = shmem_encode_fh,
384462306a36Sopenharmony_ci	.fh_to_dentry	= shmem_fh_to_dentry,
384562306a36Sopenharmony_ci};
384662306a36Sopenharmony_ci
/*
 * Tokens for the mount parameters listed in shmem_fs_parameters[];
 * shmem_parse_one() switches on them.
 */
enum shmem_param {
	Opt_gid,
	Opt_huge,
	Opt_mode,
	Opt_mpol,
	Opt_nr_blocks,
	Opt_nr_inodes,
	Opt_size,
	Opt_uid,
	Opt_inode32,
	Opt_inode64,
	Opt_noswap,
	/* quota enablement */
	Opt_quota,
	Opt_usrquota,
	Opt_grpquota,
	/* quota hard limits */
	Opt_usrquota_block_hardlimit,
	Opt_usrquota_inode_hardlimit,
	Opt_grpquota_block_hardlimit,
	Opt_grpquota_inode_hardlimit,
};
386762306a36Sopenharmony_ci
386862306a36Sopenharmony_cistatic const struct constant_table shmem_param_enums_huge[] = {
386962306a36Sopenharmony_ci	{"never",	SHMEM_HUGE_NEVER },
387062306a36Sopenharmony_ci	{"always",	SHMEM_HUGE_ALWAYS },
387162306a36Sopenharmony_ci	{"within_size",	SHMEM_HUGE_WITHIN_SIZE },
387262306a36Sopenharmony_ci	{"advise",	SHMEM_HUGE_ADVISE },
387362306a36Sopenharmony_ci	{}
387462306a36Sopenharmony_ci};
387562306a36Sopenharmony_ci
387662306a36Sopenharmony_ciconst struct fs_parameter_spec shmem_fs_parameters[] = {
387762306a36Sopenharmony_ci	fsparam_u32   ("gid",		Opt_gid),
387862306a36Sopenharmony_ci	fsparam_enum  ("huge",		Opt_huge,  shmem_param_enums_huge),
387962306a36Sopenharmony_ci	fsparam_u32oct("mode",		Opt_mode),
388062306a36Sopenharmony_ci	fsparam_string("mpol",		Opt_mpol),
388162306a36Sopenharmony_ci	fsparam_string("nr_blocks",	Opt_nr_blocks),
388262306a36Sopenharmony_ci	fsparam_string("nr_inodes",	Opt_nr_inodes),
388362306a36Sopenharmony_ci	fsparam_string("size",		Opt_size),
388462306a36Sopenharmony_ci	fsparam_u32   ("uid",		Opt_uid),
388562306a36Sopenharmony_ci	fsparam_flag  ("inode32",	Opt_inode32),
388662306a36Sopenharmony_ci	fsparam_flag  ("inode64",	Opt_inode64),
388762306a36Sopenharmony_ci	fsparam_flag  ("noswap",	Opt_noswap),
388862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
388962306a36Sopenharmony_ci	fsparam_flag  ("quota",		Opt_quota),
389062306a36Sopenharmony_ci	fsparam_flag  ("usrquota",	Opt_usrquota),
389162306a36Sopenharmony_ci	fsparam_flag  ("grpquota",	Opt_grpquota),
389262306a36Sopenharmony_ci	fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit),
389362306a36Sopenharmony_ci	fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit),
389462306a36Sopenharmony_ci	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
389562306a36Sopenharmony_ci	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
389662306a36Sopenharmony_ci#endif
389762306a36Sopenharmony_ci	{}
389862306a36Sopenharmony_ci};
389962306a36Sopenharmony_ci
390062306a36Sopenharmony_cistatic int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
390162306a36Sopenharmony_ci{
390262306a36Sopenharmony_ci	struct shmem_options *ctx = fc->fs_private;
390362306a36Sopenharmony_ci	struct fs_parse_result result;
390462306a36Sopenharmony_ci	unsigned long long size;
390562306a36Sopenharmony_ci	char *rest;
390662306a36Sopenharmony_ci	int opt;
390762306a36Sopenharmony_ci	kuid_t kuid;
390862306a36Sopenharmony_ci	kgid_t kgid;
390962306a36Sopenharmony_ci
391062306a36Sopenharmony_ci	opt = fs_parse(fc, shmem_fs_parameters, param, &result);
391162306a36Sopenharmony_ci	if (opt < 0)
391262306a36Sopenharmony_ci		return opt;
391362306a36Sopenharmony_ci
391462306a36Sopenharmony_ci	switch (opt) {
391562306a36Sopenharmony_ci	case Opt_size:
391662306a36Sopenharmony_ci		size = memparse(param->string, &rest);
391762306a36Sopenharmony_ci		if (*rest == '%') {
391862306a36Sopenharmony_ci			size <<= PAGE_SHIFT;
391962306a36Sopenharmony_ci			size *= totalram_pages();
392062306a36Sopenharmony_ci			do_div(size, 100);
392162306a36Sopenharmony_ci			rest++;
392262306a36Sopenharmony_ci		}
392362306a36Sopenharmony_ci		if (*rest)
392462306a36Sopenharmony_ci			goto bad_value;
392562306a36Sopenharmony_ci		ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE);
392662306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_BLOCKS;
392762306a36Sopenharmony_ci		break;
392862306a36Sopenharmony_ci	case Opt_nr_blocks:
392962306a36Sopenharmony_ci		ctx->blocks = memparse(param->string, &rest);
393062306a36Sopenharmony_ci		if (*rest || ctx->blocks > LONG_MAX)
393162306a36Sopenharmony_ci			goto bad_value;
393262306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_BLOCKS;
393362306a36Sopenharmony_ci		break;
393462306a36Sopenharmony_ci	case Opt_nr_inodes:
393562306a36Sopenharmony_ci		ctx->inodes = memparse(param->string, &rest);
393662306a36Sopenharmony_ci		if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE)
393762306a36Sopenharmony_ci			goto bad_value;
393862306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_INODES;
393962306a36Sopenharmony_ci		break;
394062306a36Sopenharmony_ci	case Opt_mode:
394162306a36Sopenharmony_ci		ctx->mode = result.uint_32 & 07777;
394262306a36Sopenharmony_ci		break;
394362306a36Sopenharmony_ci	case Opt_uid:
394462306a36Sopenharmony_ci		kuid = make_kuid(current_user_ns(), result.uint_32);
394562306a36Sopenharmony_ci		if (!uid_valid(kuid))
394662306a36Sopenharmony_ci			goto bad_value;
394762306a36Sopenharmony_ci
394862306a36Sopenharmony_ci		/*
394962306a36Sopenharmony_ci		 * The requested uid must be representable in the
395062306a36Sopenharmony_ci		 * filesystem's idmapping.
395162306a36Sopenharmony_ci		 */
395262306a36Sopenharmony_ci		if (!kuid_has_mapping(fc->user_ns, kuid))
395362306a36Sopenharmony_ci			goto bad_value;
395462306a36Sopenharmony_ci
395562306a36Sopenharmony_ci		ctx->uid = kuid;
395662306a36Sopenharmony_ci		break;
395762306a36Sopenharmony_ci	case Opt_gid:
395862306a36Sopenharmony_ci		kgid = make_kgid(current_user_ns(), result.uint_32);
395962306a36Sopenharmony_ci		if (!gid_valid(kgid))
396062306a36Sopenharmony_ci			goto bad_value;
396162306a36Sopenharmony_ci
396262306a36Sopenharmony_ci		/*
396362306a36Sopenharmony_ci		 * The requested gid must be representable in the
396462306a36Sopenharmony_ci		 * filesystem's idmapping.
396562306a36Sopenharmony_ci		 */
396662306a36Sopenharmony_ci		if (!kgid_has_mapping(fc->user_ns, kgid))
396762306a36Sopenharmony_ci			goto bad_value;
396862306a36Sopenharmony_ci
396962306a36Sopenharmony_ci		ctx->gid = kgid;
397062306a36Sopenharmony_ci		break;
397162306a36Sopenharmony_ci	case Opt_huge:
397262306a36Sopenharmony_ci		ctx->huge = result.uint_32;
397362306a36Sopenharmony_ci		if (ctx->huge != SHMEM_HUGE_NEVER &&
397462306a36Sopenharmony_ci		    !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
397562306a36Sopenharmony_ci		      has_transparent_hugepage()))
397662306a36Sopenharmony_ci			goto unsupported_parameter;
397762306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_HUGE;
397862306a36Sopenharmony_ci		break;
397962306a36Sopenharmony_ci	case Opt_mpol:
398062306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_NUMA)) {
398162306a36Sopenharmony_ci			mpol_put(ctx->mpol);
398262306a36Sopenharmony_ci			ctx->mpol = NULL;
398362306a36Sopenharmony_ci			if (mpol_parse_str(param->string, &ctx->mpol))
398462306a36Sopenharmony_ci				goto bad_value;
398562306a36Sopenharmony_ci			break;
398662306a36Sopenharmony_ci		}
398762306a36Sopenharmony_ci		goto unsupported_parameter;
398862306a36Sopenharmony_ci	case Opt_inode32:
398962306a36Sopenharmony_ci		ctx->full_inums = false;
399062306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_INUMS;
399162306a36Sopenharmony_ci		break;
399262306a36Sopenharmony_ci	case Opt_inode64:
399362306a36Sopenharmony_ci		if (sizeof(ino_t) < 8) {
399462306a36Sopenharmony_ci			return invalfc(fc,
399562306a36Sopenharmony_ci				       "Cannot use inode64 with <64bit inums in kernel\n");
399662306a36Sopenharmony_ci		}
399762306a36Sopenharmony_ci		ctx->full_inums = true;
399862306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_INUMS;
399962306a36Sopenharmony_ci		break;
400062306a36Sopenharmony_ci	case Opt_noswap:
400162306a36Sopenharmony_ci		if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) {
400262306a36Sopenharmony_ci			return invalfc(fc,
400362306a36Sopenharmony_ci				       "Turning off swap in unprivileged tmpfs mounts unsupported");
400462306a36Sopenharmony_ci		}
400562306a36Sopenharmony_ci		ctx->noswap = true;
400662306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_NOSWAP;
400762306a36Sopenharmony_ci		break;
400862306a36Sopenharmony_ci	case Opt_quota:
400962306a36Sopenharmony_ci		if (fc->user_ns != &init_user_ns)
401062306a36Sopenharmony_ci			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
401162306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_QUOTA;
401262306a36Sopenharmony_ci		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
401362306a36Sopenharmony_ci		break;
401462306a36Sopenharmony_ci	case Opt_usrquota:
401562306a36Sopenharmony_ci		if (fc->user_ns != &init_user_ns)
401662306a36Sopenharmony_ci			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
401762306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_QUOTA;
401862306a36Sopenharmony_ci		ctx->quota_types |= QTYPE_MASK_USR;
401962306a36Sopenharmony_ci		break;
402062306a36Sopenharmony_ci	case Opt_grpquota:
402162306a36Sopenharmony_ci		if (fc->user_ns != &init_user_ns)
402262306a36Sopenharmony_ci			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
402362306a36Sopenharmony_ci		ctx->seen |= SHMEM_SEEN_QUOTA;
402462306a36Sopenharmony_ci		ctx->quota_types |= QTYPE_MASK_GRP;
402562306a36Sopenharmony_ci		break;
402662306a36Sopenharmony_ci	case Opt_usrquota_block_hardlimit:
402762306a36Sopenharmony_ci		size = memparse(param->string, &rest);
402862306a36Sopenharmony_ci		if (*rest || !size)
402962306a36Sopenharmony_ci			goto bad_value;
403062306a36Sopenharmony_ci		if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
403162306a36Sopenharmony_ci			return invalfc(fc,
403262306a36Sopenharmony_ci				       "User quota block hardlimit too large.");
403362306a36Sopenharmony_ci		ctx->qlimits.usrquota_bhardlimit = size;
403462306a36Sopenharmony_ci		break;
403562306a36Sopenharmony_ci	case Opt_grpquota_block_hardlimit:
403662306a36Sopenharmony_ci		size = memparse(param->string, &rest);
403762306a36Sopenharmony_ci		if (*rest || !size)
403862306a36Sopenharmony_ci			goto bad_value;
403962306a36Sopenharmony_ci		if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
404062306a36Sopenharmony_ci			return invalfc(fc,
404162306a36Sopenharmony_ci				       "Group quota block hardlimit too large.");
404262306a36Sopenharmony_ci		ctx->qlimits.grpquota_bhardlimit = size;
404362306a36Sopenharmony_ci		break;
404462306a36Sopenharmony_ci	case Opt_usrquota_inode_hardlimit:
404562306a36Sopenharmony_ci		size = memparse(param->string, &rest);
404662306a36Sopenharmony_ci		if (*rest || !size)
404762306a36Sopenharmony_ci			goto bad_value;
404862306a36Sopenharmony_ci		if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
404962306a36Sopenharmony_ci			return invalfc(fc,
405062306a36Sopenharmony_ci				       "User quota inode hardlimit too large.");
405162306a36Sopenharmony_ci		ctx->qlimits.usrquota_ihardlimit = size;
405262306a36Sopenharmony_ci		break;
405362306a36Sopenharmony_ci	case Opt_grpquota_inode_hardlimit:
405462306a36Sopenharmony_ci		size = memparse(param->string, &rest);
405562306a36Sopenharmony_ci		if (*rest || !size)
405662306a36Sopenharmony_ci			goto bad_value;
405762306a36Sopenharmony_ci		if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
405862306a36Sopenharmony_ci			return invalfc(fc,
405962306a36Sopenharmony_ci				       "Group quota inode hardlimit too large.");
406062306a36Sopenharmony_ci		ctx->qlimits.grpquota_ihardlimit = size;
406162306a36Sopenharmony_ci		break;
406262306a36Sopenharmony_ci	}
406362306a36Sopenharmony_ci	return 0;
406462306a36Sopenharmony_ci
406562306a36Sopenharmony_ciunsupported_parameter:
406662306a36Sopenharmony_ci	return invalfc(fc, "Unsupported parameter '%s'", param->key);
406762306a36Sopenharmony_cibad_value:
406862306a36Sopenharmony_ci	return invalfc(fc, "Bad value for '%s'", param->key);
406962306a36Sopenharmony_ci}
407062306a36Sopenharmony_ci
407162306a36Sopenharmony_cistatic int shmem_parse_options(struct fs_context *fc, void *data)
407262306a36Sopenharmony_ci{
407362306a36Sopenharmony_ci	char *options = data;
407462306a36Sopenharmony_ci
407562306a36Sopenharmony_ci	if (options) {
407662306a36Sopenharmony_ci		int err = security_sb_eat_lsm_opts(options, &fc->security);
407762306a36Sopenharmony_ci		if (err)
407862306a36Sopenharmony_ci			return err;
407962306a36Sopenharmony_ci	}
408062306a36Sopenharmony_ci
408162306a36Sopenharmony_ci	while (options != NULL) {
408262306a36Sopenharmony_ci		char *this_char = options;
408362306a36Sopenharmony_ci		for (;;) {
408462306a36Sopenharmony_ci			/*
408562306a36Sopenharmony_ci			 * NUL-terminate this option: unfortunately,
408662306a36Sopenharmony_ci			 * mount options form a comma-separated list,
408762306a36Sopenharmony_ci			 * but mpol's nodelist may also contain commas.
408862306a36Sopenharmony_ci			 */
408962306a36Sopenharmony_ci			options = strchr(options, ',');
409062306a36Sopenharmony_ci			if (options == NULL)
409162306a36Sopenharmony_ci				break;
409262306a36Sopenharmony_ci			options++;
409362306a36Sopenharmony_ci			if (!isdigit(*options)) {
409462306a36Sopenharmony_ci				options[-1] = '\0';
409562306a36Sopenharmony_ci				break;
409662306a36Sopenharmony_ci			}
409762306a36Sopenharmony_ci		}
409862306a36Sopenharmony_ci		if (*this_char) {
409962306a36Sopenharmony_ci			char *value = strchr(this_char, '=');
410062306a36Sopenharmony_ci			size_t len = 0;
410162306a36Sopenharmony_ci			int err;
410262306a36Sopenharmony_ci
410362306a36Sopenharmony_ci			if (value) {
410462306a36Sopenharmony_ci				*value++ = '\0';
410562306a36Sopenharmony_ci				len = strlen(value);
410662306a36Sopenharmony_ci			}
410762306a36Sopenharmony_ci			err = vfs_parse_fs_string(fc, this_char, value, len);
410862306a36Sopenharmony_ci			if (err < 0)
410962306a36Sopenharmony_ci				return err;
411062306a36Sopenharmony_ci		}
411162306a36Sopenharmony_ci	}
411262306a36Sopenharmony_ci	return 0;
411362306a36Sopenharmony_ci}
411462306a36Sopenharmony_ci
411562306a36Sopenharmony_ci/*
411662306a36Sopenharmony_ci * Reconfigure a shmem filesystem.
411762306a36Sopenharmony_ci */
411862306a36Sopenharmony_cistatic int shmem_reconfigure(struct fs_context *fc)
411962306a36Sopenharmony_ci{
412062306a36Sopenharmony_ci	struct shmem_options *ctx = fc->fs_private;
412162306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
412262306a36Sopenharmony_ci	unsigned long used_isp;
412362306a36Sopenharmony_ci	struct mempolicy *mpol = NULL;
412462306a36Sopenharmony_ci	const char *err;
412562306a36Sopenharmony_ci
412662306a36Sopenharmony_ci	raw_spin_lock(&sbinfo->stat_lock);
412762306a36Sopenharmony_ci	used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace;
412862306a36Sopenharmony_ci
412962306a36Sopenharmony_ci	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
413062306a36Sopenharmony_ci		if (!sbinfo->max_blocks) {
413162306a36Sopenharmony_ci			err = "Cannot retroactively limit size";
413262306a36Sopenharmony_ci			goto out;
413362306a36Sopenharmony_ci		}
413462306a36Sopenharmony_ci		if (percpu_counter_compare(&sbinfo->used_blocks,
413562306a36Sopenharmony_ci					   ctx->blocks) > 0) {
413662306a36Sopenharmony_ci			err = "Too small a size for current use";
413762306a36Sopenharmony_ci			goto out;
413862306a36Sopenharmony_ci		}
413962306a36Sopenharmony_ci	}
414062306a36Sopenharmony_ci	if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) {
414162306a36Sopenharmony_ci		if (!sbinfo->max_inodes) {
414262306a36Sopenharmony_ci			err = "Cannot retroactively limit inodes";
414362306a36Sopenharmony_ci			goto out;
414462306a36Sopenharmony_ci		}
414562306a36Sopenharmony_ci		if (ctx->inodes * BOGO_INODE_SIZE < used_isp) {
414662306a36Sopenharmony_ci			err = "Too few inodes for current use";
414762306a36Sopenharmony_ci			goto out;
414862306a36Sopenharmony_ci		}
414962306a36Sopenharmony_ci	}
415062306a36Sopenharmony_ci
415162306a36Sopenharmony_ci	if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums &&
415262306a36Sopenharmony_ci	    sbinfo->next_ino > UINT_MAX) {
415362306a36Sopenharmony_ci		err = "Current inum too high to switch to 32-bit inums";
415462306a36Sopenharmony_ci		goto out;
415562306a36Sopenharmony_ci	}
415662306a36Sopenharmony_ci	if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) {
415762306a36Sopenharmony_ci		err = "Cannot disable swap on remount";
415862306a36Sopenharmony_ci		goto out;
415962306a36Sopenharmony_ci	}
416062306a36Sopenharmony_ci	if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) {
416162306a36Sopenharmony_ci		err = "Cannot enable swap on remount if it was disabled on first mount";
416262306a36Sopenharmony_ci		goto out;
416362306a36Sopenharmony_ci	}
416462306a36Sopenharmony_ci
416562306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_QUOTA &&
416662306a36Sopenharmony_ci	    !sb_any_quota_loaded(fc->root->d_sb)) {
416762306a36Sopenharmony_ci		err = "Cannot enable quota on remount";
416862306a36Sopenharmony_ci		goto out;
416962306a36Sopenharmony_ci	}
417062306a36Sopenharmony_ci
417162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
417262306a36Sopenharmony_ci#define CHANGED_LIMIT(name)						\
417362306a36Sopenharmony_ci	(ctx->qlimits.name## hardlimit &&				\
417462306a36Sopenharmony_ci	(ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit))
417562306a36Sopenharmony_ci
417662306a36Sopenharmony_ci	if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) ||
417762306a36Sopenharmony_ci	    CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) {
417862306a36Sopenharmony_ci		err = "Cannot change global quota limit on remount";
417962306a36Sopenharmony_ci		goto out;
418062306a36Sopenharmony_ci	}
418162306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */
418262306a36Sopenharmony_ci
418362306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_HUGE)
418462306a36Sopenharmony_ci		sbinfo->huge = ctx->huge;
418562306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_INUMS)
418662306a36Sopenharmony_ci		sbinfo->full_inums = ctx->full_inums;
418762306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_BLOCKS)
418862306a36Sopenharmony_ci		sbinfo->max_blocks  = ctx->blocks;
418962306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_INODES) {
419062306a36Sopenharmony_ci		sbinfo->max_inodes  = ctx->inodes;
419162306a36Sopenharmony_ci		sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp;
419262306a36Sopenharmony_ci	}
419362306a36Sopenharmony_ci
419462306a36Sopenharmony_ci	/*
419562306a36Sopenharmony_ci	 * Preserve previous mempolicy unless mpol remount option was specified.
419662306a36Sopenharmony_ci	 */
419762306a36Sopenharmony_ci	if (ctx->mpol) {
419862306a36Sopenharmony_ci		mpol = sbinfo->mpol;
419962306a36Sopenharmony_ci		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
420062306a36Sopenharmony_ci		ctx->mpol = NULL;
420162306a36Sopenharmony_ci	}
420262306a36Sopenharmony_ci
420362306a36Sopenharmony_ci	if (ctx->noswap)
420462306a36Sopenharmony_ci		sbinfo->noswap = true;
420562306a36Sopenharmony_ci
420662306a36Sopenharmony_ci	raw_spin_unlock(&sbinfo->stat_lock);
420762306a36Sopenharmony_ci	mpol_put(mpol);
420862306a36Sopenharmony_ci	return 0;
420962306a36Sopenharmony_ciout:
421062306a36Sopenharmony_ci	raw_spin_unlock(&sbinfo->stat_lock);
421162306a36Sopenharmony_ci	return invalfc(fc, "%s", err);
421262306a36Sopenharmony_ci}
421362306a36Sopenharmony_ci
421462306a36Sopenharmony_cistatic int shmem_show_options(struct seq_file *seq, struct dentry *root)
421562306a36Sopenharmony_ci{
421662306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
421762306a36Sopenharmony_ci	struct mempolicy *mpol;
421862306a36Sopenharmony_ci
421962306a36Sopenharmony_ci	if (sbinfo->max_blocks != shmem_default_max_blocks())
422062306a36Sopenharmony_ci		seq_printf(seq, ",size=%luk", K(sbinfo->max_blocks));
422162306a36Sopenharmony_ci	if (sbinfo->max_inodes != shmem_default_max_inodes())
422262306a36Sopenharmony_ci		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
422362306a36Sopenharmony_ci	if (sbinfo->mode != (0777 | S_ISVTX))
422462306a36Sopenharmony_ci		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
422562306a36Sopenharmony_ci	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
422662306a36Sopenharmony_ci		seq_printf(seq, ",uid=%u",
422762306a36Sopenharmony_ci				from_kuid_munged(&init_user_ns, sbinfo->uid));
422862306a36Sopenharmony_ci	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
422962306a36Sopenharmony_ci		seq_printf(seq, ",gid=%u",
423062306a36Sopenharmony_ci				from_kgid_munged(&init_user_ns, sbinfo->gid));
423162306a36Sopenharmony_ci
423262306a36Sopenharmony_ci	/*
423362306a36Sopenharmony_ci	 * Showing inode{64,32} might be useful even if it's the system default,
423462306a36Sopenharmony_ci	 * since then people don't have to resort to checking both here and
423562306a36Sopenharmony_ci	 * /proc/config.gz to confirm 64-bit inums were successfully applied
423662306a36Sopenharmony_ci	 * (which may not even exist if IKCONFIG_PROC isn't enabled).
423762306a36Sopenharmony_ci	 *
423862306a36Sopenharmony_ci	 * We hide it when inode64 isn't the default and we are using 32-bit
423962306a36Sopenharmony_ci	 * inodes, since that probably just means the feature isn't even under
424062306a36Sopenharmony_ci	 * consideration.
424162306a36Sopenharmony_ci	 *
424262306a36Sopenharmony_ci	 * As such:
424362306a36Sopenharmony_ci	 *
424462306a36Sopenharmony_ci	 *                     +-----------------+-----------------+
424562306a36Sopenharmony_ci	 *                     | TMPFS_INODE64=y | TMPFS_INODE64=n |
424662306a36Sopenharmony_ci	 *  +------------------+-----------------+-----------------+
424762306a36Sopenharmony_ci	 *  | full_inums=true  | show            | show            |
424862306a36Sopenharmony_ci	 *  | full_inums=false | show            | hide            |
424962306a36Sopenharmony_ci	 *  +------------------+-----------------+-----------------+
425062306a36Sopenharmony_ci	 *
425162306a36Sopenharmony_ci	 */
425262306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums)
425362306a36Sopenharmony_ci		seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32));
425462306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
425562306a36Sopenharmony_ci	/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
425662306a36Sopenharmony_ci	if (sbinfo->huge)
425762306a36Sopenharmony_ci		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
425862306a36Sopenharmony_ci#endif
425962306a36Sopenharmony_ci	mpol = shmem_get_sbmpol(sbinfo);
426062306a36Sopenharmony_ci	shmem_show_mpol(seq, mpol);
426162306a36Sopenharmony_ci	mpol_put(mpol);
426262306a36Sopenharmony_ci	if (sbinfo->noswap)
426362306a36Sopenharmony_ci		seq_printf(seq, ",noswap");
426462306a36Sopenharmony_ci	return 0;
426562306a36Sopenharmony_ci}
426662306a36Sopenharmony_ci
426762306a36Sopenharmony_ci#endif /* CONFIG_TMPFS */
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_cistatic void shmem_put_super(struct super_block *sb)
427062306a36Sopenharmony_ci{
427162306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
427262306a36Sopenharmony_ci
427362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
427462306a36Sopenharmony_ci	shmem_disable_quotas(sb);
427562306a36Sopenharmony_ci#endif
427662306a36Sopenharmony_ci	free_percpu(sbinfo->ino_batch);
427762306a36Sopenharmony_ci	percpu_counter_destroy(&sbinfo->used_blocks);
427862306a36Sopenharmony_ci	mpol_put(sbinfo->mpol);
427962306a36Sopenharmony_ci	kfree(sbinfo);
428062306a36Sopenharmony_ci	sb->s_fs_info = NULL;
428162306a36Sopenharmony_ci}
428262306a36Sopenharmony_ci
428362306a36Sopenharmony_cistatic int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
428462306a36Sopenharmony_ci{
428562306a36Sopenharmony_ci	struct shmem_options *ctx = fc->fs_private;
428662306a36Sopenharmony_ci	struct inode *inode;
428762306a36Sopenharmony_ci	struct shmem_sb_info *sbinfo;
428862306a36Sopenharmony_ci	int error = -ENOMEM;
428962306a36Sopenharmony_ci
429062306a36Sopenharmony_ci	/* Round up to L1_CACHE_BYTES to resist false sharing */
429162306a36Sopenharmony_ci	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
429262306a36Sopenharmony_ci				L1_CACHE_BYTES), GFP_KERNEL);
429362306a36Sopenharmony_ci	if (!sbinfo)
429462306a36Sopenharmony_ci		return error;
429562306a36Sopenharmony_ci
429662306a36Sopenharmony_ci	sb->s_fs_info = sbinfo;
429762306a36Sopenharmony_ci
429862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS
429962306a36Sopenharmony_ci	/*
430062306a36Sopenharmony_ci	 * Per default we only allow half of the physical ram per
430162306a36Sopenharmony_ci	 * tmpfs instance, limiting inodes to one per page of lowmem;
430262306a36Sopenharmony_ci	 * but the internal instance is left unlimited.
430362306a36Sopenharmony_ci	 */
430462306a36Sopenharmony_ci	if (!(sb->s_flags & SB_KERNMOUNT)) {
430562306a36Sopenharmony_ci		if (!(ctx->seen & SHMEM_SEEN_BLOCKS))
430662306a36Sopenharmony_ci			ctx->blocks = shmem_default_max_blocks();
430762306a36Sopenharmony_ci		if (!(ctx->seen & SHMEM_SEEN_INODES))
430862306a36Sopenharmony_ci			ctx->inodes = shmem_default_max_inodes();
430962306a36Sopenharmony_ci		if (!(ctx->seen & SHMEM_SEEN_INUMS))
431062306a36Sopenharmony_ci			ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
431162306a36Sopenharmony_ci		sbinfo->noswap = ctx->noswap;
431262306a36Sopenharmony_ci	} else {
431362306a36Sopenharmony_ci		sb->s_flags |= SB_NOUSER;
431462306a36Sopenharmony_ci	}
431562306a36Sopenharmony_ci	sb->s_export_op = &shmem_export_ops;
431662306a36Sopenharmony_ci	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
431762306a36Sopenharmony_ci#else
431862306a36Sopenharmony_ci	sb->s_flags |= SB_NOUSER;
431962306a36Sopenharmony_ci#endif
432062306a36Sopenharmony_ci	sbinfo->max_blocks = ctx->blocks;
432162306a36Sopenharmony_ci	sbinfo->max_inodes = ctx->inodes;
432262306a36Sopenharmony_ci	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
432362306a36Sopenharmony_ci	if (sb->s_flags & SB_KERNMOUNT) {
432462306a36Sopenharmony_ci		sbinfo->ino_batch = alloc_percpu(ino_t);
432562306a36Sopenharmony_ci		if (!sbinfo->ino_batch)
432662306a36Sopenharmony_ci			goto failed;
432762306a36Sopenharmony_ci	}
432862306a36Sopenharmony_ci	sbinfo->uid = ctx->uid;
432962306a36Sopenharmony_ci	sbinfo->gid = ctx->gid;
433062306a36Sopenharmony_ci	sbinfo->full_inums = ctx->full_inums;
433162306a36Sopenharmony_ci	sbinfo->mode = ctx->mode;
433262306a36Sopenharmony_ci	sbinfo->huge = ctx->huge;
433362306a36Sopenharmony_ci	sbinfo->mpol = ctx->mpol;
433462306a36Sopenharmony_ci	ctx->mpol = NULL;
433562306a36Sopenharmony_ci
433662306a36Sopenharmony_ci	raw_spin_lock_init(&sbinfo->stat_lock);
433762306a36Sopenharmony_ci	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
433862306a36Sopenharmony_ci		goto failed;
433962306a36Sopenharmony_ci	spin_lock_init(&sbinfo->shrinklist_lock);
434062306a36Sopenharmony_ci	INIT_LIST_HEAD(&sbinfo->shrinklist);
434162306a36Sopenharmony_ci
434262306a36Sopenharmony_ci	sb->s_maxbytes = MAX_LFS_FILESIZE;
434362306a36Sopenharmony_ci	sb->s_blocksize = PAGE_SIZE;
434462306a36Sopenharmony_ci	sb->s_blocksize_bits = PAGE_SHIFT;
434562306a36Sopenharmony_ci	sb->s_magic = TMPFS_MAGIC;
434662306a36Sopenharmony_ci	sb->s_op = &shmem_ops;
434762306a36Sopenharmony_ci	sb->s_time_gran = 1;
434862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR
434962306a36Sopenharmony_ci	sb->s_xattr = shmem_xattr_handlers;
435062306a36Sopenharmony_ci#endif
435162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL
435262306a36Sopenharmony_ci	sb->s_flags |= SB_POSIXACL;
435362306a36Sopenharmony_ci#endif
435462306a36Sopenharmony_ci	uuid_gen(&sb->s_uuid);
435562306a36Sopenharmony_ci
435662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA
435762306a36Sopenharmony_ci	if (ctx->seen & SHMEM_SEEN_QUOTA) {
435862306a36Sopenharmony_ci		sb->dq_op = &shmem_quota_operations;
435962306a36Sopenharmony_ci		sb->s_qcop = &dquot_quotactl_sysfile_ops;
436062306a36Sopenharmony_ci		sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
436162306a36Sopenharmony_ci
436262306a36Sopenharmony_ci		/* Copy the default limits from ctx into sbinfo */
436362306a36Sopenharmony_ci		memcpy(&sbinfo->qlimits, &ctx->qlimits,
436462306a36Sopenharmony_ci		       sizeof(struct shmem_quota_limits));
436562306a36Sopenharmony_ci
436662306a36Sopenharmony_ci		if (shmem_enable_quotas(sb, ctx->quota_types))
436762306a36Sopenharmony_ci			goto failed;
436862306a36Sopenharmony_ci	}
436962306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */
437062306a36Sopenharmony_ci
437162306a36Sopenharmony_ci	inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
437262306a36Sopenharmony_ci				VM_NORESERVE);
437362306a36Sopenharmony_ci	if (IS_ERR(inode)) {
437462306a36Sopenharmony_ci		error = PTR_ERR(inode);
437562306a36Sopenharmony_ci		goto failed;
437662306a36Sopenharmony_ci	}
437762306a36Sopenharmony_ci	inode->i_uid = sbinfo->uid;
437862306a36Sopenharmony_ci	inode->i_gid = sbinfo->gid;
437962306a36Sopenharmony_ci	sb->s_root = d_make_root(inode);
438062306a36Sopenharmony_ci	if (!sb->s_root)
438162306a36Sopenharmony_ci		goto failed;
438262306a36Sopenharmony_ci	return 0;
438362306a36Sopenharmony_ci
438462306a36Sopenharmony_cifailed:
438562306a36Sopenharmony_ci	shmem_put_super(sb);
438662306a36Sopenharmony_ci	return error;
438762306a36Sopenharmony_ci}
438862306a36Sopenharmony_ci
/*
 * ->get_tree hook of shmem_fs_context_ops: delegates to get_tree_nodev(),
 * passing shmem_fill_super() as the superblock fill callback, and returns
 * its result unchanged.
 */
static int shmem_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, shmem_fill_super);
}
439362306a36Sopenharmony_ci
439462306a36Sopenharmony_cistatic void shmem_free_fc(struct fs_context *fc)
439562306a36Sopenharmony_ci{
439662306a36Sopenharmony_ci	struct shmem_options *ctx = fc->fs_private;
439762306a36Sopenharmony_ci
439862306a36Sopenharmony_ci	if (ctx) {
439962306a36Sopenharmony_ci		mpol_put(ctx->mpol);
440062306a36Sopenharmony_ci		kfree(ctx);
440162306a36Sopenharmony_ci	}
440262306a36Sopenharmony_ci}
440362306a36Sopenharmony_ci
/*
 * Filesystem context operations installed by shmem_init_fs_context().
 * Option parsing and reconfiguration (remount) hooks are only provided when
 * CONFIG_TMPFS is enabled.
 */
static const struct fs_context_operations shmem_fs_context_ops = {
	.free			= shmem_free_fc,
	.get_tree		= shmem_get_tree,
#ifdef CONFIG_TMPFS
	.parse_monolithic	= shmem_parse_options,
	.parse_param		= shmem_parse_one,
	.reconfigure		= shmem_reconfigure,
#endif
};
441362306a36Sopenharmony_ci
441462306a36Sopenharmony_cistatic struct kmem_cache *shmem_inode_cachep;
441562306a36Sopenharmony_ci
441662306a36Sopenharmony_cistatic struct inode *shmem_alloc_inode(struct super_block *sb)
441762306a36Sopenharmony_ci{
441862306a36Sopenharmony_ci	struct shmem_inode_info *info;
441962306a36Sopenharmony_ci	info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL);
442062306a36Sopenharmony_ci	if (!info)
442162306a36Sopenharmony_ci		return NULL;
442262306a36Sopenharmony_ci	return &info->vfs_inode;
442362306a36Sopenharmony_ci}
442462306a36Sopenharmony_ci
442562306a36Sopenharmony_cistatic void shmem_free_in_core_inode(struct inode *inode)
442662306a36Sopenharmony_ci{
442762306a36Sopenharmony_ci	if (S_ISLNK(inode->i_mode))
442862306a36Sopenharmony_ci		kfree(inode->i_link);
442962306a36Sopenharmony_ci	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
443062306a36Sopenharmony_ci}
443162306a36Sopenharmony_ci
443262306a36Sopenharmony_cistatic void shmem_destroy_inode(struct inode *inode)
443362306a36Sopenharmony_ci{
443462306a36Sopenharmony_ci	if (S_ISREG(inode->i_mode))
443562306a36Sopenharmony_ci		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
443662306a36Sopenharmony_ci	if (S_ISDIR(inode->i_mode))
443762306a36Sopenharmony_ci		simple_offset_destroy(shmem_get_offset_ctx(inode));
443862306a36Sopenharmony_ci}
443962306a36Sopenharmony_ci
444062306a36Sopenharmony_cistatic void shmem_init_inode(void *foo)
444162306a36Sopenharmony_ci{
444262306a36Sopenharmony_ci	struct shmem_inode_info *info = foo;
444362306a36Sopenharmony_ci	inode_init_once(&info->vfs_inode);
444462306a36Sopenharmony_ci}
444562306a36Sopenharmony_ci
/*
 * Create the slab cache backing struct shmem_inode_info, with
 * shmem_init_inode() as constructor.  No return value is checked: the cache
 * is created with SLAB_PANIC.
 */
static void shmem_init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
}
445262306a36Sopenharmony_ci
/* Destroy the inode slab cache; used on the shmem_init() failure path. */
static void shmem_destroy_inodecache(void)
{
	kmem_cache_destroy(shmem_inode_cachep);
}
445762306a36Sopenharmony_ci
/*
 * ->error_remove_page hook of shmem_aops.
 *
 * Keep the page in page cache instead of truncating it: neither @mapping
 * nor @page is touched and 0 is returned unconditionally.
 */
static int shmem_error_remove_page(struct address_space *mapping,
				   struct page *page)
{
	return 0;
}
446462306a36Sopenharmony_ci
/*
 * Address space operations for shmem mappings.  The write_begin/write_end
 * pair is only available with CONFIG_TMPFS, folio migration only with
 * CONFIG_MIGRATION.  The table is exported (EXPORT_SYMBOL) for use outside
 * this file.
 */
const struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.dirty_folio	= noop_dirty_folio,
#ifdef CONFIG_TMPFS
	.write_begin	= shmem_write_begin,
	.write_end	= shmem_write_end,
#endif
#ifdef CONFIG_MIGRATION
	.migrate_folio	= migrate_folio,
#endif
	.error_remove_page = shmem_error_remove_page,
};
EXPORT_SYMBOL(shmem_aops);
447862306a36Sopenharmony_ci
/*
 * File operations for shmem files.  mmap, open and get_unmapped_area are
 * always present; seeking, read/write, fsync, splice and fallocate are only
 * wired up when CONFIG_TMPFS is enabled.
 */
static const struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
	.open		= shmem_file_open,
	.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
	.llseek		= shmem_file_llseek,
	.read_iter	= shmem_file_read_iter,
	.write_iter	= shmem_file_write_iter,
	.fsync		= noop_fsync,
	.splice_read	= shmem_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= shmem_fallocate,
#endif
};
449362306a36Sopenharmony_ci
/*
 * Inode operations table named for plain shmem inodes (presumably regular
 * files; the assignment site is outside this section).  The xattr listing,
 * ACL setting and fileattr hooks are all guarded by CONFIG_TMPFS_XATTR here.
 */
static const struct inode_operations shmem_inode_operations = {
	.getattr	= shmem_getattr,
	.setattr	= shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
	.set_acl	= simple_set_acl,
	.fileattr_get	= shmem_fileattr_get,
	.fileattr_set	= shmem_fileattr_set,
#endif
};
450462306a36Sopenharmony_ci
/*
 * Inode operations for shmem directories.  Namespace operations (create,
 * link, unlink, rename, ...) require CONFIG_TMPFS; xattr/fileattr hooks
 * require CONFIG_TMPFS_XATTR; .setattr and .set_acl are only installed with
 * CONFIG_TMPFS_POSIX_ACL.
 */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.getattr	= shmem_getattr,
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename2,
	.tmpfile	= shmem_tmpfile,
	.get_offset_ctx	= shmem_get_offset_ctx,
#endif
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
	.fileattr_get	= shmem_fileattr_get,
	.fileattr_set	= shmem_fileattr_set,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
453062306a36Sopenharmony_ci
/*
 * Inode operations table named for special shmem inodes (presumably device
 * nodes, FIFOs and sockets; the assignment site is outside this section).
 * Only getattr is unconditional; .setattr and .set_acl are tied to
 * CONFIG_TMPFS_POSIX_ACL.
 */
static const struct inode_operations shmem_special_inode_operations = {
	.getattr	= shmem_getattr,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
454162306a36Sopenharmony_ci
/*
 * Superblock operations, installed as sb->s_op by shmem_fill_super().
 * statfs/show_options need CONFIG_TMPFS, get_dquots needs
 * CONFIG_TMPFS_QUOTA, and the cached-object hooks (used for unused huge
 * pages) need CONFIG_TRANSPARENT_HUGEPAGE.
 */
static const struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.free_inode	= shmem_free_in_core_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.show_options	= shmem_show_options,
#endif
#ifdef CONFIG_TMPFS_QUOTA
	.get_dquots	= shmem_get_dquots,
#endif
	.evict_inode	= shmem_evict_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	.nr_cached_objects	= shmem_unused_huge_count,
	.free_cached_objects	= shmem_unused_huge_scan,
#endif
};
456162306a36Sopenharmony_ci
/*
 * VM operations for shmem mappings: shmem-specific fault handling, generic
 * map_pages, and NUMA policy hooks when CONFIG_NUMA is enabled.
 */
static const struct vm_operations_struct shmem_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};
457062306a36Sopenharmony_ci
/*
 * VM operations table with the same members as shmem_vm_ops, kept as a
 * distinct object (presumably so that users can tell the two kinds of
 * mapping apart by ops pointer; the users are outside this section).
 */
static const struct vm_operations_struct shmem_anon_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};
457962306a36Sopenharmony_ci
458062306a36Sopenharmony_ciint shmem_init_fs_context(struct fs_context *fc)
458162306a36Sopenharmony_ci{
458262306a36Sopenharmony_ci	struct shmem_options *ctx;
458362306a36Sopenharmony_ci
458462306a36Sopenharmony_ci	ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL);
458562306a36Sopenharmony_ci	if (!ctx)
458662306a36Sopenharmony_ci		return -ENOMEM;
458762306a36Sopenharmony_ci
458862306a36Sopenharmony_ci	ctx->mode = 0777 | S_ISVTX;
458962306a36Sopenharmony_ci	ctx->uid = current_fsuid();
459062306a36Sopenharmony_ci	ctx->gid = current_fsgid();
459162306a36Sopenharmony_ci
459262306a36Sopenharmony_ci	fc->fs_private = ctx;
459362306a36Sopenharmony_ci	fc->ops = &shmem_fs_context_ops;
459462306a36Sopenharmony_ci	return 0;
459562306a36Sopenharmony_ci}
459662306a36Sopenharmony_ci
/*
 * The "tmpfs" filesystem type, registered by shmem_init().  Mount parameter
 * descriptions are only available with CONFIG_TMPFS.
 *
 * NOTE(review): this definition appears to live in the CONFIG_SHMEM half of
 * the file (a "#else / !CONFIG_SHMEM" section with its own shmem_fs_type
 * follows later), which would make the #else arm of the fs_flags
 * conditional below unreachable - confirm before simplifying.
 */
static struct file_system_type shmem_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.init_fs_context = shmem_init_fs_context,
#ifdef CONFIG_TMPFS
	.parameters	= shmem_fs_parameters,
#endif
	.kill_sb	= kill_litter_super,
#ifdef CONFIG_SHMEM
	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
#else
	.fs_flags	= FS_USERNS_MOUNT,
#endif
};
461162306a36Sopenharmony_ci
/*
 * shmem_init - boot-time initialisation of shmem/tmpfs.
 *
 * Creates the inode cache, registers the quota format (CONFIG_TMPFS_QUOTA)
 * and the tmpfs filesystem type, then creates the kernel-internal mount
 * shm_mnt.  On any failure the steps already taken are undone in reverse
 * order and shm_mnt is left holding an ERR_PTR() with the error code.
 */
void __init shmem_init(void)
{
	int error;

	shmem_init_inodecache();

#ifdef CONFIG_TMPFS_QUOTA
	error = register_quota_format(&shmem_quota_format);
	if (error < 0) {
		pr_err("Could not register quota format\n");
		goto out3;
	}
#endif

	error = register_filesystem(&shmem_fs_type);
	if (error) {
		pr_err("Could not register tmpfs\n");
		goto out2;
	}

	shm_mnt = kern_mount(&shmem_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		pr_err("Could not kern_mount tmpfs\n");
		goto out1;
	}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	/*
	 * Apply the global shmem_huge setting to the internal mount when huge
	 * pages are usable; otherwise force the global setting back to "never".
	 */
	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
	else
		shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
#endif
	return;

	/* Error unwinding: each label undoes the step before the failing one */
out1:
	unregister_filesystem(&shmem_fs_type);
out2:
#ifdef CONFIG_TMPFS_QUOTA
	unregister_quota_format(&shmem_quota_format);
out3:
#endif
	shmem_destroy_inodecache();
	/* Users of shm_mnt see the error instead of a mount */
	shm_mnt = ERR_PTR(error);
}
465762306a36Sopenharmony_ci
465862306a36Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
465962306a36Sopenharmony_cistatic ssize_t shmem_enabled_show(struct kobject *kobj,
466062306a36Sopenharmony_ci				  struct kobj_attribute *attr, char *buf)
466162306a36Sopenharmony_ci{
466262306a36Sopenharmony_ci	static const int values[] = {
466362306a36Sopenharmony_ci		SHMEM_HUGE_ALWAYS,
466462306a36Sopenharmony_ci		SHMEM_HUGE_WITHIN_SIZE,
466562306a36Sopenharmony_ci		SHMEM_HUGE_ADVISE,
466662306a36Sopenharmony_ci		SHMEM_HUGE_NEVER,
466762306a36Sopenharmony_ci		SHMEM_HUGE_DENY,
466862306a36Sopenharmony_ci		SHMEM_HUGE_FORCE,
466962306a36Sopenharmony_ci	};
467062306a36Sopenharmony_ci	int len = 0;
467162306a36Sopenharmony_ci	int i;
467262306a36Sopenharmony_ci
467362306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(values); i++) {
467462306a36Sopenharmony_ci		len += sysfs_emit_at(buf, len,
467562306a36Sopenharmony_ci				     shmem_huge == values[i] ? "%s[%s]" : "%s%s",
467662306a36Sopenharmony_ci				     i ? " " : "",
467762306a36Sopenharmony_ci				     shmem_format_huge(values[i]));
467862306a36Sopenharmony_ci	}
467962306a36Sopenharmony_ci
468062306a36Sopenharmony_ci	len += sysfs_emit_at(buf, len, "\n");
468162306a36Sopenharmony_ci
468262306a36Sopenharmony_ci	return len;
468362306a36Sopenharmony_ci}
468462306a36Sopenharmony_ci
468562306a36Sopenharmony_cistatic ssize_t shmem_enabled_store(struct kobject *kobj,
468662306a36Sopenharmony_ci		struct kobj_attribute *attr, const char *buf, size_t count)
468762306a36Sopenharmony_ci{
468862306a36Sopenharmony_ci	char tmp[16];
468962306a36Sopenharmony_ci	int huge;
469062306a36Sopenharmony_ci
469162306a36Sopenharmony_ci	if (count + 1 > sizeof(tmp))
469262306a36Sopenharmony_ci		return -EINVAL;
469362306a36Sopenharmony_ci	memcpy(tmp, buf, count);
469462306a36Sopenharmony_ci	tmp[count] = '\0';
469562306a36Sopenharmony_ci	if (count && tmp[count - 1] == '\n')
469662306a36Sopenharmony_ci		tmp[count - 1] = '\0';
469762306a36Sopenharmony_ci
469862306a36Sopenharmony_ci	huge = shmem_parse_huge(tmp);
469962306a36Sopenharmony_ci	if (huge == -EINVAL)
470062306a36Sopenharmony_ci		return -EINVAL;
470162306a36Sopenharmony_ci	if (!has_transparent_hugepage() &&
470262306a36Sopenharmony_ci			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
470362306a36Sopenharmony_ci		return -EINVAL;
470462306a36Sopenharmony_ci
470562306a36Sopenharmony_ci	shmem_huge = huge;
470662306a36Sopenharmony_ci	if (shmem_huge > SHMEM_HUGE_DENY)
470762306a36Sopenharmony_ci		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
470862306a36Sopenharmony_ci	return count;
470962306a36Sopenharmony_ci}
471062306a36Sopenharmony_ci
471162306a36Sopenharmony_cistruct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
471262306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
471362306a36Sopenharmony_ci
471462306a36Sopenharmony_ci#else /* !CONFIG_SHMEM */
471562306a36Sopenharmony_ci
471662306a36Sopenharmony_ci/*
471762306a36Sopenharmony_ci * tiny-shmem: simple shmemfs and tmpfs using ramfs code
471862306a36Sopenharmony_ci *
 * This is intended for small systems where the benefits of the full
472062306a36Sopenharmony_ci * shmem code (swap-backed and resource-limited) are outweighed by
472162306a36Sopenharmony_ci * their complexity. On systems without swap this code should be
472262306a36Sopenharmony_ci * effectively equivalent, but much lighter weight.
472362306a36Sopenharmony_ci */
472462306a36Sopenharmony_ci
/*
 * tiny-shmem variant of the "tmpfs" filesystem type: context setup, mount
 * parameters and superblock teardown are all borrowed from ramfs.
 */
static struct file_system_type shmem_fs_type = {
	.name		= "tmpfs",
	.init_fs_context = ramfs_init_fs_context,
	.parameters	= ramfs_fs_parameters,
	.kill_sb	= ramfs_kill_sb,
	.fs_flags	= FS_USERNS_MOUNT,
};
473262306a36Sopenharmony_ci
473362306a36Sopenharmony_civoid __init shmem_init(void)
473462306a36Sopenharmony_ci{
473562306a36Sopenharmony_ci	BUG_ON(register_filesystem(&shmem_fs_type) != 0);
473662306a36Sopenharmony_ci
473762306a36Sopenharmony_ci	shm_mnt = kern_mount(&shmem_fs_type);
473862306a36Sopenharmony_ci	BUG_ON(IS_ERR(shm_mnt));
473962306a36Sopenharmony_ci}
474062306a36Sopenharmony_ci
/* tiny-shmem stub: ignores @type and always reports success (0). */
int shmem_unuse(unsigned int type)
{
	return 0;
}
474562306a36Sopenharmony_ci
/* tiny-shmem stub: performs no locking and always returns 0. */
int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
{
	return 0;
}
475062306a36Sopenharmony_ci
/* tiny-shmem stub: intentionally empty. */
void shmem_unlock_mapping(struct address_space *mapping)
{
}
475462306a36Sopenharmony_ci
475562306a36Sopenharmony_ci#ifdef CONFIG_MMU
475662306a36Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file,
475762306a36Sopenharmony_ci				      unsigned long addr, unsigned long len,
475862306a36Sopenharmony_ci				      unsigned long pgoff, unsigned long flags)
475962306a36Sopenharmony_ci{
476062306a36Sopenharmony_ci	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
476162306a36Sopenharmony_ci}
476262306a36Sopenharmony_ci#endif
476362306a36Sopenharmony_ci
476462306a36Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
476562306a36Sopenharmony_ci{
476662306a36Sopenharmony_ci	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
476762306a36Sopenharmony_ci}
476862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range);
476962306a36Sopenharmony_ci
/*
 * tiny-shmem aliases: the common code below refers to the shmem_* names,
 * which here resolve to the generic/ramfs implementations.  Size accounting
 * is a no-op: shmem_acct_size() always evaluates to 0 (success) and
 * shmem_unacct_size() expands to an empty statement.
 */
#define shmem_vm_ops				generic_file_vm_ops
#define shmem_anon_vm_ops			generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)
477562306a36Sopenharmony_ci
477662306a36Sopenharmony_cistatic inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir,
477762306a36Sopenharmony_ci					    umode_t mode, dev_t dev, unsigned long flags)
477862306a36Sopenharmony_ci{
477962306a36Sopenharmony_ci	struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
478062306a36Sopenharmony_ci	return inode ? inode : ERR_PTR(-ENOSPC);
478162306a36Sopenharmony_ci}
478262306a36Sopenharmony_ci
478362306a36Sopenharmony_ci#endif /* CONFIG_SHMEM */
478462306a36Sopenharmony_ci
478562306a36Sopenharmony_ci/* common code */
478662306a36Sopenharmony_ci
478762306a36Sopenharmony_cistatic struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
478862306a36Sopenharmony_ci				       unsigned long flags, unsigned int i_flags)
478962306a36Sopenharmony_ci{
479062306a36Sopenharmony_ci	struct inode *inode;
479162306a36Sopenharmony_ci	struct file *res;
479262306a36Sopenharmony_ci
479362306a36Sopenharmony_ci	if (IS_ERR(mnt))
479462306a36Sopenharmony_ci		return ERR_CAST(mnt);
479562306a36Sopenharmony_ci
479662306a36Sopenharmony_ci	if (size < 0 || size > MAX_LFS_FILESIZE)
479762306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
479862306a36Sopenharmony_ci
479962306a36Sopenharmony_ci	if (shmem_acct_size(flags, size))
480062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
480162306a36Sopenharmony_ci
480262306a36Sopenharmony_ci	if (is_idmapped_mnt(mnt))
480362306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
480462306a36Sopenharmony_ci
480562306a36Sopenharmony_ci	inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
480662306a36Sopenharmony_ci				S_IFREG | S_IRWXUGO, 0, flags);
480762306a36Sopenharmony_ci
480862306a36Sopenharmony_ci	if (IS_ERR(inode)) {
480962306a36Sopenharmony_ci		shmem_unacct_size(flags, size);
481062306a36Sopenharmony_ci		return ERR_CAST(inode);
481162306a36Sopenharmony_ci	}
481262306a36Sopenharmony_ci	inode->i_flags |= i_flags;
481362306a36Sopenharmony_ci	inode->i_size = size;
481462306a36Sopenharmony_ci	clear_nlink(inode);	/* It is unlinked */
481562306a36Sopenharmony_ci	res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
481662306a36Sopenharmony_ci	if (!IS_ERR(res))
481762306a36Sopenharmony_ci		res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
481862306a36Sopenharmony_ci				&shmem_file_operations);
481962306a36Sopenharmony_ci	if (IS_ERR(res))
482062306a36Sopenharmony_ci		iput(inode);
482162306a36Sopenharmony_ci	return res;
482262306a36Sopenharmony_ci}
482362306a36Sopenharmony_ci
/**
 * shmem_kernel_file_setup - get an unlinked, kernel-internal file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 *
 * The file must be kernel internal: it is created on shm_mnt with S_PRIVATE
 * set.  There will be NO LSM permission checks against the underlying inode,
 * so users of this interface must do LSM checks at a higher layer.  The
 * users are the big_key and shm implementations; LSM checks are provided at
 * the key or shm level rather than the inode.
 */
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
483862306a36Sopenharmony_ci
483962306a36Sopenharmony_ci/**
484062306a36Sopenharmony_ci * shmem_file_setup - get an unlinked file living in tmpfs
484162306a36Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps
484262306a36Sopenharmony_ci * @size: size to be set for the file
484362306a36Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
484462306a36Sopenharmony_ci */
484562306a36Sopenharmony_cistruct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
484662306a36Sopenharmony_ci{
484762306a36Sopenharmony_ci	return __shmem_file_setup(shm_mnt, name, size, flags, 0);
484862306a36Sopenharmony_ci}
484962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup);
485062306a36Sopenharmony_ci
485162306a36Sopenharmony_ci/**
485262306a36Sopenharmony_ci * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs
485362306a36Sopenharmony_ci * @mnt: the tmpfs mount where the file will be created
485462306a36Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps
485562306a36Sopenharmony_ci * @size: size to be set for the file
485662306a36Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
485762306a36Sopenharmony_ci */
485862306a36Sopenharmony_cistruct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
485962306a36Sopenharmony_ci				       loff_t size, unsigned long flags)
486062306a36Sopenharmony_ci{
486162306a36Sopenharmony_ci	return __shmem_file_setup(mnt, name, size, flags, 0);
486262306a36Sopenharmony_ci}
486362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
486462306a36Sopenharmony_ci
486562306a36Sopenharmony_ci/**
486662306a36Sopenharmony_ci * shmem_zero_setup - setup a shared anonymous mapping
486762306a36Sopenharmony_ci * @vma: the vma to be mmapped is prepared by do_mmap
486862306a36Sopenharmony_ci */
486962306a36Sopenharmony_ciint shmem_zero_setup(struct vm_area_struct *vma)
487062306a36Sopenharmony_ci{
487162306a36Sopenharmony_ci	struct file *file;
487262306a36Sopenharmony_ci	loff_t size = vma->vm_end - vma->vm_start;
487362306a36Sopenharmony_ci
487462306a36Sopenharmony_ci	/*
487562306a36Sopenharmony_ci	 * Cloning a new file under mmap_lock leads to a lock ordering conflict
487662306a36Sopenharmony_ci	 * between XFS directory reading and selinux: since this file is only
487762306a36Sopenharmony_ci	 * accessible to the user through its mapping, use S_PRIVATE flag to
487862306a36Sopenharmony_ci	 * bypass file security, in the same way as shmem_kernel_file_setup().
487962306a36Sopenharmony_ci	 */
488062306a36Sopenharmony_ci	file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
488162306a36Sopenharmony_ci	if (IS_ERR(file))
488262306a36Sopenharmony_ci		return PTR_ERR(file);
488362306a36Sopenharmony_ci
488462306a36Sopenharmony_ci	if (vma->vm_file)
488562306a36Sopenharmony_ci		fput(vma->vm_file);
488662306a36Sopenharmony_ci	vma->vm_file = file;
488762306a36Sopenharmony_ci	vma->vm_ops = &shmem_anon_vm_ops;
488862306a36Sopenharmony_ci
488962306a36Sopenharmony_ci	return 0;
489062306a36Sopenharmony_ci}
489162306a36Sopenharmony_ci
489262306a36Sopenharmony_ci/**
489362306a36Sopenharmony_ci * shmem_read_folio_gfp - read into page cache, using specified page allocation flags.
489462306a36Sopenharmony_ci * @mapping:	the folio's address_space
489562306a36Sopenharmony_ci * @index:	the folio index
489662306a36Sopenharmony_ci * @gfp:	the page allocator flags to use if allocating
489762306a36Sopenharmony_ci *
489862306a36Sopenharmony_ci * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
489962306a36Sopenharmony_ci * with any new page allocations done using the specified allocation flags.
490062306a36Sopenharmony_ci * But read_cache_page_gfp() uses the ->read_folio() method: which does not
490162306a36Sopenharmony_ci * suit tmpfs, since it may have pages in swapcache, and needs to find those
490262306a36Sopenharmony_ci * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
490362306a36Sopenharmony_ci *
490462306a36Sopenharmony_ci * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
490562306a36Sopenharmony_ci * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
490662306a36Sopenharmony_ci */
490762306a36Sopenharmony_cistruct folio *shmem_read_folio_gfp(struct address_space *mapping,
490862306a36Sopenharmony_ci		pgoff_t index, gfp_t gfp)
490962306a36Sopenharmony_ci{
491062306a36Sopenharmony_ci#ifdef CONFIG_SHMEM
491162306a36Sopenharmony_ci	struct inode *inode = mapping->host;
491262306a36Sopenharmony_ci	struct folio *folio;
491362306a36Sopenharmony_ci	int error;
491462306a36Sopenharmony_ci
491562306a36Sopenharmony_ci	BUG_ON(!shmem_mapping(mapping));
491662306a36Sopenharmony_ci	error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
491762306a36Sopenharmony_ci				  gfp, NULL, NULL, NULL);
491862306a36Sopenharmony_ci	if (error)
491962306a36Sopenharmony_ci		return ERR_PTR(error);
492062306a36Sopenharmony_ci
492162306a36Sopenharmony_ci	folio_unlock(folio);
492262306a36Sopenharmony_ci	return folio;
492362306a36Sopenharmony_ci#else
492462306a36Sopenharmony_ci	/*
492562306a36Sopenharmony_ci	 * The tiny !SHMEM case uses ramfs without swap
492662306a36Sopenharmony_ci	 */
492762306a36Sopenharmony_ci	return mapping_read_folio_gfp(mapping, index, gfp);
492862306a36Sopenharmony_ci#endif
492962306a36Sopenharmony_ci}
493062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_read_folio_gfp);
493162306a36Sopenharmony_ci
493262306a36Sopenharmony_cistruct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
493362306a36Sopenharmony_ci					 pgoff_t index, gfp_t gfp)
493462306a36Sopenharmony_ci{
493562306a36Sopenharmony_ci	struct folio *folio = shmem_read_folio_gfp(mapping, index, gfp);
493662306a36Sopenharmony_ci	struct page *page;
493762306a36Sopenharmony_ci
493862306a36Sopenharmony_ci	if (IS_ERR(folio))
493962306a36Sopenharmony_ci		return &folio->page;
494062306a36Sopenharmony_ci
494162306a36Sopenharmony_ci	page = folio_file_page(folio, index);
494262306a36Sopenharmony_ci	if (PageHWPoison(page)) {
494362306a36Sopenharmony_ci		folio_put(folio);
494462306a36Sopenharmony_ci		return ERR_PTR(-EIO);
494562306a36Sopenharmony_ci	}
494662306a36Sopenharmony_ci
494762306a36Sopenharmony_ci	return page;
494862306a36Sopenharmony_ci}
494962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
4950