162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * (C) 1997 Linus Torvalds
462306a36Sopenharmony_ci * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include <linux/export.h>
762306a36Sopenharmony_ci#include <linux/fs.h>
862306a36Sopenharmony_ci#include <linux/filelock.h>
962306a36Sopenharmony_ci#include <linux/mm.h>
1062306a36Sopenharmony_ci#include <linux/backing-dev.h>
1162306a36Sopenharmony_ci#include <linux/hash.h>
1262306a36Sopenharmony_ci#include <linux/swap.h>
1362306a36Sopenharmony_ci#include <linux/security.h>
1462306a36Sopenharmony_ci#include <linux/cdev.h>
1562306a36Sopenharmony_ci#include <linux/memblock.h>
1662306a36Sopenharmony_ci#include <linux/fsnotify.h>
1762306a36Sopenharmony_ci#include <linux/mount.h>
1862306a36Sopenharmony_ci#include <linux/posix_acl.h>
1962306a36Sopenharmony_ci#include <linux/buffer_head.h> /* for inode_has_buffers */
2062306a36Sopenharmony_ci#include <linux/ratelimit.h>
2162306a36Sopenharmony_ci#include <linux/list_lru.h>
2262306a36Sopenharmony_ci#include <linux/iversion.h>
2362306a36Sopenharmony_ci#include <trace/events/writeback.h>
2462306a36Sopenharmony_ci#include "internal.h"
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci/*
2762306a36Sopenharmony_ci * Inode locking rules:
2862306a36Sopenharmony_ci *
2962306a36Sopenharmony_ci * inode->i_lock protects:
3062306a36Sopenharmony_ci *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
3162306a36Sopenharmony_ci * Inode LRU list locks protect:
3262306a36Sopenharmony_ci *   inode->i_sb->s_inode_lru, inode->i_lru
3362306a36Sopenharmony_ci * inode->i_sb->s_inode_list_lock protects:
3462306a36Sopenharmony_ci *   inode->i_sb->s_inodes, inode->i_sb_list
3562306a36Sopenharmony_ci * bdi->wb.list_lock protects:
3662306a36Sopenharmony_ci *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
3762306a36Sopenharmony_ci * inode_hash_lock protects:
3862306a36Sopenharmony_ci *   inode_hashtable, inode->i_hash
3962306a36Sopenharmony_ci *
4062306a36Sopenharmony_ci * Lock ordering:
4162306a36Sopenharmony_ci *
4262306a36Sopenharmony_ci * inode->i_sb->s_inode_list_lock
4362306a36Sopenharmony_ci *   inode->i_lock
4462306a36Sopenharmony_ci *     Inode LRU list locks
4562306a36Sopenharmony_ci *
4662306a36Sopenharmony_ci * bdi->wb.list_lock
4762306a36Sopenharmony_ci *   inode->i_lock
4862306a36Sopenharmony_ci *
4962306a36Sopenharmony_ci * inode_hash_lock
5062306a36Sopenharmony_ci *   inode->i_sb->s_inode_list_lock
5162306a36Sopenharmony_ci *   inode->i_lock
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * iunique_lock
5462306a36Sopenharmony_ci *   inode_hash_lock
5562306a36Sopenharmony_ci */
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic unsigned int i_hash_mask __read_mostly;
5862306a36Sopenharmony_cistatic unsigned int i_hash_shift __read_mostly;
5962306a36Sopenharmony_cistatic struct hlist_head *inode_hashtable __read_mostly;
6062306a36Sopenharmony_cistatic __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/*
6362306a36Sopenharmony_ci * Empty aops. Can be used for the cases where the user does not
6462306a36Sopenharmony_ci * define any of the address_space operations.
6562306a36Sopenharmony_ci */
6662306a36Sopenharmony_ciconst struct address_space_operations empty_aops = {
6762306a36Sopenharmony_ci};
6862306a36Sopenharmony_ciEXPORT_SYMBOL(empty_aops);
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, nr_inodes);
7162306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, nr_unused);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic struct kmem_cache *inode_cachep __read_mostly;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_cistatic long get_nr_inodes(void)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	int i;
7862306a36Sopenharmony_ci	long sum = 0;
7962306a36Sopenharmony_ci	for_each_possible_cpu(i)
8062306a36Sopenharmony_ci		sum += per_cpu(nr_inodes, i);
8162306a36Sopenharmony_ci	return sum < 0 ? 0 : sum;
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic inline long get_nr_inodes_unused(void)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	int i;
8762306a36Sopenharmony_ci	long sum = 0;
8862306a36Sopenharmony_ci	for_each_possible_cpu(i)
8962306a36Sopenharmony_ci		sum += per_cpu(nr_unused, i);
9062306a36Sopenharmony_ci	return sum < 0 ? 0 : sum;
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
/*
 * Estimate the number of dirty inodes as "all inodes minus unused inodes".
 * This is not an actual count of dirty inodes, only a wild approximation;
 * the result is never negative.
 */
long get_nr_dirty_inodes(void)
{
	long in_use = get_nr_inodes() - get_nr_inodes_unused();

	if (in_use <= 0)
		return 0;
	return in_use;
}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci/*
10162306a36Sopenharmony_ci * Handle nr_inode sysctl
10262306a36Sopenharmony_ci */
10362306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
10462306a36Sopenharmony_ci/*
10562306a36Sopenharmony_ci * Statistics gathering..
10662306a36Sopenharmony_ci */
10762306a36Sopenharmony_cistatic struct inodes_stat_t inodes_stat;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistatic int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
11062306a36Sopenharmony_ci			  size_t *lenp, loff_t *ppos)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	inodes_stat.nr_inodes = get_nr_inodes();
11362306a36Sopenharmony_ci	inodes_stat.nr_unused = get_nr_inodes_unused();
11462306a36Sopenharmony_ci	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic struct ctl_table inodes_sysctls[] = {
11862306a36Sopenharmony_ci	{
11962306a36Sopenharmony_ci		.procname	= "inode-nr",
12062306a36Sopenharmony_ci		.data		= &inodes_stat,
12162306a36Sopenharmony_ci		.maxlen		= 2*sizeof(long),
12262306a36Sopenharmony_ci		.mode		= 0444,
12362306a36Sopenharmony_ci		.proc_handler	= proc_nr_inodes,
12462306a36Sopenharmony_ci	},
12562306a36Sopenharmony_ci	{
12662306a36Sopenharmony_ci		.procname	= "inode-state",
12762306a36Sopenharmony_ci		.data		= &inodes_stat,
12862306a36Sopenharmony_ci		.maxlen		= 7*sizeof(long),
12962306a36Sopenharmony_ci		.mode		= 0444,
13062306a36Sopenharmony_ci		.proc_handler	= proc_nr_inodes,
13162306a36Sopenharmony_ci	},
13262306a36Sopenharmony_ci	{ }
13362306a36Sopenharmony_ci};
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_cistatic int __init init_fs_inode_sysctls(void)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	register_sysctl_init("fs", inodes_sysctls);
13862306a36Sopenharmony_ci	return 0;
13962306a36Sopenharmony_ci}
14062306a36Sopenharmony_ciearly_initcall(init_fs_inode_sysctls);
14162306a36Sopenharmony_ci#endif
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_cistatic int no_open(struct inode *inode, struct file *file)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	return -ENXIO;
14662306a36Sopenharmony_ci}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci/**
14962306a36Sopenharmony_ci * inode_init_always - perform inode structure initialisation
15062306a36Sopenharmony_ci * @sb: superblock inode belongs to
15162306a36Sopenharmony_ci * @inode: inode to initialise
15262306a36Sopenharmony_ci *
15362306a36Sopenharmony_ci * These are initializations that need to be done on every inode
15462306a36Sopenharmony_ci * allocation as the fields are not initialised by slab allocation.
15562306a36Sopenharmony_ci */
15662306a36Sopenharmony_ciint inode_init_always(struct super_block *sb, struct inode *inode)
15762306a36Sopenharmony_ci{
15862306a36Sopenharmony_ci	static const struct inode_operations empty_iops;
15962306a36Sopenharmony_ci	static const struct file_operations no_open_fops = {.open = no_open};
16062306a36Sopenharmony_ci	struct address_space *const mapping = &inode->i_data;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	inode->i_sb = sb;
16362306a36Sopenharmony_ci	inode->i_blkbits = sb->s_blocksize_bits;
16462306a36Sopenharmony_ci	inode->i_flags = 0;
16562306a36Sopenharmony_ci	atomic64_set(&inode->i_sequence, 0);
16662306a36Sopenharmony_ci	atomic_set(&inode->i_count, 1);
16762306a36Sopenharmony_ci	inode->i_op = &empty_iops;
16862306a36Sopenharmony_ci	inode->i_fop = &no_open_fops;
16962306a36Sopenharmony_ci	inode->i_ino = 0;
17062306a36Sopenharmony_ci	inode->__i_nlink = 1;
17162306a36Sopenharmony_ci	inode->i_opflags = 0;
17262306a36Sopenharmony_ci	if (sb->s_xattr)
17362306a36Sopenharmony_ci		inode->i_opflags |= IOP_XATTR;
17462306a36Sopenharmony_ci	i_uid_write(inode, 0);
17562306a36Sopenharmony_ci	i_gid_write(inode, 0);
17662306a36Sopenharmony_ci	atomic_set(&inode->i_writecount, 0);
17762306a36Sopenharmony_ci	inode->i_size = 0;
17862306a36Sopenharmony_ci	inode->i_write_hint = WRITE_LIFE_NOT_SET;
17962306a36Sopenharmony_ci	inode->i_blocks = 0;
18062306a36Sopenharmony_ci	inode->i_bytes = 0;
18162306a36Sopenharmony_ci	inode->i_generation = 0;
18262306a36Sopenharmony_ci	inode->i_pipe = NULL;
18362306a36Sopenharmony_ci	inode->i_cdev = NULL;
18462306a36Sopenharmony_ci	inode->i_link = NULL;
18562306a36Sopenharmony_ci	inode->i_dir_seq = 0;
18662306a36Sopenharmony_ci	inode->i_rdev = 0;
18762306a36Sopenharmony_ci	inode->dirtied_when = 0;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK
19062306a36Sopenharmony_ci	inode->i_wb_frn_winner = 0;
19162306a36Sopenharmony_ci	inode->i_wb_frn_avg_time = 0;
19262306a36Sopenharmony_ci	inode->i_wb_frn_history = 0;
19362306a36Sopenharmony_ci#endif
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	spin_lock_init(&inode->i_lock);
19662306a36Sopenharmony_ci	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	init_rwsem(&inode->i_rwsem);
19962306a36Sopenharmony_ci	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	atomic_set(&inode->i_dio_count, 0);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	mapping->a_ops = &empty_aops;
20462306a36Sopenharmony_ci	mapping->host = inode;
20562306a36Sopenharmony_ci	mapping->flags = 0;
20662306a36Sopenharmony_ci	mapping->wb_err = 0;
20762306a36Sopenharmony_ci	atomic_set(&mapping->i_mmap_writable, 0);
20862306a36Sopenharmony_ci#ifdef CONFIG_READ_ONLY_THP_FOR_FS
20962306a36Sopenharmony_ci	atomic_set(&mapping->nr_thps, 0);
21062306a36Sopenharmony_ci#endif
21162306a36Sopenharmony_ci	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
21262306a36Sopenharmony_ci	mapping->private_data = NULL;
21362306a36Sopenharmony_ci	mapping->writeback_index = 0;
21462306a36Sopenharmony_ci	init_rwsem(&mapping->invalidate_lock);
21562306a36Sopenharmony_ci	lockdep_set_class_and_name(&mapping->invalidate_lock,
21662306a36Sopenharmony_ci				   &sb->s_type->invalidate_lock_key,
21762306a36Sopenharmony_ci				   "mapping.invalidate_lock");
21862306a36Sopenharmony_ci	if (sb->s_iflags & SB_I_STABLE_WRITES)
21962306a36Sopenharmony_ci		mapping_set_stable_writes(mapping);
22062306a36Sopenharmony_ci	inode->i_private = NULL;
22162306a36Sopenharmony_ci	inode->i_mapping = mapping;
22262306a36Sopenharmony_ci	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
22362306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL
22462306a36Sopenharmony_ci	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
22562306a36Sopenharmony_ci#endif
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci#ifdef CONFIG_FSNOTIFY
22862306a36Sopenharmony_ci	inode->i_fsnotify_mask = 0;
22962306a36Sopenharmony_ci#endif
23062306a36Sopenharmony_ci	inode->i_flctx = NULL;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	if (unlikely(security_inode_alloc(inode)))
23362306a36Sopenharmony_ci		return -ENOMEM;
23462306a36Sopenharmony_ci	this_cpu_inc(nr_inodes);
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	return 0;
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_always);
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_civoid free_inode_nonrcu(struct inode *inode)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	kmem_cache_free(inode_cachep, inode);
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ciEXPORT_SYMBOL(free_inode_nonrcu);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_cistatic void i_callback(struct rcu_head *head)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	struct inode *inode = container_of(head, struct inode, i_rcu);
24962306a36Sopenharmony_ci	if (inode->free_inode)
25062306a36Sopenharmony_ci		inode->free_inode(inode);
25162306a36Sopenharmony_ci	else
25262306a36Sopenharmony_ci		free_inode_nonrcu(inode);
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_cistatic struct inode *alloc_inode(struct super_block *sb)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	const struct super_operations *ops = sb->s_op;
25862306a36Sopenharmony_ci	struct inode *inode;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	if (ops->alloc_inode)
26162306a36Sopenharmony_ci		inode = ops->alloc_inode(sb);
26262306a36Sopenharmony_ci	else
26362306a36Sopenharmony_ci		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	if (!inode)
26662306a36Sopenharmony_ci		return NULL;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	if (unlikely(inode_init_always(sb, inode))) {
26962306a36Sopenharmony_ci		if (ops->destroy_inode) {
27062306a36Sopenharmony_ci			ops->destroy_inode(inode);
27162306a36Sopenharmony_ci			if (!ops->free_inode)
27262306a36Sopenharmony_ci				return NULL;
27362306a36Sopenharmony_ci		}
27462306a36Sopenharmony_ci		inode->free_inode = ops->free_inode;
27562306a36Sopenharmony_ci		i_callback(&inode->i_rcu);
27662306a36Sopenharmony_ci		return NULL;
27762306a36Sopenharmony_ci	}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	return inode;
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_civoid __destroy_inode(struct inode *inode)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	BUG_ON(inode_has_buffers(inode));
28562306a36Sopenharmony_ci	inode_detach_wb(inode);
28662306a36Sopenharmony_ci	security_inode_free(inode);
28762306a36Sopenharmony_ci	fsnotify_inode_delete(inode);
28862306a36Sopenharmony_ci	locks_free_lock_context(inode);
28962306a36Sopenharmony_ci	if (!inode->i_nlink) {
29062306a36Sopenharmony_ci		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
29162306a36Sopenharmony_ci		atomic_long_dec(&inode->i_sb->s_remove_count);
29262306a36Sopenharmony_ci	}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL
29562306a36Sopenharmony_ci	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
29662306a36Sopenharmony_ci		posix_acl_release(inode->i_acl);
29762306a36Sopenharmony_ci	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
29862306a36Sopenharmony_ci		posix_acl_release(inode->i_default_acl);
29962306a36Sopenharmony_ci#endif
30062306a36Sopenharmony_ci	this_cpu_dec(nr_inodes);
30162306a36Sopenharmony_ci}
30262306a36Sopenharmony_ciEXPORT_SYMBOL(__destroy_inode);
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_cistatic void destroy_inode(struct inode *inode)
30562306a36Sopenharmony_ci{
30662306a36Sopenharmony_ci	const struct super_operations *ops = inode->i_sb->s_op;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	BUG_ON(!list_empty(&inode->i_lru));
30962306a36Sopenharmony_ci	__destroy_inode(inode);
31062306a36Sopenharmony_ci	if (ops->destroy_inode) {
31162306a36Sopenharmony_ci		ops->destroy_inode(inode);
31262306a36Sopenharmony_ci		if (!ops->free_inode)
31362306a36Sopenharmony_ci			return;
31462306a36Sopenharmony_ci	}
31562306a36Sopenharmony_ci	inode->free_inode = ops->free_inode;
31662306a36Sopenharmony_ci	call_rcu(&inode->i_rcu, i_callback);
31762306a36Sopenharmony_ci}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci/**
32062306a36Sopenharmony_ci * drop_nlink - directly drop an inode's link count
32162306a36Sopenharmony_ci * @inode: inode
32262306a36Sopenharmony_ci *
32362306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any
32462306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink.  In cases
32562306a36Sopenharmony_ci * where we are attempting to track writes to the
32662306a36Sopenharmony_ci * filesystem, a decrement to zero means an imminent
32762306a36Sopenharmony_ci * write when the file is truncated and actually unlinked
32862306a36Sopenharmony_ci * on the filesystem.
32962306a36Sopenharmony_ci */
33062306a36Sopenharmony_civoid drop_nlink(struct inode *inode)
33162306a36Sopenharmony_ci{
33262306a36Sopenharmony_ci	WARN_ON(inode->i_nlink == 0);
33362306a36Sopenharmony_ci	inode->__i_nlink--;
33462306a36Sopenharmony_ci	if (!inode->i_nlink)
33562306a36Sopenharmony_ci		atomic_long_inc(&inode->i_sb->s_remove_count);
33662306a36Sopenharmony_ci}
33762306a36Sopenharmony_ciEXPORT_SYMBOL(drop_nlink);
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci/**
34062306a36Sopenharmony_ci * clear_nlink - directly zero an inode's link count
34162306a36Sopenharmony_ci * @inode: inode
34262306a36Sopenharmony_ci *
34362306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any
34462306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink.  See
34562306a36Sopenharmony_ci * drop_nlink() for why we care about i_nlink hitting zero.
34662306a36Sopenharmony_ci */
34762306a36Sopenharmony_civoid clear_nlink(struct inode *inode)
34862306a36Sopenharmony_ci{
34962306a36Sopenharmony_ci	if (inode->i_nlink) {
35062306a36Sopenharmony_ci		inode->__i_nlink = 0;
35162306a36Sopenharmony_ci		atomic_long_inc(&inode->i_sb->s_remove_count);
35262306a36Sopenharmony_ci	}
35362306a36Sopenharmony_ci}
35462306a36Sopenharmony_ciEXPORT_SYMBOL(clear_nlink);
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci/**
35762306a36Sopenharmony_ci * set_nlink - directly set an inode's link count
35862306a36Sopenharmony_ci * @inode: inode
35962306a36Sopenharmony_ci * @nlink: new nlink (should be non-zero)
36062306a36Sopenharmony_ci *
36162306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any
36262306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink.
36362306a36Sopenharmony_ci */
36462306a36Sopenharmony_civoid set_nlink(struct inode *inode, unsigned int nlink)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	if (!nlink) {
36762306a36Sopenharmony_ci		clear_nlink(inode);
36862306a36Sopenharmony_ci	} else {
36962306a36Sopenharmony_ci		/* Yes, some filesystems do change nlink from zero to one */
37062306a36Sopenharmony_ci		if (inode->i_nlink == 0)
37162306a36Sopenharmony_ci			atomic_long_dec(&inode->i_sb->s_remove_count);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci		inode->__i_nlink = nlink;
37462306a36Sopenharmony_ci	}
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ciEXPORT_SYMBOL(set_nlink);
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci/**
37962306a36Sopenharmony_ci * inc_nlink - directly increment an inode's link count
38062306a36Sopenharmony_ci * @inode: inode
38162306a36Sopenharmony_ci *
38262306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any
38362306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink.  Currently,
38462306a36Sopenharmony_ci * it is only here for parity with dec_nlink().
38562306a36Sopenharmony_ci */
38662306a36Sopenharmony_civoid inc_nlink(struct inode *inode)
38762306a36Sopenharmony_ci{
38862306a36Sopenharmony_ci	if (unlikely(inode->i_nlink == 0)) {
38962306a36Sopenharmony_ci		WARN_ON(!(inode->i_state & I_LINKABLE));
39062306a36Sopenharmony_ci		atomic_long_dec(&inode->i_sb->s_remove_count);
39162306a36Sopenharmony_ci	}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	inode->__i_nlink++;
39462306a36Sopenharmony_ci}
39562306a36Sopenharmony_ciEXPORT_SYMBOL(inc_nlink);
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_cistatic void __address_space_init_once(struct address_space *mapping)
39862306a36Sopenharmony_ci{
39962306a36Sopenharmony_ci	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
40062306a36Sopenharmony_ci	init_rwsem(&mapping->i_mmap_rwsem);
40162306a36Sopenharmony_ci	INIT_LIST_HEAD(&mapping->private_list);
40262306a36Sopenharmony_ci	spin_lock_init(&mapping->private_lock);
40362306a36Sopenharmony_ci	mapping->i_mmap = RB_ROOT_CACHED;
40462306a36Sopenharmony_ci}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_civoid address_space_init_once(struct address_space *mapping)
40762306a36Sopenharmony_ci{
40862306a36Sopenharmony_ci	memset(mapping, 0, sizeof(*mapping));
40962306a36Sopenharmony_ci	__address_space_init_once(mapping);
41062306a36Sopenharmony_ci}
41162306a36Sopenharmony_ciEXPORT_SYMBOL(address_space_init_once);
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci/*
41462306a36Sopenharmony_ci * These are initializations that only need to be done
41562306a36Sopenharmony_ci * once, because the fields are idempotent across use
41662306a36Sopenharmony_ci * of the inode, so let the slab aware of that.
41762306a36Sopenharmony_ci */
41862306a36Sopenharmony_civoid inode_init_once(struct inode *inode)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	memset(inode, 0, sizeof(*inode));
42162306a36Sopenharmony_ci	INIT_HLIST_NODE(&inode->i_hash);
42262306a36Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_devices);
42362306a36Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_io_list);
42462306a36Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_wb_list);
42562306a36Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_lru);
42662306a36Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_sb_list);
42762306a36Sopenharmony_ci	__address_space_init_once(&inode->i_data);
42862306a36Sopenharmony_ci	i_size_ordered_init(inode);
42962306a36Sopenharmony_ci}
43062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_once);
43162306a36Sopenharmony_ci
/*
 * Constructor-style wrapper taking an untyped pointer: @foo is a
 * struct inode that gets its one-time initialisation.
 */
static void init_once(void *foo)
{
	inode_init_once(foo);
}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci/*
44062306a36Sopenharmony_ci * inode->i_lock must be held
44162306a36Sopenharmony_ci */
44262306a36Sopenharmony_civoid __iget(struct inode *inode)
44362306a36Sopenharmony_ci{
44462306a36Sopenharmony_ci	atomic_inc(&inode->i_count);
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci/*
44862306a36Sopenharmony_ci * get additional reference to inode; caller must already hold one.
44962306a36Sopenharmony_ci */
45062306a36Sopenharmony_civoid ihold(struct inode *inode)
45162306a36Sopenharmony_ci{
45262306a36Sopenharmony_ci	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
45362306a36Sopenharmony_ci}
45462306a36Sopenharmony_ciEXPORT_SYMBOL(ihold);
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_cistatic void __inode_add_lru(struct inode *inode, bool rotate)
45762306a36Sopenharmony_ci{
45862306a36Sopenharmony_ci	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
45962306a36Sopenharmony_ci		return;
46062306a36Sopenharmony_ci	if (atomic_read(&inode->i_count))
46162306a36Sopenharmony_ci		return;
46262306a36Sopenharmony_ci	if (!(inode->i_sb->s_flags & SB_ACTIVE))
46362306a36Sopenharmony_ci		return;
46462306a36Sopenharmony_ci	if (!mapping_shrinkable(&inode->i_data))
46562306a36Sopenharmony_ci		return;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
46862306a36Sopenharmony_ci		this_cpu_inc(nr_unused);
46962306a36Sopenharmony_ci	else if (rotate)
47062306a36Sopenharmony_ci		inode->i_state |= I_REFERENCED;
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci/*
47462306a36Sopenharmony_ci * Add inode to LRU if needed (inode is unused and clean).
47562306a36Sopenharmony_ci *
47662306a36Sopenharmony_ci * Needs inode->i_lock held.
47762306a36Sopenharmony_ci */
47862306a36Sopenharmony_civoid inode_add_lru(struct inode *inode)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	__inode_add_lru(inode, false);
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistatic void inode_lru_list_del(struct inode *inode)
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
48662306a36Sopenharmony_ci		this_cpu_dec(nr_unused);
48762306a36Sopenharmony_ci}
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci/**
49062306a36Sopenharmony_ci * inode_sb_list_add - add inode to the superblock list of inodes
49162306a36Sopenharmony_ci * @inode: inode to add
49262306a36Sopenharmony_ci */
49362306a36Sopenharmony_civoid inode_sb_list_add(struct inode *inode)
49462306a36Sopenharmony_ci{
49562306a36Sopenharmony_ci	spin_lock(&inode->i_sb->s_inode_list_lock);
49662306a36Sopenharmony_ci	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
49762306a36Sopenharmony_ci	spin_unlock(&inode->i_sb->s_inode_list_lock);
49862306a36Sopenharmony_ci}
49962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inode_sb_list_add);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_cistatic inline void inode_sb_list_del(struct inode *inode)
50262306a36Sopenharmony_ci{
50362306a36Sopenharmony_ci	if (!list_empty(&inode->i_sb_list)) {
50462306a36Sopenharmony_ci		spin_lock(&inode->i_sb->s_inode_list_lock);
50562306a36Sopenharmony_ci		list_del_init(&inode->i_sb_list);
50662306a36Sopenharmony_ci		spin_unlock(&inode->i_sb->s_inode_list_lock);
50762306a36Sopenharmony_ci	}
50862306a36Sopenharmony_ci}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_cistatic unsigned long hash(struct super_block *sb, unsigned long hashval)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	unsigned long tmp;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
51562306a36Sopenharmony_ci			L1_CACHE_BYTES;
51662306a36Sopenharmony_ci	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
51762306a36Sopenharmony_ci	return tmp & i_hash_mask;
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci/**
52162306a36Sopenharmony_ci *	__insert_inode_hash - hash an inode
52262306a36Sopenharmony_ci *	@inode: unhashed inode
52362306a36Sopenharmony_ci *	@hashval: unsigned long value used to locate this object in the
52462306a36Sopenharmony_ci *		inode_hashtable.
52562306a36Sopenharmony_ci *
52662306a36Sopenharmony_ci *	Add an inode to the inode hash for this superblock.
52762306a36Sopenharmony_ci */
52862306a36Sopenharmony_civoid __insert_inode_hash(struct inode *inode, unsigned long hashval)
52962306a36Sopenharmony_ci{
53062306a36Sopenharmony_ci	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
53362306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
53462306a36Sopenharmony_ci	hlist_add_head_rcu(&inode->i_hash, b);
53562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
53662306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
53762306a36Sopenharmony_ci}
53862306a36Sopenharmony_ciEXPORT_SYMBOL(__insert_inode_hash);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci/**
54162306a36Sopenharmony_ci *	__remove_inode_hash - remove an inode from the hash
54262306a36Sopenharmony_ci *	@inode: inode to unhash
54362306a36Sopenharmony_ci *
54462306a36Sopenharmony_ci *	Remove an inode from the superblock.
54562306a36Sopenharmony_ci */
54662306a36Sopenharmony_civoid __remove_inode_hash(struct inode *inode)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
54962306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
55062306a36Sopenharmony_ci	hlist_del_init_rcu(&inode->i_hash);
55162306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
55262306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
55362306a36Sopenharmony_ci}
55462306a36Sopenharmony_ciEXPORT_SYMBOL(__remove_inode_hash);
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_civoid dump_mapping(const struct address_space *mapping)
55762306a36Sopenharmony_ci{
55862306a36Sopenharmony_ci	struct inode *host;
55962306a36Sopenharmony_ci	const struct address_space_operations *a_ops;
56062306a36Sopenharmony_ci	struct hlist_node *dentry_first;
56162306a36Sopenharmony_ci	struct dentry *dentry_ptr;
56262306a36Sopenharmony_ci	struct dentry dentry;
56362306a36Sopenharmony_ci	unsigned long ino;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	/*
56662306a36Sopenharmony_ci	 * If mapping is an invalid pointer, we don't want to crash
56762306a36Sopenharmony_ci	 * accessing it, so probe everything depending on it carefully.
56862306a36Sopenharmony_ci	 */
56962306a36Sopenharmony_ci	if (get_kernel_nofault(host, &mapping->host) ||
57062306a36Sopenharmony_ci	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
57162306a36Sopenharmony_ci		pr_warn("invalid mapping:%px\n", mapping);
57262306a36Sopenharmony_ci		return;
57362306a36Sopenharmony_ci	}
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	if (!host) {
57662306a36Sopenharmony_ci		pr_warn("aops:%ps\n", a_ops);
57762306a36Sopenharmony_ci		return;
57862306a36Sopenharmony_ci	}
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
58162306a36Sopenharmony_ci	    get_kernel_nofault(ino, &host->i_ino)) {
58262306a36Sopenharmony_ci		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
58362306a36Sopenharmony_ci		return;
58462306a36Sopenharmony_ci	}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	if (!dentry_first) {
58762306a36Sopenharmony_ci		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
58862306a36Sopenharmony_ci		return;
58962306a36Sopenharmony_ci	}
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
59262306a36Sopenharmony_ci	if (get_kernel_nofault(dentry, dentry_ptr)) {
59362306a36Sopenharmony_ci		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
59462306a36Sopenharmony_ci				a_ops, ino, dentry_ptr);
59562306a36Sopenharmony_ci		return;
59662306a36Sopenharmony_ci	}
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	/*
59962306a36Sopenharmony_ci	 * if dentry is corrupted, the %pd handler may still crash,
60062306a36Sopenharmony_ci	 * but it's unlikely that we reach here with a corrupt mapping
60162306a36Sopenharmony_ci	 */
60262306a36Sopenharmony_ci	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
60362306a36Sopenharmony_ci}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_civoid clear_inode(struct inode *inode)
60662306a36Sopenharmony_ci{
60762306a36Sopenharmony_ci	/*
60862306a36Sopenharmony_ci	 * We have to cycle the i_pages lock here because reclaim can be in the
60962306a36Sopenharmony_ci	 * process of removing the last page (in __filemap_remove_folio())
61062306a36Sopenharmony_ci	 * and we must not free the mapping under it.
61162306a36Sopenharmony_ci	 */
61262306a36Sopenharmony_ci	xa_lock_irq(&inode->i_data.i_pages);
61362306a36Sopenharmony_ci	BUG_ON(inode->i_data.nrpages);
61462306a36Sopenharmony_ci	/*
61562306a36Sopenharmony_ci	 * Almost always, mapping_empty(&inode->i_data) here; but there are
61662306a36Sopenharmony_ci	 * two known and long-standing ways in which nodes may get left behind
61762306a36Sopenharmony_ci	 * (when deep radix-tree node allocation failed partway; or when THP
61862306a36Sopenharmony_ci	 * collapse_file() failed). Until those two known cases are cleaned up,
61962306a36Sopenharmony_ci	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
62062306a36Sopenharmony_ci	 * nor even WARN_ON(!mapping_empty).
62162306a36Sopenharmony_ci	 */
62262306a36Sopenharmony_ci	xa_unlock_irq(&inode->i_data.i_pages);
62362306a36Sopenharmony_ci	BUG_ON(!list_empty(&inode->i_data.private_list));
62462306a36Sopenharmony_ci	BUG_ON(!(inode->i_state & I_FREEING));
62562306a36Sopenharmony_ci	BUG_ON(inode->i_state & I_CLEAR);
62662306a36Sopenharmony_ci	BUG_ON(!list_empty(&inode->i_wb_list));
62762306a36Sopenharmony_ci	/* don't need i_lock here, no concurrent mods to i_state */
62862306a36Sopenharmony_ci	inode->i_state = I_FREEING | I_CLEAR;
62962306a36Sopenharmony_ci}
63062306a36Sopenharmony_ciEXPORT_SYMBOL(clear_inode);
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci/*
63362306a36Sopenharmony_ci * Free the inode passed in, removing it from the lists it is still connected
63462306a36Sopenharmony_ci * to. We remove any pages still attached to the inode and wait for any IO that
63562306a36Sopenharmony_ci * is still in progress before finally destroying the inode.
63662306a36Sopenharmony_ci *
63762306a36Sopenharmony_ci * An inode must already be marked I_FREEING so that we avoid the inode being
63862306a36Sopenharmony_ci * moved back onto lists if we race with other code that manipulates the lists
63962306a36Sopenharmony_ci * (e.g. writeback_single_inode). The caller is responsible for setting this.
64062306a36Sopenharmony_ci *
64162306a36Sopenharmony_ci * An inode must already be removed from the LRU list before being evicted from
64262306a36Sopenharmony_ci * the cache. This should occur atomically with setting the I_FREEING state
64362306a36Sopenharmony_ci * flag, so no inodes here should ever be on the LRU when being evicted.
64462306a36Sopenharmony_ci */
64562306a36Sopenharmony_cistatic void evict(struct inode *inode)
64662306a36Sopenharmony_ci{
64762306a36Sopenharmony_ci	const struct super_operations *op = inode->i_sb->s_op;
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	BUG_ON(!(inode->i_state & I_FREEING));
65062306a36Sopenharmony_ci	BUG_ON(!list_empty(&inode->i_lru));
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	if (!list_empty(&inode->i_io_list))
65362306a36Sopenharmony_ci		inode_io_list_del(inode);
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	inode_sb_list_del(inode);
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	/*
65862306a36Sopenharmony_ci	 * Wait for flusher thread to be done with the inode so that filesystem
65962306a36Sopenharmony_ci	 * does not start destroying it while writeback is still running. Since
66062306a36Sopenharmony_ci	 * the inode has I_FREEING set, flusher thread won't start new work on
66162306a36Sopenharmony_ci	 * the inode.  We just have to wait for running writeback to finish.
66262306a36Sopenharmony_ci	 */
66362306a36Sopenharmony_ci	inode_wait_for_writeback(inode);
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci	if (op->evict_inode) {
66662306a36Sopenharmony_ci		op->evict_inode(inode);
66762306a36Sopenharmony_ci	} else {
66862306a36Sopenharmony_ci		truncate_inode_pages_final(&inode->i_data);
66962306a36Sopenharmony_ci		clear_inode(inode);
67062306a36Sopenharmony_ci	}
67162306a36Sopenharmony_ci	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
67262306a36Sopenharmony_ci		cd_forget(inode);
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	remove_inode_hash(inode);
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
67762306a36Sopenharmony_ci	wake_up_bit(&inode->i_state, __I_NEW);
67862306a36Sopenharmony_ci	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
67962306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	destroy_inode(inode);
68262306a36Sopenharmony_ci}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci/*
68562306a36Sopenharmony_ci * dispose_list - dispose of the contents of a local list
68662306a36Sopenharmony_ci * @head: the head of the list to free
68762306a36Sopenharmony_ci *
68862306a36Sopenharmony_ci * Dispose-list gets a local list with local inodes in it, so it doesn't
68962306a36Sopenharmony_ci * need to worry about list corruption and SMP locks.
69062306a36Sopenharmony_ci */
69162306a36Sopenharmony_cistatic void dispose_list(struct list_head *head)
69262306a36Sopenharmony_ci{
69362306a36Sopenharmony_ci	while (!list_empty(head)) {
69462306a36Sopenharmony_ci		struct inode *inode;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci		inode = list_first_entry(head, struct inode, i_lru);
69762306a36Sopenharmony_ci		list_del_init(&inode->i_lru);
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci		evict(inode);
70062306a36Sopenharmony_ci		cond_resched();
70162306a36Sopenharmony_ci	}
70262306a36Sopenharmony_ci}
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci/**
70562306a36Sopenharmony_ci * evict_inodes	- evict all evictable inodes for a superblock
70662306a36Sopenharmony_ci * @sb:		superblock to operate on
70762306a36Sopenharmony_ci *
70862306a36Sopenharmony_ci * Make sure that no inodes with zero refcount are retained.  This is
70962306a36Sopenharmony_ci * called by superblock shutdown after having SB_ACTIVE flag removed,
71062306a36Sopenharmony_ci * so any inode reaching zero refcount during or after that call will
71162306a36Sopenharmony_ci * be immediately evicted.
71262306a36Sopenharmony_ci */
71362306a36Sopenharmony_civoid evict_inodes(struct super_block *sb)
71462306a36Sopenharmony_ci{
71562306a36Sopenharmony_ci	struct inode *inode, *next;
71662306a36Sopenharmony_ci	LIST_HEAD(dispose);
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ciagain:
71962306a36Sopenharmony_ci	spin_lock(&sb->s_inode_list_lock);
72062306a36Sopenharmony_ci	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
72162306a36Sopenharmony_ci		if (atomic_read(&inode->i_count))
72262306a36Sopenharmony_ci			continue;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
72562306a36Sopenharmony_ci		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
72662306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
72762306a36Sopenharmony_ci			continue;
72862306a36Sopenharmony_ci		}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci		inode->i_state |= I_FREEING;
73162306a36Sopenharmony_ci		inode_lru_list_del(inode);
73262306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
73362306a36Sopenharmony_ci		list_add(&inode->i_lru, &dispose);
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci		/*
73662306a36Sopenharmony_ci		 * We can have a ton of inodes to evict at unmount time given
73762306a36Sopenharmony_ci		 * enough memory, check to see if we need to go to sleep for a
73862306a36Sopenharmony_ci		 * bit so we don't livelock.
73962306a36Sopenharmony_ci		 */
74062306a36Sopenharmony_ci		if (need_resched()) {
74162306a36Sopenharmony_ci			spin_unlock(&sb->s_inode_list_lock);
74262306a36Sopenharmony_ci			cond_resched();
74362306a36Sopenharmony_ci			dispose_list(&dispose);
74462306a36Sopenharmony_ci			goto again;
74562306a36Sopenharmony_ci		}
74662306a36Sopenharmony_ci	}
74762306a36Sopenharmony_ci	spin_unlock(&sb->s_inode_list_lock);
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci	dispose_list(&dispose);
75062306a36Sopenharmony_ci}
75162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(evict_inodes);
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci/**
75462306a36Sopenharmony_ci * invalidate_inodes	- attempt to free all inodes on a superblock
75562306a36Sopenharmony_ci * @sb:		superblock to operate on
75662306a36Sopenharmony_ci *
75762306a36Sopenharmony_ci * Attempts to free all inodes (including dirty inodes) for a given superblock.
75862306a36Sopenharmony_ci */
75962306a36Sopenharmony_civoid invalidate_inodes(struct super_block *sb)
76062306a36Sopenharmony_ci{
76162306a36Sopenharmony_ci	struct inode *inode, *next;
76262306a36Sopenharmony_ci	LIST_HEAD(dispose);
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ciagain:
76562306a36Sopenharmony_ci	spin_lock(&sb->s_inode_list_lock);
76662306a36Sopenharmony_ci	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
76762306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
76862306a36Sopenharmony_ci		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
76962306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
77062306a36Sopenharmony_ci			continue;
77162306a36Sopenharmony_ci		}
77262306a36Sopenharmony_ci		if (atomic_read(&inode->i_count)) {
77362306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
77462306a36Sopenharmony_ci			continue;
77562306a36Sopenharmony_ci		}
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci		inode->i_state |= I_FREEING;
77862306a36Sopenharmony_ci		inode_lru_list_del(inode);
77962306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
78062306a36Sopenharmony_ci		list_add(&inode->i_lru, &dispose);
78162306a36Sopenharmony_ci		if (need_resched()) {
78262306a36Sopenharmony_ci			spin_unlock(&sb->s_inode_list_lock);
78362306a36Sopenharmony_ci			cond_resched();
78462306a36Sopenharmony_ci			dispose_list(&dispose);
78562306a36Sopenharmony_ci			goto again;
78662306a36Sopenharmony_ci		}
78762306a36Sopenharmony_ci	}
78862306a36Sopenharmony_ci	spin_unlock(&sb->s_inode_list_lock);
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	dispose_list(&dispose);
79162306a36Sopenharmony_ci}
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci/*
79462306a36Sopenharmony_ci * Isolate the inode from the LRU in preparation for freeing it.
79562306a36Sopenharmony_ci *
79662306a36Sopenharmony_ci * If the inode has the I_REFERENCED flag set, then it means that it has been
79762306a36Sopenharmony_ci * used recently - the flag is set in iput_final(). When we encounter such an
79862306a36Sopenharmony_ci * inode, clear the flag and move it to the back of the LRU so it gets another
79962306a36Sopenharmony_ci * pass through the LRU before it gets reclaimed. This is necessary because of
80062306a36Sopenharmony_ci * the fact we are doing lazy LRU updates to minimise lock contention so the
80162306a36Sopenharmony_ci * LRU does not have strict ordering. Hence we don't want to reclaim inodes
80262306a36Sopenharmony_ci * with this flag set because they are the inodes that are out of order.
80362306a36Sopenharmony_ci */
80462306a36Sopenharmony_cistatic enum lru_status inode_lru_isolate(struct list_head *item,
80562306a36Sopenharmony_ci		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
80662306a36Sopenharmony_ci{
80762306a36Sopenharmony_ci	struct list_head *freeable = arg;
80862306a36Sopenharmony_ci	struct inode	*inode = container_of(item, struct inode, i_lru);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	/*
81162306a36Sopenharmony_ci	 * We are inverting the lru lock/inode->i_lock here, so use a
81262306a36Sopenharmony_ci	 * trylock. If we fail to get the lock, just skip it.
81362306a36Sopenharmony_ci	 */
81462306a36Sopenharmony_ci	if (!spin_trylock(&inode->i_lock))
81562306a36Sopenharmony_ci		return LRU_SKIP;
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	/*
81862306a36Sopenharmony_ci	 * Inodes can get referenced, redirtied, or repopulated while
81962306a36Sopenharmony_ci	 * they're already on the LRU, and this can make them
82062306a36Sopenharmony_ci	 * unreclaimable for a while. Remove them lazily here; iput,
82162306a36Sopenharmony_ci	 * sync, or the last page cache deletion will requeue them.
82262306a36Sopenharmony_ci	 */
82362306a36Sopenharmony_ci	if (atomic_read(&inode->i_count) ||
82462306a36Sopenharmony_ci	    (inode->i_state & ~I_REFERENCED) ||
82562306a36Sopenharmony_ci	    !mapping_shrinkable(&inode->i_data)) {
82662306a36Sopenharmony_ci		list_lru_isolate(lru, &inode->i_lru);
82762306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
82862306a36Sopenharmony_ci		this_cpu_dec(nr_unused);
82962306a36Sopenharmony_ci		return LRU_REMOVED;
83062306a36Sopenharmony_ci	}
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci	/* Recently referenced inodes get one more pass */
83362306a36Sopenharmony_ci	if (inode->i_state & I_REFERENCED) {
83462306a36Sopenharmony_ci		inode->i_state &= ~I_REFERENCED;
83562306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
83662306a36Sopenharmony_ci		return LRU_ROTATE;
83762306a36Sopenharmony_ci	}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	/*
84062306a36Sopenharmony_ci	 * On highmem systems, mapping_shrinkable() permits dropping
84162306a36Sopenharmony_ci	 * page cache in order to free up struct inodes: lowmem might
84262306a36Sopenharmony_ci	 * be under pressure before the cache inside the highmem zone.
84362306a36Sopenharmony_ci	 */
84462306a36Sopenharmony_ci	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
84562306a36Sopenharmony_ci		__iget(inode);
84662306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
84762306a36Sopenharmony_ci		spin_unlock(lru_lock);
84862306a36Sopenharmony_ci		if (remove_inode_buffers(inode)) {
84962306a36Sopenharmony_ci			unsigned long reap;
85062306a36Sopenharmony_ci			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
85162306a36Sopenharmony_ci			if (current_is_kswapd())
85262306a36Sopenharmony_ci				__count_vm_events(KSWAPD_INODESTEAL, reap);
85362306a36Sopenharmony_ci			else
85462306a36Sopenharmony_ci				__count_vm_events(PGINODESTEAL, reap);
85562306a36Sopenharmony_ci			mm_account_reclaimed_pages(reap);
85662306a36Sopenharmony_ci		}
85762306a36Sopenharmony_ci		iput(inode);
85862306a36Sopenharmony_ci		spin_lock(lru_lock);
85962306a36Sopenharmony_ci		return LRU_RETRY;
86062306a36Sopenharmony_ci	}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	WARN_ON(inode->i_state & I_NEW);
86362306a36Sopenharmony_ci	inode->i_state |= I_FREEING;
86462306a36Sopenharmony_ci	list_lru_isolate_move(lru, &inode->i_lru, freeable);
86562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	this_cpu_dec(nr_unused);
86862306a36Sopenharmony_ci	return LRU_REMOVED;
86962306a36Sopenharmony_ci}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci/*
87262306a36Sopenharmony_ci * Walk the superblock inode LRU for freeable inodes and attempt to free them.
87362306a36Sopenharmony_ci * This is called from the superblock shrinker function with a number of inodes
87462306a36Sopenharmony_ci * to trim from the LRU. Inodes to be freed are moved to a temporary list and
87562306a36Sopenharmony_ci * then are freed outside inode_lock by dispose_list().
87662306a36Sopenharmony_ci */
87762306a36Sopenharmony_cilong prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
87862306a36Sopenharmony_ci{
87962306a36Sopenharmony_ci	LIST_HEAD(freeable);
88062306a36Sopenharmony_ci	long freed;
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
88362306a36Sopenharmony_ci				     inode_lru_isolate, &freeable);
88462306a36Sopenharmony_ci	dispose_list(&freeable);
88562306a36Sopenharmony_ci	return freed;
88662306a36Sopenharmony_ci}
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode);
88962306a36Sopenharmony_ci/*
89062306a36Sopenharmony_ci * Called with the inode lock held.
89162306a36Sopenharmony_ci */
89262306a36Sopenharmony_cistatic struct inode *find_inode(struct super_block *sb,
89362306a36Sopenharmony_ci				struct hlist_head *head,
89462306a36Sopenharmony_ci				int (*test)(struct inode *, void *),
89562306a36Sopenharmony_ci				void *data)
89662306a36Sopenharmony_ci{
89762306a36Sopenharmony_ci	struct inode *inode = NULL;
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_cirepeat:
90062306a36Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
90162306a36Sopenharmony_ci		if (inode->i_sb != sb)
90262306a36Sopenharmony_ci			continue;
90362306a36Sopenharmony_ci		if (!test(inode, data))
90462306a36Sopenharmony_ci			continue;
90562306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
90662306a36Sopenharmony_ci		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
90762306a36Sopenharmony_ci			__wait_on_freeing_inode(inode);
90862306a36Sopenharmony_ci			goto repeat;
90962306a36Sopenharmony_ci		}
91062306a36Sopenharmony_ci		if (unlikely(inode->i_state & I_CREATING)) {
91162306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
91262306a36Sopenharmony_ci			return ERR_PTR(-ESTALE);
91362306a36Sopenharmony_ci		}
91462306a36Sopenharmony_ci		__iget(inode);
91562306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
91662306a36Sopenharmony_ci		return inode;
91762306a36Sopenharmony_ci	}
91862306a36Sopenharmony_ci	return NULL;
91962306a36Sopenharmony_ci}
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci/*
92262306a36Sopenharmony_ci * find_inode_fast is the fast path version of find_inode, see the comment at
92362306a36Sopenharmony_ci * iget_locked for details.
92462306a36Sopenharmony_ci */
92562306a36Sopenharmony_cistatic struct inode *find_inode_fast(struct super_block *sb,
92662306a36Sopenharmony_ci				struct hlist_head *head, unsigned long ino)
92762306a36Sopenharmony_ci{
92862306a36Sopenharmony_ci	struct inode *inode = NULL;
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_cirepeat:
93162306a36Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
93262306a36Sopenharmony_ci		if (inode->i_ino != ino)
93362306a36Sopenharmony_ci			continue;
93462306a36Sopenharmony_ci		if (inode->i_sb != sb)
93562306a36Sopenharmony_ci			continue;
93662306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
93762306a36Sopenharmony_ci		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
93862306a36Sopenharmony_ci			__wait_on_freeing_inode(inode);
93962306a36Sopenharmony_ci			goto repeat;
94062306a36Sopenharmony_ci		}
94162306a36Sopenharmony_ci		if (unlikely(inode->i_state & I_CREATING)) {
94262306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
94362306a36Sopenharmony_ci			return ERR_PTR(-ESTALE);
94462306a36Sopenharmony_ci		}
94562306a36Sopenharmony_ci		__iget(inode);
94662306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
94762306a36Sopenharmony_ci		return inode;
94862306a36Sopenharmony_ci	}
94962306a36Sopenharmony_ci	return NULL;
95062306a36Sopenharmony_ci}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci/*
95362306a36Sopenharmony_ci * Each cpu owns a range of LAST_INO_BATCH numbers.
95462306a36Sopenharmony_ci * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
95562306a36Sopenharmony_ci * to renew the exhausted range.
95662306a36Sopenharmony_ci *
95762306a36Sopenharmony_ci * This does not significantly increase overflow rate because every CPU can
95862306a36Sopenharmony_ci * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
95962306a36Sopenharmony_ci * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
96062306a36Sopenharmony_ci * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
96162306a36Sopenharmony_ci * overflow rate by 2x, which does not seem too significant.
96262306a36Sopenharmony_ci *
96362306a36Sopenharmony_ci * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
96462306a36Sopenharmony_ci * error if st_ino won't fit in target struct field. Use 32bit counter
96562306a36Sopenharmony_ci * here to attempt to avoid that.
96662306a36Sopenharmony_ci */
96762306a36Sopenharmony_ci#define LAST_INO_BATCH 1024
96862306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned int, last_ino);
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ciunsigned int get_next_ino(void)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	unsigned int *p = &get_cpu_var(last_ino);
97362306a36Sopenharmony_ci	unsigned int res = *p;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci#ifdef CONFIG_SMP
97662306a36Sopenharmony_ci	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
97762306a36Sopenharmony_ci		static atomic_t shared_last_ino;
97862306a36Sopenharmony_ci		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci		res = next - LAST_INO_BATCH;
98162306a36Sopenharmony_ci	}
98262306a36Sopenharmony_ci#endif
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	res++;
98562306a36Sopenharmony_ci	/* get_next_ino should not provide a 0 inode number */
98662306a36Sopenharmony_ci	if (unlikely(!res))
98762306a36Sopenharmony_ci		res++;
98862306a36Sopenharmony_ci	*p = res;
98962306a36Sopenharmony_ci	put_cpu_var(last_ino);
99062306a36Sopenharmony_ci	return res;
99162306a36Sopenharmony_ci}
99262306a36Sopenharmony_ciEXPORT_SYMBOL(get_next_ino);
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci/**
99562306a36Sopenharmony_ci *	new_inode_pseudo 	- obtain an inode
99662306a36Sopenharmony_ci *	@sb: superblock
99762306a36Sopenharmony_ci *
99862306a36Sopenharmony_ci *	Allocates a new inode for given superblock.
99962306a36Sopenharmony_ci *	Inode wont be chained in superblock s_inodes list
100062306a36Sopenharmony_ci *	This means :
100162306a36Sopenharmony_ci *	- fs can't be unmount
100262306a36Sopenharmony_ci *	- quotas, fsnotify, writeback can't work
100362306a36Sopenharmony_ci */
100462306a36Sopenharmony_cistruct inode *new_inode_pseudo(struct super_block *sb)
100562306a36Sopenharmony_ci{
100662306a36Sopenharmony_ci	struct inode *inode = alloc_inode(sb);
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci	if (inode) {
100962306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
101062306a36Sopenharmony_ci		inode->i_state = 0;
101162306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
101262306a36Sopenharmony_ci	}
101362306a36Sopenharmony_ci	return inode;
101462306a36Sopenharmony_ci}
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci/**
101762306a36Sopenharmony_ci *	new_inode 	- obtain an inode
101862306a36Sopenharmony_ci *	@sb: superblock
101962306a36Sopenharmony_ci *
102062306a36Sopenharmony_ci *	Allocates a new inode for given superblock. The default gfp_mask
102162306a36Sopenharmony_ci *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
102262306a36Sopenharmony_ci *	If HIGHMEM pages are unsuitable or it is known that pages allocated
102362306a36Sopenharmony_ci *	for the page cache are not reclaimable or migratable,
102462306a36Sopenharmony_ci *	mapping_set_gfp_mask() must be called with suitable flags on the
102562306a36Sopenharmony_ci *	newly created inode's mapping
102662306a36Sopenharmony_ci *
102762306a36Sopenharmony_ci */
102862306a36Sopenharmony_cistruct inode *new_inode(struct super_block *sb)
102962306a36Sopenharmony_ci{
103062306a36Sopenharmony_ci	struct inode *inode;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	inode = new_inode_pseudo(sb);
103362306a36Sopenharmony_ci	if (inode)
103462306a36Sopenharmony_ci		inode_sb_list_add(inode);
103562306a36Sopenharmony_ci	return inode;
103662306a36Sopenharmony_ci}
103762306a36Sopenharmony_ciEXPORT_SYMBOL(new_inode);
103862306a36Sopenharmony_ci
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * lockdep_annotate_inode_mutex_key - give a directory's i_rwsem its own class
 * @inode: inode to annotate; anything that is not a directory is ignored
 *
 * Switches i_rwsem of a directory from the filesystem type's i_mutex_key
 * class to its i_mutex_dir_key class, unless the filesystem has already
 * installed a class of its own.
 */
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	struct file_system_type *fstype;

	if (!S_ISDIR(inode->i_mode))
		return;

	fstype = inode->i_sb->s_type;

	/* Set new key only if filesystem hasn't already changed it */
	if (!lockdep_match_class(&inode->i_rwsem, &fstype->i_mutex_key))
		return;

	/*
	 * The lock is re-initialised before it is re-classed, so nobody may
	 * actually be holding it at this point.
	 */
	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &fstype->i_mutex_dir_key);
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci/**
106162306a36Sopenharmony_ci * unlock_new_inode - clear the I_NEW state and wake up any waiters
106262306a36Sopenharmony_ci * @inode:	new inode to unlock
106362306a36Sopenharmony_ci *
106462306a36Sopenharmony_ci * Called when the inode is fully initialised to clear the new state of the
106562306a36Sopenharmony_ci * inode and wake up anyone waiting for the inode to finish initialisation.
106662306a36Sopenharmony_ci */
106762306a36Sopenharmony_civoid unlock_new_inode(struct inode *inode)
106862306a36Sopenharmony_ci{
106962306a36Sopenharmony_ci	lockdep_annotate_inode_mutex_key(inode);
107062306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
107162306a36Sopenharmony_ci	WARN_ON(!(inode->i_state & I_NEW));
107262306a36Sopenharmony_ci	inode->i_state &= ~I_NEW & ~I_CREATING;
107362306a36Sopenharmony_ci	smp_mb();
107462306a36Sopenharmony_ci	wake_up_bit(&inode->i_state, __I_NEW);
107562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
107662306a36Sopenharmony_ci}
107762306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_new_inode);
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_civoid discard_new_inode(struct inode *inode)
108062306a36Sopenharmony_ci{
108162306a36Sopenharmony_ci	lockdep_annotate_inode_mutex_key(inode);
108262306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
108362306a36Sopenharmony_ci	WARN_ON(!(inode->i_state & I_NEW));
108462306a36Sopenharmony_ci	inode->i_state &= ~I_NEW;
108562306a36Sopenharmony_ci	smp_mb();
108662306a36Sopenharmony_ci	wake_up_bit(&inode->i_state, __I_NEW);
108762306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
108862306a36Sopenharmony_ci	iput(inode);
108962306a36Sopenharmony_ci}
109062306a36Sopenharmony_ciEXPORT_SYMBOL(discard_new_inode);
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci/**
109362306a36Sopenharmony_ci * lock_two_inodes - lock two inodes (may be regular files but also dirs)
109462306a36Sopenharmony_ci *
109562306a36Sopenharmony_ci * Lock any non-NULL argument. The caller must make sure that if he is passing
109662306a36Sopenharmony_ci * in two directories, one is not ancestor of the other.  Zero, one or two
109762306a36Sopenharmony_ci * objects may be locked by this function.
109862306a36Sopenharmony_ci *
109962306a36Sopenharmony_ci * @inode1: first inode to lock
110062306a36Sopenharmony_ci * @inode2: second inode to lock
110162306a36Sopenharmony_ci * @subclass1: inode lock subclass for the first lock obtained
110262306a36Sopenharmony_ci * @subclass2: inode lock subclass for the second lock obtained
110362306a36Sopenharmony_ci */
110462306a36Sopenharmony_civoid lock_two_inodes(struct inode *inode1, struct inode *inode2,
110562306a36Sopenharmony_ci		     unsigned subclass1, unsigned subclass2)
110662306a36Sopenharmony_ci{
110762306a36Sopenharmony_ci	if (!inode1 || !inode2) {
110862306a36Sopenharmony_ci		/*
110962306a36Sopenharmony_ci		 * Make sure @subclass1 will be used for the acquired lock.
111062306a36Sopenharmony_ci		 * This is not strictly necessary (no current caller cares) but
111162306a36Sopenharmony_ci		 * let's keep things consistent.
111262306a36Sopenharmony_ci		 */
111362306a36Sopenharmony_ci		if (!inode1)
111462306a36Sopenharmony_ci			swap(inode1, inode2);
111562306a36Sopenharmony_ci		goto lock;
111662306a36Sopenharmony_ci	}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci	/*
111962306a36Sopenharmony_ci	 * If one object is directory and the other is not, we must make sure
112062306a36Sopenharmony_ci	 * to lock directory first as the other object may be its child.
112162306a36Sopenharmony_ci	 */
112262306a36Sopenharmony_ci	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
112362306a36Sopenharmony_ci		if (inode1 > inode2)
112462306a36Sopenharmony_ci			swap(inode1, inode2);
112562306a36Sopenharmony_ci	} else if (!S_ISDIR(inode1->i_mode))
112662306a36Sopenharmony_ci		swap(inode1, inode2);
112762306a36Sopenharmony_cilock:
112862306a36Sopenharmony_ci	if (inode1)
112962306a36Sopenharmony_ci		inode_lock_nested(inode1, subclass1);
113062306a36Sopenharmony_ci	if (inode2 && inode2 != inode1)
113162306a36Sopenharmony_ci		inode_lock_nested(inode2, subclass2);
113262306a36Sopenharmony_ci}
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci/**
113562306a36Sopenharmony_ci * lock_two_nondirectories - take two i_mutexes on non-directory objects
113662306a36Sopenharmony_ci *
113762306a36Sopenharmony_ci * Lock any non-NULL argument. Passed objects must not be directories.
113862306a36Sopenharmony_ci * Zero, one or two objects may be locked by this function.
113962306a36Sopenharmony_ci *
114062306a36Sopenharmony_ci * @inode1: first inode to lock
114162306a36Sopenharmony_ci * @inode2: second inode to lock
114262306a36Sopenharmony_ci */
114362306a36Sopenharmony_civoid lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
114462306a36Sopenharmony_ci{
114562306a36Sopenharmony_ci	if (inode1)
114662306a36Sopenharmony_ci		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
114762306a36Sopenharmony_ci	if (inode2)
114862306a36Sopenharmony_ci		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
114962306a36Sopenharmony_ci	lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
115062306a36Sopenharmony_ci}
115162306a36Sopenharmony_ciEXPORT_SYMBOL(lock_two_nondirectories);
115262306a36Sopenharmony_ci
115362306a36Sopenharmony_ci/**
115462306a36Sopenharmony_ci * unlock_two_nondirectories - release locks from lock_two_nondirectories()
115562306a36Sopenharmony_ci * @inode1: first inode to unlock
115662306a36Sopenharmony_ci * @inode2: second inode to unlock
115762306a36Sopenharmony_ci */
115862306a36Sopenharmony_civoid unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
115962306a36Sopenharmony_ci{
116062306a36Sopenharmony_ci	if (inode1) {
116162306a36Sopenharmony_ci		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
116262306a36Sopenharmony_ci		inode_unlock(inode1);
116362306a36Sopenharmony_ci	}
116462306a36Sopenharmony_ci	if (inode2 && inode2 != inode1) {
116562306a36Sopenharmony_ci		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
116662306a36Sopenharmony_ci		inode_unlock(inode2);
116762306a36Sopenharmony_ci	}
116862306a36Sopenharmony_ci}
116962306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_two_nondirectories);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci/**
117262306a36Sopenharmony_ci * inode_insert5 - obtain an inode from a mounted file system
117362306a36Sopenharmony_ci * @inode:	pre-allocated inode to use for insert to cache
117462306a36Sopenharmony_ci * @hashval:	hash value (usually inode number) to get
117562306a36Sopenharmony_ci * @test:	callback used for comparisons between inodes
117662306a36Sopenharmony_ci * @set:	callback used to initialize a new struct inode
117762306a36Sopenharmony_ci * @data:	opaque data pointer to pass to @test and @set
117862306a36Sopenharmony_ci *
117962306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
118062306a36Sopenharmony_ci * and if present it is return it with an increased reference count. This is
118162306a36Sopenharmony_ci * a variant of iget5_locked() for callers that don't want to fail on memory
118262306a36Sopenharmony_ci * allocation of inode.
118362306a36Sopenharmony_ci *
118462306a36Sopenharmony_ci * If the inode is not in cache, insert the pre-allocated inode to cache and
118562306a36Sopenharmony_ci * return it locked, hashed, and with the I_NEW flag set. The file system gets
118662306a36Sopenharmony_ci * to fill it in before unlocking it via unlock_new_inode().
118762306a36Sopenharmony_ci *
118862306a36Sopenharmony_ci * Note both @test and @set are called with the inode_hash_lock held, so can't
118962306a36Sopenharmony_ci * sleep.
119062306a36Sopenharmony_ci */
119162306a36Sopenharmony_cistruct inode *inode_insert5(struct inode *inode, unsigned long hashval,
119262306a36Sopenharmony_ci			    int (*test)(struct inode *, void *),
119362306a36Sopenharmony_ci			    int (*set)(struct inode *, void *), void *data)
119462306a36Sopenharmony_ci{
119562306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
119662306a36Sopenharmony_ci	struct inode *old;
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ciagain:
119962306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
120062306a36Sopenharmony_ci	old = find_inode(inode->i_sb, head, test, data);
120162306a36Sopenharmony_ci	if (unlikely(old)) {
120262306a36Sopenharmony_ci		/*
120362306a36Sopenharmony_ci		 * Uhhuh, somebody else created the same inode under us.
120462306a36Sopenharmony_ci		 * Use the old inode instead of the preallocated one.
120562306a36Sopenharmony_ci		 */
120662306a36Sopenharmony_ci		spin_unlock(&inode_hash_lock);
120762306a36Sopenharmony_ci		if (IS_ERR(old))
120862306a36Sopenharmony_ci			return NULL;
120962306a36Sopenharmony_ci		wait_on_inode(old);
121062306a36Sopenharmony_ci		if (unlikely(inode_unhashed(old))) {
121162306a36Sopenharmony_ci			iput(old);
121262306a36Sopenharmony_ci			goto again;
121362306a36Sopenharmony_ci		}
121462306a36Sopenharmony_ci		return old;
121562306a36Sopenharmony_ci	}
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci	if (set && unlikely(set(inode, data))) {
121862306a36Sopenharmony_ci		inode = NULL;
121962306a36Sopenharmony_ci		goto unlock;
122062306a36Sopenharmony_ci	}
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	/*
122362306a36Sopenharmony_ci	 * Return the locked inode with I_NEW set, the
122462306a36Sopenharmony_ci	 * caller is responsible for filling in the contents
122562306a36Sopenharmony_ci	 */
122662306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
122762306a36Sopenharmony_ci	inode->i_state |= I_NEW;
122862306a36Sopenharmony_ci	hlist_add_head_rcu(&inode->i_hash, head);
122962306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
123062306a36Sopenharmony_ci
123162306a36Sopenharmony_ci	/*
123262306a36Sopenharmony_ci	 * Add inode to the sb list if it's not already. It has I_NEW at this
123362306a36Sopenharmony_ci	 * point, so it should be safe to test i_sb_list locklessly.
123462306a36Sopenharmony_ci	 */
123562306a36Sopenharmony_ci	if (list_empty(&inode->i_sb_list))
123662306a36Sopenharmony_ci		inode_sb_list_add(inode);
123762306a36Sopenharmony_ciunlock:
123862306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci	return inode;
124162306a36Sopenharmony_ci}
124262306a36Sopenharmony_ciEXPORT_SYMBOL(inode_insert5);
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci/**
124562306a36Sopenharmony_ci * iget5_locked - obtain an inode from a mounted file system
124662306a36Sopenharmony_ci * @sb:		super block of file system
124762306a36Sopenharmony_ci * @hashval:	hash value (usually inode number) to get
124862306a36Sopenharmony_ci * @test:	callback used for comparisons between inodes
124962306a36Sopenharmony_ci * @set:	callback used to initialize a new struct inode
125062306a36Sopenharmony_ci * @data:	opaque data pointer to pass to @test and @set
125162306a36Sopenharmony_ci *
125262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
125362306a36Sopenharmony_ci * and if present it is return it with an increased reference count. This is
125462306a36Sopenharmony_ci * a generalized version of iget_locked() for file systems where the inode
125562306a36Sopenharmony_ci * number is not sufficient for unique identification of an inode.
125662306a36Sopenharmony_ci *
125762306a36Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked,
125862306a36Sopenharmony_ci * hashed, and with the I_NEW flag set. The file system gets to fill it in
125962306a36Sopenharmony_ci * before unlocking it via unlock_new_inode().
126062306a36Sopenharmony_ci *
126162306a36Sopenharmony_ci * Note both @test and @set are called with the inode_hash_lock held, so can't
126262306a36Sopenharmony_ci * sleep.
126362306a36Sopenharmony_ci */
126462306a36Sopenharmony_cistruct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
126562306a36Sopenharmony_ci		int (*test)(struct inode *, void *),
126662306a36Sopenharmony_ci		int (*set)(struct inode *, void *), void *data)
126762306a36Sopenharmony_ci{
126862306a36Sopenharmony_ci	struct inode *inode = ilookup5(sb, hashval, test, data);
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	if (!inode) {
127162306a36Sopenharmony_ci		struct inode *new = alloc_inode(sb);
127262306a36Sopenharmony_ci
127362306a36Sopenharmony_ci		if (new) {
127462306a36Sopenharmony_ci			new->i_state = 0;
127562306a36Sopenharmony_ci			inode = inode_insert5(new, hashval, test, set, data);
127662306a36Sopenharmony_ci			if (unlikely(inode != new))
127762306a36Sopenharmony_ci				destroy_inode(new);
127862306a36Sopenharmony_ci		}
127962306a36Sopenharmony_ci	}
128062306a36Sopenharmony_ci	return inode;
128162306a36Sopenharmony_ci}
128262306a36Sopenharmony_ciEXPORT_SYMBOL(iget5_locked);
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci/**
128562306a36Sopenharmony_ci * iget_locked - obtain an inode from a mounted file system
128662306a36Sopenharmony_ci * @sb:		super block of file system
128762306a36Sopenharmony_ci * @ino:	inode number to get
128862306a36Sopenharmony_ci *
128962306a36Sopenharmony_ci * Search for the inode specified by @ino in the inode cache and if present
129062306a36Sopenharmony_ci * return it with an increased reference count. This is for file systems
129162306a36Sopenharmony_ci * where the inode number is sufficient for unique identification of an inode.
129262306a36Sopenharmony_ci *
129362306a36Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked,
129462306a36Sopenharmony_ci * hashed, and with the I_NEW flag set.  The file system gets to fill it in
129562306a36Sopenharmony_ci * before unlocking it via unlock_new_inode().
129662306a36Sopenharmony_ci */
129762306a36Sopenharmony_cistruct inode *iget_locked(struct super_block *sb, unsigned long ino)
129862306a36Sopenharmony_ci{
129962306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
130062306a36Sopenharmony_ci	struct inode *inode;
130162306a36Sopenharmony_ciagain:
130262306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
130362306a36Sopenharmony_ci	inode = find_inode_fast(sb, head, ino);
130462306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
130562306a36Sopenharmony_ci	if (inode) {
130662306a36Sopenharmony_ci		if (IS_ERR(inode))
130762306a36Sopenharmony_ci			return NULL;
130862306a36Sopenharmony_ci		wait_on_inode(inode);
130962306a36Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
131062306a36Sopenharmony_ci			iput(inode);
131162306a36Sopenharmony_ci			goto again;
131262306a36Sopenharmony_ci		}
131362306a36Sopenharmony_ci		return inode;
131462306a36Sopenharmony_ci	}
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	inode = alloc_inode(sb);
131762306a36Sopenharmony_ci	if (inode) {
131862306a36Sopenharmony_ci		struct inode *old;
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci		spin_lock(&inode_hash_lock);
132162306a36Sopenharmony_ci		/* We released the lock, so.. */
132262306a36Sopenharmony_ci		old = find_inode_fast(sb, head, ino);
132362306a36Sopenharmony_ci		if (!old) {
132462306a36Sopenharmony_ci			inode->i_ino = ino;
132562306a36Sopenharmony_ci			spin_lock(&inode->i_lock);
132662306a36Sopenharmony_ci			inode->i_state = I_NEW;
132762306a36Sopenharmony_ci			hlist_add_head_rcu(&inode->i_hash, head);
132862306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
132962306a36Sopenharmony_ci			inode_sb_list_add(inode);
133062306a36Sopenharmony_ci			spin_unlock(&inode_hash_lock);
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci			/* Return the locked inode with I_NEW set, the
133362306a36Sopenharmony_ci			 * caller is responsible for filling in the contents
133462306a36Sopenharmony_ci			 */
133562306a36Sopenharmony_ci			return inode;
133662306a36Sopenharmony_ci		}
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci		/*
133962306a36Sopenharmony_ci		 * Uhhuh, somebody else created the same inode under
134062306a36Sopenharmony_ci		 * us. Use the old inode instead of the one we just
134162306a36Sopenharmony_ci		 * allocated.
134262306a36Sopenharmony_ci		 */
134362306a36Sopenharmony_ci		spin_unlock(&inode_hash_lock);
134462306a36Sopenharmony_ci		destroy_inode(inode);
134562306a36Sopenharmony_ci		if (IS_ERR(old))
134662306a36Sopenharmony_ci			return NULL;
134762306a36Sopenharmony_ci		inode = old;
134862306a36Sopenharmony_ci		wait_on_inode(inode);
134962306a36Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
135062306a36Sopenharmony_ci			iput(inode);
135162306a36Sopenharmony_ci			goto again;
135262306a36Sopenharmony_ci		}
135362306a36Sopenharmony_ci	}
135462306a36Sopenharmony_ci	return inode;
135562306a36Sopenharmony_ci}
135662306a36Sopenharmony_ciEXPORT_SYMBOL(iget_locked);
135762306a36Sopenharmony_ci
135862306a36Sopenharmony_ci/*
135962306a36Sopenharmony_ci * search the inode cache for a matching inode number.
136062306a36Sopenharmony_ci * If we find one, then the inode number we are trying to
136162306a36Sopenharmony_ci * allocate is not unique and so we should not use it.
136262306a36Sopenharmony_ci *
136362306a36Sopenharmony_ci * Returns 1 if the inode number is unique, 0 if it is not.
136462306a36Sopenharmony_ci */
136562306a36Sopenharmony_cistatic int test_inode_iunique(struct super_block *sb, unsigned long ino)
136662306a36Sopenharmony_ci{
136762306a36Sopenharmony_ci	struct hlist_head *b = inode_hashtable + hash(sb, ino);
136862306a36Sopenharmony_ci	struct inode *inode;
136962306a36Sopenharmony_ci
137062306a36Sopenharmony_ci	hlist_for_each_entry_rcu(inode, b, i_hash) {
137162306a36Sopenharmony_ci		if (inode->i_ino == ino && inode->i_sb == sb)
137262306a36Sopenharmony_ci			return 0;
137362306a36Sopenharmony_ci	}
137462306a36Sopenharmony_ci	return 1;
137562306a36Sopenharmony_ci}
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_ci/**
137862306a36Sopenharmony_ci *	iunique - get a unique inode number
137962306a36Sopenharmony_ci *	@sb: superblock
138062306a36Sopenharmony_ci *	@max_reserved: highest reserved inode number
138162306a36Sopenharmony_ci *
138262306a36Sopenharmony_ci *	Obtain an inode number that is unique on the system for a given
138362306a36Sopenharmony_ci *	superblock. This is used by file systems that have no natural
138462306a36Sopenharmony_ci *	permanent inode numbering system. An inode number is returned that
138562306a36Sopenharmony_ci *	is higher than the reserved limit but unique.
138662306a36Sopenharmony_ci *
138762306a36Sopenharmony_ci *	BUGS:
138862306a36Sopenharmony_ci *	With a large number of inodes live on the file system this function
138962306a36Sopenharmony_ci *	currently becomes quite slow.
139062306a36Sopenharmony_ci */
139162306a36Sopenharmony_ciino_t iunique(struct super_block *sb, ino_t max_reserved)
139262306a36Sopenharmony_ci{
139362306a36Sopenharmony_ci	/*
139462306a36Sopenharmony_ci	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
139562306a36Sopenharmony_ci	 * error if st_ino won't fit in target struct field. Use 32bit counter
139662306a36Sopenharmony_ci	 * here to attempt to avoid that.
139762306a36Sopenharmony_ci	 */
139862306a36Sopenharmony_ci	static DEFINE_SPINLOCK(iunique_lock);
139962306a36Sopenharmony_ci	static unsigned int counter;
140062306a36Sopenharmony_ci	ino_t res;
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci	rcu_read_lock();
140362306a36Sopenharmony_ci	spin_lock(&iunique_lock);
140462306a36Sopenharmony_ci	do {
140562306a36Sopenharmony_ci		if (counter <= max_reserved)
140662306a36Sopenharmony_ci			counter = max_reserved + 1;
140762306a36Sopenharmony_ci		res = counter++;
140862306a36Sopenharmony_ci	} while (!test_inode_iunique(sb, res));
140962306a36Sopenharmony_ci	spin_unlock(&iunique_lock);
141062306a36Sopenharmony_ci	rcu_read_unlock();
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci	return res;
141362306a36Sopenharmony_ci}
141462306a36Sopenharmony_ciEXPORT_SYMBOL(iunique);
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_cistruct inode *igrab(struct inode *inode)
141762306a36Sopenharmony_ci{
141862306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
141962306a36Sopenharmony_ci	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
142062306a36Sopenharmony_ci		__iget(inode);
142162306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
142262306a36Sopenharmony_ci	} else {
142362306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
142462306a36Sopenharmony_ci		/*
142562306a36Sopenharmony_ci		 * Handle the case where s_op->clear_inode is not been
142662306a36Sopenharmony_ci		 * called yet, and somebody is calling igrab
142762306a36Sopenharmony_ci		 * while the inode is getting freed.
142862306a36Sopenharmony_ci		 */
142962306a36Sopenharmony_ci		inode = NULL;
143062306a36Sopenharmony_ci	}
143162306a36Sopenharmony_ci	return inode;
143262306a36Sopenharmony_ci}
143362306a36Sopenharmony_ciEXPORT_SYMBOL(igrab);
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci/**
143662306a36Sopenharmony_ci * ilookup5_nowait - search for an inode in the inode cache
143762306a36Sopenharmony_ci * @sb:		super block of file system to search
143862306a36Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
143962306a36Sopenharmony_ci * @test:	callback used for comparisons between inodes
144062306a36Sopenharmony_ci * @data:	opaque data pointer to pass to @test
144162306a36Sopenharmony_ci *
144262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache.
144362306a36Sopenharmony_ci * If the inode is in the cache, the inode is returned with an incremented
144462306a36Sopenharmony_ci * reference count.
144562306a36Sopenharmony_ci *
144662306a36Sopenharmony_ci * Note: I_NEW is not waited upon so you have to be very careful what you do
144762306a36Sopenharmony_ci * with the returned inode.  You probably should be using ilookup5() instead.
144862306a36Sopenharmony_ci *
144962306a36Sopenharmony_ci * Note2: @test is called with the inode_hash_lock held, so can't sleep.
145062306a36Sopenharmony_ci */
145162306a36Sopenharmony_cistruct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
145262306a36Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
145362306a36Sopenharmony_ci{
145462306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
145562306a36Sopenharmony_ci	struct inode *inode;
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
145862306a36Sopenharmony_ci	inode = find_inode(sb, head, test, data);
145962306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	return IS_ERR(inode) ? NULL : inode;
146262306a36Sopenharmony_ci}
146362306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup5_nowait);
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci/**
146662306a36Sopenharmony_ci * ilookup5 - search for an inode in the inode cache
146762306a36Sopenharmony_ci * @sb:		super block of file system to search
146862306a36Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
146962306a36Sopenharmony_ci * @test:	callback used for comparisons between inodes
147062306a36Sopenharmony_ci * @data:	opaque data pointer to pass to @test
147162306a36Sopenharmony_ci *
147262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
147362306a36Sopenharmony_ci * and if the inode is in the cache, return the inode with an incremented
147462306a36Sopenharmony_ci * reference count.  Waits on I_NEW before returning the inode.
147562306a36Sopenharmony_ci * returned with an incremented reference count.
147662306a36Sopenharmony_ci *
147762306a36Sopenharmony_ci * This is a generalized version of ilookup() for file systems where the
147862306a36Sopenharmony_ci * inode number is not sufficient for unique identification of an inode.
147962306a36Sopenharmony_ci *
148062306a36Sopenharmony_ci * Note: @test is called with the inode_hash_lock held, so can't sleep.
148162306a36Sopenharmony_ci */
148262306a36Sopenharmony_cistruct inode *ilookup5(struct super_block *sb, unsigned long hashval,
148362306a36Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
148462306a36Sopenharmony_ci{
148562306a36Sopenharmony_ci	struct inode *inode;
148662306a36Sopenharmony_ciagain:
148762306a36Sopenharmony_ci	inode = ilookup5_nowait(sb, hashval, test, data);
148862306a36Sopenharmony_ci	if (inode) {
148962306a36Sopenharmony_ci		wait_on_inode(inode);
149062306a36Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
149162306a36Sopenharmony_ci			iput(inode);
149262306a36Sopenharmony_ci			goto again;
149362306a36Sopenharmony_ci		}
149462306a36Sopenharmony_ci	}
149562306a36Sopenharmony_ci	return inode;
149662306a36Sopenharmony_ci}
149762306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup5);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci/**
150062306a36Sopenharmony_ci * ilookup - search for an inode in the inode cache
150162306a36Sopenharmony_ci * @sb:		super block of file system to search
150262306a36Sopenharmony_ci * @ino:	inode number to search for
150362306a36Sopenharmony_ci *
150462306a36Sopenharmony_ci * Search for the inode @ino in the inode cache, and if the inode is in the
150562306a36Sopenharmony_ci * cache, the inode is returned with an incremented reference count.
150662306a36Sopenharmony_ci */
150762306a36Sopenharmony_cistruct inode *ilookup(struct super_block *sb, unsigned long ino)
150862306a36Sopenharmony_ci{
150962306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
151062306a36Sopenharmony_ci	struct inode *inode;
151162306a36Sopenharmony_ciagain:
151262306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
151362306a36Sopenharmony_ci	inode = find_inode_fast(sb, head, ino);
151462306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ci	if (inode) {
151762306a36Sopenharmony_ci		if (IS_ERR(inode))
151862306a36Sopenharmony_ci			return NULL;
151962306a36Sopenharmony_ci		wait_on_inode(inode);
152062306a36Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
152162306a36Sopenharmony_ci			iput(inode);
152262306a36Sopenharmony_ci			goto again;
152362306a36Sopenharmony_ci		}
152462306a36Sopenharmony_ci	}
152562306a36Sopenharmony_ci	return inode;
152662306a36Sopenharmony_ci}
152762306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup);
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci/**
153062306a36Sopenharmony_ci * find_inode_nowait - find an inode in the inode cache
153162306a36Sopenharmony_ci * @sb:		super block of file system to search
153262306a36Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
153362306a36Sopenharmony_ci * @match:	callback used for comparisons between inodes
153462306a36Sopenharmony_ci * @data:	opaque data pointer to pass to @match
153562306a36Sopenharmony_ci *
153662306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode
153762306a36Sopenharmony_ci * cache, where the helper function @match will return 0 if the inode
153862306a36Sopenharmony_ci * does not match, 1 if the inode does match, and -1 if the search
153962306a36Sopenharmony_ci * should be stopped.  The @match function must be responsible for
154062306a36Sopenharmony_ci * taking the i_lock spin_lock and checking i_state for an inode being
154162306a36Sopenharmony_ci * freed or being initialized, and incrementing the reference count
154262306a36Sopenharmony_ci * before returning 1.  It also must not sleep, since it is called with
154362306a36Sopenharmony_ci * the inode_hash_lock spinlock held.
154462306a36Sopenharmony_ci *
154562306a36Sopenharmony_ci * This is a even more generalized version of ilookup5() when the
154662306a36Sopenharmony_ci * function must never block --- find_inode() can block in
154762306a36Sopenharmony_ci * __wait_on_freeing_inode() --- or when the caller can not increment
154862306a36Sopenharmony_ci * the reference count because the resulting iput() might cause an
154962306a36Sopenharmony_ci * inode eviction.  The tradeoff is that the @match funtion must be
155062306a36Sopenharmony_ci * very carefully implemented.
155162306a36Sopenharmony_ci */
155262306a36Sopenharmony_cistruct inode *find_inode_nowait(struct super_block *sb,
155362306a36Sopenharmony_ci				unsigned long hashval,
155462306a36Sopenharmony_ci				int (*match)(struct inode *, unsigned long,
155562306a36Sopenharmony_ci					     void *),
155662306a36Sopenharmony_ci				void *data)
155762306a36Sopenharmony_ci{
155862306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
155962306a36Sopenharmony_ci	struct inode *inode, *ret_inode = NULL;
156062306a36Sopenharmony_ci	int mval;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
156362306a36Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
156462306a36Sopenharmony_ci		if (inode->i_sb != sb)
156562306a36Sopenharmony_ci			continue;
156662306a36Sopenharmony_ci		mval = match(inode, hashval, data);
156762306a36Sopenharmony_ci		if (mval == 0)
156862306a36Sopenharmony_ci			continue;
156962306a36Sopenharmony_ci		if (mval == 1)
157062306a36Sopenharmony_ci			ret_inode = inode;
157162306a36Sopenharmony_ci		goto out;
157262306a36Sopenharmony_ci	}
157362306a36Sopenharmony_ciout:
157462306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
157562306a36Sopenharmony_ci	return ret_inode;
157662306a36Sopenharmony_ci}
157762306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_nowait);
157862306a36Sopenharmony_ci
157962306a36Sopenharmony_ci/**
158062306a36Sopenharmony_ci * find_inode_rcu - find an inode in the inode cache
158162306a36Sopenharmony_ci * @sb:		Super block of file system to search
158262306a36Sopenharmony_ci * @hashval:	Key to hash
158362306a36Sopenharmony_ci * @test:	Function to test match on an inode
158462306a36Sopenharmony_ci * @data:	Data for test function
158562306a36Sopenharmony_ci *
158662306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
158762306a36Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match
158862306a36Sopenharmony_ci * and 1 if it does.  The @test function must be responsible for taking the
158962306a36Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being
159062306a36Sopenharmony_ci * initialized.
159162306a36Sopenharmony_ci *
159262306a36Sopenharmony_ci * If successful, this will return the inode for which the @test function
159362306a36Sopenharmony_ci * returned 1 and NULL otherwise.
159462306a36Sopenharmony_ci *
159562306a36Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented.
159662306a36Sopenharmony_ci * It is also not permitted to sleep.
159762306a36Sopenharmony_ci *
159862306a36Sopenharmony_ci * The caller must hold the RCU read lock.
159962306a36Sopenharmony_ci */
160062306a36Sopenharmony_cistruct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
160162306a36Sopenharmony_ci			     int (*test)(struct inode *, void *), void *data)
160262306a36Sopenharmony_ci{
160362306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
160462306a36Sopenharmony_ci	struct inode *inode;
160562306a36Sopenharmony_ci
160662306a36Sopenharmony_ci	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
160762306a36Sopenharmony_ci			 "suspicious find_inode_rcu() usage");
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci	hlist_for_each_entry_rcu(inode, head, i_hash) {
161062306a36Sopenharmony_ci		if (inode->i_sb == sb &&
161162306a36Sopenharmony_ci		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
161262306a36Sopenharmony_ci		    test(inode, data))
161362306a36Sopenharmony_ci			return inode;
161462306a36Sopenharmony_ci	}
161562306a36Sopenharmony_ci	return NULL;
161662306a36Sopenharmony_ci}
161762306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_rcu);
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci/**
162062306a36Sopenharmony_ci * find_inode_by_ino_rcu - Find an inode in the inode cache
162162306a36Sopenharmony_ci * @sb:		Super block of file system to search
162262306a36Sopenharmony_ci * @ino:	The inode number to match
162362306a36Sopenharmony_ci *
162462306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
162562306a36Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match
162662306a36Sopenharmony_ci * and 1 if it does.  The @test function must be responsible for taking the
162762306a36Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being
162862306a36Sopenharmony_ci * initialized.
162962306a36Sopenharmony_ci *
163062306a36Sopenharmony_ci * If successful, this will return the inode for which the @test function
163162306a36Sopenharmony_ci * returned 1 and NULL otherwise.
163262306a36Sopenharmony_ci *
163362306a36Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented.
163462306a36Sopenharmony_ci * It is also not permitted to sleep.
163562306a36Sopenharmony_ci *
163662306a36Sopenharmony_ci * The caller must hold the RCU read lock.
163762306a36Sopenharmony_ci */
163862306a36Sopenharmony_cistruct inode *find_inode_by_ino_rcu(struct super_block *sb,
163962306a36Sopenharmony_ci				    unsigned long ino)
164062306a36Sopenharmony_ci{
164162306a36Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
164262306a36Sopenharmony_ci	struct inode *inode;
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_ci	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
164562306a36Sopenharmony_ci			 "suspicious find_inode_by_ino_rcu() usage");
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_ci	hlist_for_each_entry_rcu(inode, head, i_hash) {
164862306a36Sopenharmony_ci		if (inode->i_ino == ino &&
164962306a36Sopenharmony_ci		    inode->i_sb == sb &&
165062306a36Sopenharmony_ci		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
165162306a36Sopenharmony_ci		    return inode;
165262306a36Sopenharmony_ci	}
165362306a36Sopenharmony_ci	return NULL;
165462306a36Sopenharmony_ci}
165562306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_by_ino_rcu);
165662306a36Sopenharmony_ci
/**
 * insert_inode_locked - hash a caller-supplied inode under its inode number
 * @inode:	inode to insert; its i_sb and i_ino select the hash bucket
 *
 * Scan the bucket for another inode with the same i_ino and i_sb that is
 * not being freed. If there is none, hash @inode with I_NEW | I_CREATING
 * set and return 0. If there is one, return -EBUSY when it has I_CREATING
 * set or when it is still hashed after wait_on_inode(); if it was unhashed
 * in the meantime, release it and scan again.
 *
 * Return: 0 once @inode has been hashed, -EBUSY on a conflicting inode.
 */
int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		/*
		 * Look for a conflicting inode. Leaving the loop via break
		 * means @old is the conflict and old->i_lock is still held.
		 */
		hlist_for_each_entry(old, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			/* Inodes that are being freed do not count. */
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			break;
		}
		/*
		 * NOTE(review): this test relies on hlist_for_each_entry()
		 * leaving @old NULL when the walk runs off the end of the
		 * bucket - confirm against the list macro.
		 */
		if (likely(!old)) {
			/* No conflict: publish @inode as new and being created. */
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW | I_CREATING;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		/* The conflicting inode is itself being created: fail at once. */
		if (unlikely(old->i_state & I_CREATING)) {
			spin_unlock(&old->i_lock);
			spin_unlock(&inode_hash_lock);
			return -EBUSY;
		}
		/* Pin @old so it can be waited on with both locks dropped. */
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		/* Still hashed after the wait: the inode number is taken. */
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		/* @old was unhashed meanwhile: release it and rescan. */
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);
170362306a36Sopenharmony_ci
170462306a36Sopenharmony_ciint insert_inode_locked4(struct inode *inode, unsigned long hashval,
170562306a36Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
170662306a36Sopenharmony_ci{
170762306a36Sopenharmony_ci	struct inode *old;
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	inode->i_state |= I_CREATING;
171062306a36Sopenharmony_ci	old = inode_insert5(inode, hashval, test, NULL, data);
171162306a36Sopenharmony_ci
171262306a36Sopenharmony_ci	if (old != inode) {
171362306a36Sopenharmony_ci		iput(old);
171462306a36Sopenharmony_ci		return -EBUSY;
171562306a36Sopenharmony_ci	}
171662306a36Sopenharmony_ci	return 0;
171762306a36Sopenharmony_ci}
171862306a36Sopenharmony_ciEXPORT_SYMBOL(insert_inode_locked4);
171962306a36Sopenharmony_ci
172062306a36Sopenharmony_ci
/*
 * Always returns 1, whatever @inode is; the argument is not examined.
 *
 * NOTE(review): the signature matches the ->drop_inode hook that
 * iput_final() calls, where a non-zero result means "evict rather than
 * cache" - presumably this is meant to be plugged in there; confirm
 * against the file systems that use it.
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
172662306a36Sopenharmony_ci
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict inode, do so.  Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 *
 * Entered with inode->i_lock held (iput() gets here through
 * atomic_dec_and_lock()); the lock is released on every path
 * before returning.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	unsigned long state;
	int drop;

	/* An inode that is still being set up must not lose its last ref. */
	WARN_ON(inode->i_state & I_NEW);

	/* Ask the file system whether it wants this inode evicted. */
	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	/*
	 * Keep the inode cached: the fs did not ask for eviction, the
	 * inode is not marked I_DONTCACHE and the superblock is active.
	 */
	if (!drop &&
	    !(inode->i_state & I_DONTCACHE) &&
	    (sb->s_flags & SB_ACTIVE)) {
		__inode_add_lru(inode, true);
		spin_unlock(&inode->i_lock);
		return;
	}

	state = inode->i_state;
	if (!drop) {
		/*
		 * Eviction was not requested by the fs, so write the inode
		 * out first. I_WILL_FREE is set while i_lock is dropped for
		 * the write-out and is cleared again from the re-read state
		 * below.
		 */
		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
		spin_unlock(&inode->i_lock);

		write_inode_now(inode, 1);

		spin_lock(&inode->i_lock);
		/* i_state may have changed while the lock was dropped. */
		state = inode->i_state;
		WARN_ON(state & I_NEW);
		state &= ~I_WILL_FREE;
	}

	/* Mark the inode as being freed and take it off the LRU if queued. */
	WRITE_ONCE(inode->i_state, state | I_FREEING);
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	/* evict() is called with i_lock released. */
	evict(inode);
}
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci/**
178162306a36Sopenharmony_ci *	iput	- put an inode
178262306a36Sopenharmony_ci *	@inode: inode to put
178362306a36Sopenharmony_ci *
178462306a36Sopenharmony_ci *	Puts an inode, dropping its usage count. If the inode use count hits
178562306a36Sopenharmony_ci *	zero, the inode is then freed and may also be destroyed.
178662306a36Sopenharmony_ci *
178762306a36Sopenharmony_ci *	Consequently, iput() can sleep.
178862306a36Sopenharmony_ci */
178962306a36Sopenharmony_civoid iput(struct inode *inode)
179062306a36Sopenharmony_ci{
179162306a36Sopenharmony_ci	if (!inode)
179262306a36Sopenharmony_ci		return;
179362306a36Sopenharmony_ci	BUG_ON(inode->i_state & I_CLEAR);
179462306a36Sopenharmony_ciretry:
179562306a36Sopenharmony_ci	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
179662306a36Sopenharmony_ci		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
179762306a36Sopenharmony_ci			atomic_inc(&inode->i_count);
179862306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
179962306a36Sopenharmony_ci			trace_writeback_lazytime_iput(inode);
180062306a36Sopenharmony_ci			mark_inode_dirty_sync(inode);
180162306a36Sopenharmony_ci			goto retry;
180262306a36Sopenharmony_ci		}
180362306a36Sopenharmony_ci		iput_final(inode);
180462306a36Sopenharmony_ci	}
180562306a36Sopenharmony_ci}
180662306a36Sopenharmony_ciEXPORT_SYMBOL(iput);
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci#ifdef CONFIG_BLOCK
180962306a36Sopenharmony_ci/**
181062306a36Sopenharmony_ci *	bmap	- find a block number in a file
181162306a36Sopenharmony_ci *	@inode:  inode owning the block number being requested
181262306a36Sopenharmony_ci *	@block: pointer containing the block to find
181362306a36Sopenharmony_ci *
181462306a36Sopenharmony_ci *	Replaces the value in ``*block`` with the block number on the device holding
181562306a36Sopenharmony_ci *	corresponding to the requested block number in the file.
181662306a36Sopenharmony_ci *	That is, asked for block 4 of inode 1 the function will replace the
181762306a36Sopenharmony_ci *	4 in ``*block``, with disk block relative to the disk start that holds that
181862306a36Sopenharmony_ci *	block of the file.
181962306a36Sopenharmony_ci *
182062306a36Sopenharmony_ci *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
182162306a36Sopenharmony_ci *	hole, returns 0 and ``*block`` is also set to 0.
182262306a36Sopenharmony_ci */
182362306a36Sopenharmony_ciint bmap(struct inode *inode, sector_t *block)
182462306a36Sopenharmony_ci{
182562306a36Sopenharmony_ci	if (!inode->i_mapping->a_ops->bmap)
182662306a36Sopenharmony_ci		return -EINVAL;
182762306a36Sopenharmony_ci
182862306a36Sopenharmony_ci	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
182962306a36Sopenharmony_ci	return 0;
183062306a36Sopenharmony_ci}
183162306a36Sopenharmony_ciEXPORT_SYMBOL(bmap);
183262306a36Sopenharmony_ci#endif
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci/*
183562306a36Sopenharmony_ci * With relative atime, only update atime if the previous atime is
183662306a36Sopenharmony_ci * earlier than or equal to either the ctime or mtime,
183762306a36Sopenharmony_ci * or if at least a day has passed since the last atime update.
183862306a36Sopenharmony_ci */
183962306a36Sopenharmony_cistatic int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
184062306a36Sopenharmony_ci			     struct timespec64 now)
184162306a36Sopenharmony_ci{
184262306a36Sopenharmony_ci	struct timespec64 ctime;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci	if (!(mnt->mnt_flags & MNT_RELATIME))
184562306a36Sopenharmony_ci		return 1;
184662306a36Sopenharmony_ci	/*
184762306a36Sopenharmony_ci	 * Is mtime younger than or equal to atime? If yes, update atime:
184862306a36Sopenharmony_ci	 */
184962306a36Sopenharmony_ci	if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
185062306a36Sopenharmony_ci		return 1;
185162306a36Sopenharmony_ci	/*
185262306a36Sopenharmony_ci	 * Is ctime younger than or equal to atime? If yes, update atime:
185362306a36Sopenharmony_ci	 */
185462306a36Sopenharmony_ci	ctime = inode_get_ctime(inode);
185562306a36Sopenharmony_ci	if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
185662306a36Sopenharmony_ci		return 1;
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci	/*
185962306a36Sopenharmony_ci	 * Is the previous atime value older than a day? If yes,
186062306a36Sopenharmony_ci	 * update atime:
186162306a36Sopenharmony_ci	 */
186262306a36Sopenharmony_ci	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
186362306a36Sopenharmony_ci		return 1;
186462306a36Sopenharmony_ci	/*
186562306a36Sopenharmony_ci	 * Good, we can skip the atime update:
186662306a36Sopenharmony_ci	 */
186762306a36Sopenharmony_ci	return 0;
186862306a36Sopenharmony_ci}
186962306a36Sopenharmony_ci
187062306a36Sopenharmony_ci/**
187162306a36Sopenharmony_ci * inode_update_timestamps - update the timestamps on the inode
187262306a36Sopenharmony_ci * @inode: inode to be updated
187362306a36Sopenharmony_ci * @flags: S_* flags that needed to be updated
187462306a36Sopenharmony_ci *
187562306a36Sopenharmony_ci * The update_time function is called when an inode's timestamps need to be
187662306a36Sopenharmony_ci * updated for a read or write operation. This function handles updating the
187762306a36Sopenharmony_ci * actual timestamps. It's up to the caller to ensure that the inode is marked
187862306a36Sopenharmony_ci * dirty appropriately.
187962306a36Sopenharmony_ci *
188062306a36Sopenharmony_ci * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
188162306a36Sopenharmony_ci * attempt to update all three of them. S_ATIME updates can be handled
188262306a36Sopenharmony_ci * independently of the rest.
188362306a36Sopenharmony_ci *
188462306a36Sopenharmony_ci * Returns a set of S_* flags indicating which values changed.
188562306a36Sopenharmony_ci */
188662306a36Sopenharmony_ciint inode_update_timestamps(struct inode *inode, int flags)
188762306a36Sopenharmony_ci{
188862306a36Sopenharmony_ci	int updated = 0;
188962306a36Sopenharmony_ci	struct timespec64 now;
189062306a36Sopenharmony_ci
189162306a36Sopenharmony_ci	if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
189262306a36Sopenharmony_ci		struct timespec64 ctime = inode_get_ctime(inode);
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci		now = inode_set_ctime_current(inode);
189562306a36Sopenharmony_ci		if (!timespec64_equal(&now, &ctime))
189662306a36Sopenharmony_ci			updated |= S_CTIME;
189762306a36Sopenharmony_ci		if (!timespec64_equal(&now, &inode->i_mtime)) {
189862306a36Sopenharmony_ci			inode->i_mtime = now;
189962306a36Sopenharmony_ci			updated |= S_MTIME;
190062306a36Sopenharmony_ci		}
190162306a36Sopenharmony_ci		if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
190262306a36Sopenharmony_ci			updated |= S_VERSION;
190362306a36Sopenharmony_ci	} else {
190462306a36Sopenharmony_ci		now = current_time(inode);
190562306a36Sopenharmony_ci	}
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci	if (flags & S_ATIME) {
190862306a36Sopenharmony_ci		if (!timespec64_equal(&now, &inode->i_atime)) {
190962306a36Sopenharmony_ci			inode->i_atime = now;
191062306a36Sopenharmony_ci			updated |= S_ATIME;
191162306a36Sopenharmony_ci		}
191262306a36Sopenharmony_ci	}
191362306a36Sopenharmony_ci	return updated;
191462306a36Sopenharmony_ci}
191562306a36Sopenharmony_ciEXPORT_SYMBOL(inode_update_timestamps);
191662306a36Sopenharmony_ci
191762306a36Sopenharmony_ci/**
191862306a36Sopenharmony_ci * generic_update_time - update the timestamps on the inode
191962306a36Sopenharmony_ci * @inode: inode to be updated
192062306a36Sopenharmony_ci * @flags: S_* flags that needed to be updated
192162306a36Sopenharmony_ci *
192262306a36Sopenharmony_ci * The update_time function is called when an inode's timestamps need to be
192362306a36Sopenharmony_ci * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
192462306a36Sopenharmony_ci * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
192562306a36Sopenharmony_ci * updates can be handled done independently of the rest.
192662306a36Sopenharmony_ci *
192762306a36Sopenharmony_ci * Returns a S_* mask indicating which fields were updated.
192862306a36Sopenharmony_ci */
192962306a36Sopenharmony_ciint generic_update_time(struct inode *inode, int flags)
193062306a36Sopenharmony_ci{
193162306a36Sopenharmony_ci	int updated = inode_update_timestamps(inode, flags);
193262306a36Sopenharmony_ci	int dirty_flags = 0;
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_ci	if (updated & (S_ATIME|S_MTIME|S_CTIME))
193562306a36Sopenharmony_ci		dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
193662306a36Sopenharmony_ci	if (updated & S_VERSION)
193762306a36Sopenharmony_ci		dirty_flags |= I_DIRTY_SYNC;
193862306a36Sopenharmony_ci	__mark_inode_dirty(inode, dirty_flags);
193962306a36Sopenharmony_ci	return updated;
194062306a36Sopenharmony_ci}
194162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_update_time);
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ci/*
194462306a36Sopenharmony_ci * This does the actual work of updating an inodes time or version.  Must have
194562306a36Sopenharmony_ci * had called mnt_want_write() before calling this.
194662306a36Sopenharmony_ci */
194762306a36Sopenharmony_ciint inode_update_time(struct inode *inode, int flags)
194862306a36Sopenharmony_ci{
194962306a36Sopenharmony_ci	if (inode->i_op->update_time)
195062306a36Sopenharmony_ci		return inode->i_op->update_time(inode, flags);
195162306a36Sopenharmony_ci	generic_update_time(inode, flags);
195262306a36Sopenharmony_ci	return 0;
195362306a36Sopenharmony_ci}
195462306a36Sopenharmony_ciEXPORT_SYMBOL(inode_update_time);
195562306a36Sopenharmony_ci
195662306a36Sopenharmony_ci/**
195762306a36Sopenharmony_ci *	atime_needs_update	-	update the access time
195862306a36Sopenharmony_ci *	@path: the &struct path to update
195962306a36Sopenharmony_ci *	@inode: inode to update
196062306a36Sopenharmony_ci *
196162306a36Sopenharmony_ci *	Update the accessed time on an inode and mark it for writeback.
196262306a36Sopenharmony_ci *	This function automatically handles read only file systems and media,
196362306a36Sopenharmony_ci *	as well as the "noatime" flag and inode specific "noatime" markers.
196462306a36Sopenharmony_ci */
196562306a36Sopenharmony_cibool atime_needs_update(const struct path *path, struct inode *inode)
196662306a36Sopenharmony_ci{
196762306a36Sopenharmony_ci	struct vfsmount *mnt = path->mnt;
196862306a36Sopenharmony_ci	struct timespec64 now;
196962306a36Sopenharmony_ci
197062306a36Sopenharmony_ci	if (inode->i_flags & S_NOATIME)
197162306a36Sopenharmony_ci		return false;
197262306a36Sopenharmony_ci
197362306a36Sopenharmony_ci	/* Atime updates will likely cause i_uid and i_gid to be written
197462306a36Sopenharmony_ci	 * back improprely if their true value is unknown to the vfs.
197562306a36Sopenharmony_ci	 */
197662306a36Sopenharmony_ci	if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
197762306a36Sopenharmony_ci		return false;
197862306a36Sopenharmony_ci
197962306a36Sopenharmony_ci	if (IS_NOATIME(inode))
198062306a36Sopenharmony_ci		return false;
198162306a36Sopenharmony_ci	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
198262306a36Sopenharmony_ci		return false;
198362306a36Sopenharmony_ci
198462306a36Sopenharmony_ci	if (mnt->mnt_flags & MNT_NOATIME)
198562306a36Sopenharmony_ci		return false;
198662306a36Sopenharmony_ci	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
198762306a36Sopenharmony_ci		return false;
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci	now = current_time(inode);
199062306a36Sopenharmony_ci
199162306a36Sopenharmony_ci	if (!relatime_need_update(mnt, inode, now))
199262306a36Sopenharmony_ci		return false;
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci	if (timespec64_equal(&inode->i_atime, &now))
199562306a36Sopenharmony_ci		return false;
199662306a36Sopenharmony_ci
199762306a36Sopenharmony_ci	return true;
199862306a36Sopenharmony_ci}
199962306a36Sopenharmony_ci
200062306a36Sopenharmony_civoid touch_atime(const struct path *path)
200162306a36Sopenharmony_ci{
200262306a36Sopenharmony_ci	struct vfsmount *mnt = path->mnt;
200362306a36Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	if (!atime_needs_update(path, inode))
200662306a36Sopenharmony_ci		return;
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci	if (!sb_start_write_trylock(inode->i_sb))
200962306a36Sopenharmony_ci		return;
201062306a36Sopenharmony_ci
201162306a36Sopenharmony_ci	if (__mnt_want_write(mnt) != 0)
201262306a36Sopenharmony_ci		goto skip_update;
201362306a36Sopenharmony_ci	/*
201462306a36Sopenharmony_ci	 * File systems can error out when updating inodes if they need to
201562306a36Sopenharmony_ci	 * allocate new space to modify an inode (such is the case for
201662306a36Sopenharmony_ci	 * Btrfs), but since we touch atime while walking down the path we
201762306a36Sopenharmony_ci	 * really don't care if we failed to update the atime of the file,
201862306a36Sopenharmony_ci	 * so just ignore the return value.
201962306a36Sopenharmony_ci	 * We may also fail on filesystems that have the ability to make parts
202062306a36Sopenharmony_ci	 * of the fs read only, e.g. subvolumes in Btrfs.
202162306a36Sopenharmony_ci	 */
202262306a36Sopenharmony_ci	inode_update_time(inode, S_ATIME);
202362306a36Sopenharmony_ci	__mnt_drop_write(mnt);
202462306a36Sopenharmony_ciskip_update:
202562306a36Sopenharmony_ci	sb_end_write(inode->i_sb);
202662306a36Sopenharmony_ci}
202762306a36Sopenharmony_ciEXPORT_SYMBOL(touch_atime);
202862306a36Sopenharmony_ci
202962306a36Sopenharmony_ci/*
203062306a36Sopenharmony_ci * Return mask of changes for notify_change() that need to be done as a
203162306a36Sopenharmony_ci * response to write or truncate. Return 0 if nothing has to be changed.
203262306a36Sopenharmony_ci * Negative value on error (change should be denied).
203362306a36Sopenharmony_ci */
203462306a36Sopenharmony_ciint dentry_needs_remove_privs(struct mnt_idmap *idmap,
203562306a36Sopenharmony_ci			      struct dentry *dentry)
203662306a36Sopenharmony_ci{
203762306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
203862306a36Sopenharmony_ci	int mask = 0;
203962306a36Sopenharmony_ci	int ret;
204062306a36Sopenharmony_ci
204162306a36Sopenharmony_ci	if (IS_NOSEC(inode))
204262306a36Sopenharmony_ci		return 0;
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_ci	mask = setattr_should_drop_suidgid(idmap, inode);
204562306a36Sopenharmony_ci	ret = security_inode_need_killpriv(dentry);
204662306a36Sopenharmony_ci	if (ret < 0)
204762306a36Sopenharmony_ci		return ret;
204862306a36Sopenharmony_ci	if (ret)
204962306a36Sopenharmony_ci		mask |= ATTR_KILL_PRIV;
205062306a36Sopenharmony_ci	return mask;
205162306a36Sopenharmony_ci}
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_cistatic int __remove_privs(struct mnt_idmap *idmap,
205462306a36Sopenharmony_ci			  struct dentry *dentry, int kill)
205562306a36Sopenharmony_ci{
205662306a36Sopenharmony_ci	struct iattr newattrs;
205762306a36Sopenharmony_ci
205862306a36Sopenharmony_ci	newattrs.ia_valid = ATTR_FORCE | kill;
205962306a36Sopenharmony_ci	/*
206062306a36Sopenharmony_ci	 * Note we call this on write, so notify_change will not
206162306a36Sopenharmony_ci	 * encounter any conflicting delegations:
206262306a36Sopenharmony_ci	 */
206362306a36Sopenharmony_ci	return notify_change(idmap, dentry, &newattrs, NULL);
206462306a36Sopenharmony_ci}
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_cistatic int __file_remove_privs(struct file *file, unsigned int flags)
206762306a36Sopenharmony_ci{
206862306a36Sopenharmony_ci	struct dentry *dentry = file_dentry(file);
206962306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
207062306a36Sopenharmony_ci	int error = 0;
207162306a36Sopenharmony_ci	int kill;
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_ci	if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
207462306a36Sopenharmony_ci		return 0;
207562306a36Sopenharmony_ci
207662306a36Sopenharmony_ci	kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
207762306a36Sopenharmony_ci	if (kill < 0)
207862306a36Sopenharmony_ci		return kill;
207962306a36Sopenharmony_ci
208062306a36Sopenharmony_ci	if (kill) {
208162306a36Sopenharmony_ci		if (flags & IOCB_NOWAIT)
208262306a36Sopenharmony_ci			return -EAGAIN;
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci		error = __remove_privs(file_mnt_idmap(file), dentry, kill);
208562306a36Sopenharmony_ci	}
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	if (!error)
208862306a36Sopenharmony_ci		inode_has_no_xattr(inode);
208962306a36Sopenharmony_ci	return error;
209062306a36Sopenharmony_ci}
209162306a36Sopenharmony_ci
/**
 * file_remove_privs - remove special file privileges (suid, capabilities)
 * @file: file to remove privileges from
 *
 * When a file is modified by a write or truncation, ensure that special
 * file privileges are removed. No kiocb flags are applied, so this variant
 * never bails out with -EAGAIN because of IOCB_NOWAIT.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_remove_privs(struct file *file)
{
	const unsigned int no_flags = 0;

	return __file_remove_privs(file, no_flags);
}
EXPORT_SYMBOL(file_remove_privs);
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_cistatic int inode_needs_update_time(struct inode *inode)
210862306a36Sopenharmony_ci{
210962306a36Sopenharmony_ci	int sync_it = 0;
211062306a36Sopenharmony_ci	struct timespec64 now = current_time(inode);
211162306a36Sopenharmony_ci	struct timespec64 ctime;
211262306a36Sopenharmony_ci
211362306a36Sopenharmony_ci	/* First try to exhaust all avenues to not sync */
211462306a36Sopenharmony_ci	if (IS_NOCMTIME(inode))
211562306a36Sopenharmony_ci		return 0;
211662306a36Sopenharmony_ci
211762306a36Sopenharmony_ci	if (!timespec64_equal(&inode->i_mtime, &now))
211862306a36Sopenharmony_ci		sync_it = S_MTIME;
211962306a36Sopenharmony_ci
212062306a36Sopenharmony_ci	ctime = inode_get_ctime(inode);
212162306a36Sopenharmony_ci	if (!timespec64_equal(&ctime, &now))
212262306a36Sopenharmony_ci		sync_it |= S_CTIME;
212362306a36Sopenharmony_ci
212462306a36Sopenharmony_ci	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
212562306a36Sopenharmony_ci		sync_it |= S_VERSION;
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_ci	return sync_it;
212862306a36Sopenharmony_ci}
212962306a36Sopenharmony_ci
/*
 * Try to update the time settings of the inode behind @file.
 *
 * @sync_mode is the S_* mask handed to inode_update_time(). If write access
 * to the mount cannot be obtained the update is skipped and 0 is returned;
 * otherwise the result of inode_update_time() is returned.
 */
static int __file_update_time(struct file *file, int sync_mode)
{
	struct inode *inode = file_inode(file);
	int err;

	if (__mnt_want_write_file(file))
		return 0;

	err = inode_update_time(inode, sync_mode);
	__mnt_drop_write_file(file);
	return err;
}
214362306a36Sopenharmony_ci
/**
 * file_update_time - update mtime and ctime
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback. Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the _NOCMTIME inode
 * flag, e.g. for network filesystems where these timestamps are handled
 * by the server. This can return an error for file systems that need to
 * allocate space in order to update an inode.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_update_time(struct file *file)
{
	int sync_mode = inode_needs_update_time(file_inode(file));

	/* Nothing to update: hand the result back unchanged. */
	if (sync_mode <= 0)
		return sync_mode;

	return __file_update_time(file, sync_mode);
}
EXPORT_SYMBOL(file_update_time);
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci/**
217262306a36Sopenharmony_ci * file_modified_flags - handle mandated vfs changes when modifying a file
217362306a36Sopenharmony_ci * @file: file that was modified
217462306a36Sopenharmony_ci * @flags: kiocb flags
217562306a36Sopenharmony_ci *
217662306a36Sopenharmony_ci * When file has been modified ensure that special
217762306a36Sopenharmony_ci * file privileges are removed and time settings are updated.
217862306a36Sopenharmony_ci *
217962306a36Sopenharmony_ci * If IOCB_NOWAIT is set, special file privileges will not be removed and
218062306a36Sopenharmony_ci * time settings will not be updated. It will return -EAGAIN.
218162306a36Sopenharmony_ci *
218262306a36Sopenharmony_ci * Context: Caller must hold the file's inode lock.
218362306a36Sopenharmony_ci *
218462306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure.
218562306a36Sopenharmony_ci */
218662306a36Sopenharmony_cistatic int file_modified_flags(struct file *file, int flags)
218762306a36Sopenharmony_ci{
218862306a36Sopenharmony_ci	int ret;
218962306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
219062306a36Sopenharmony_ci
219162306a36Sopenharmony_ci	/*
219262306a36Sopenharmony_ci	 * Clear the security bits if the process is not being run by root.
219362306a36Sopenharmony_ci	 * This keeps people from modifying setuid and setgid binaries.
219462306a36Sopenharmony_ci	 */
219562306a36Sopenharmony_ci	ret = __file_remove_privs(file, flags);
219662306a36Sopenharmony_ci	if (ret)
219762306a36Sopenharmony_ci		return ret;
219862306a36Sopenharmony_ci
219962306a36Sopenharmony_ci	if (unlikely(file->f_mode & FMODE_NOCMTIME))
220062306a36Sopenharmony_ci		return 0;
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci	ret = inode_needs_update_time(inode);
220362306a36Sopenharmony_ci	if (ret <= 0)
220462306a36Sopenharmony_ci		return ret;
220562306a36Sopenharmony_ci	if (flags & IOCB_NOWAIT)
220662306a36Sopenharmony_ci		return -EAGAIN;
220762306a36Sopenharmony_ci
220862306a36Sopenharmony_ci	return __file_update_time(file, ret);
220962306a36Sopenharmony_ci}
221062306a36Sopenharmony_ci
/**
 * file_modified - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 * This is file_modified_flags() with no kiocb flags set.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_modified(struct file *file)
{
	const int no_flags = 0;

	return file_modified_flags(file, no_flags);
}
EXPORT_SYMBOL(file_modified);
222762306a36Sopenharmony_ci
222862306a36Sopenharmony_ci/**
222962306a36Sopenharmony_ci * kiocb_modified - handle mandated vfs changes when modifying a file
223062306a36Sopenharmony_ci * @iocb: iocb that was modified
223162306a36Sopenharmony_ci *
223262306a36Sopenharmony_ci * When file has been modified ensure that special
223362306a36Sopenharmony_ci * file privileges are removed and time settings are updated.
223462306a36Sopenharmony_ci *
223562306a36Sopenharmony_ci * Context: Caller must hold the file's inode lock.
223662306a36Sopenharmony_ci *
223762306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure.
223862306a36Sopenharmony_ci */
223962306a36Sopenharmony_ciint kiocb_modified(struct kiocb *iocb)
224062306a36Sopenharmony_ci{
224162306a36Sopenharmony_ci	return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
224262306a36Sopenharmony_ci}
224362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kiocb_modified);
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ciint inode_needs_sync(struct inode *inode)
224662306a36Sopenharmony_ci{
224762306a36Sopenharmony_ci	if (IS_SYNC(inode))
224862306a36Sopenharmony_ci		return 1;
224962306a36Sopenharmony_ci	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
225062306a36Sopenharmony_ci		return 1;
225162306a36Sopenharmony_ci	return 0;
225262306a36Sopenharmony_ci}
225362306a36Sopenharmony_ciEXPORT_SYMBOL(inode_needs_sync);
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci/*
225662306a36Sopenharmony_ci * If we try to find an inode in the inode hash while it is being
225762306a36Sopenharmony_ci * deleted, we have to wait until the filesystem completes its
225862306a36Sopenharmony_ci * deletion before reporting that it isn't found.  This function waits
225962306a36Sopenharmony_ci * until the deletion _might_ have completed.  Callers are responsible
226062306a36Sopenharmony_ci * to recheck inode state.
226162306a36Sopenharmony_ci *
226262306a36Sopenharmony_ci * It doesn't matter if I_NEW is not set initially, a call to
226362306a36Sopenharmony_ci * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
226462306a36Sopenharmony_ci * will DTRT.
226562306a36Sopenharmony_ci */
226662306a36Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode)
226762306a36Sopenharmony_ci{
226862306a36Sopenharmony_ci	wait_queue_head_t *wq;
226962306a36Sopenharmony_ci	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
227062306a36Sopenharmony_ci	wq = bit_waitqueue(&inode->i_state, __I_NEW);
227162306a36Sopenharmony_ci	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
227262306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
227362306a36Sopenharmony_ci	spin_unlock(&inode_hash_lock);
227462306a36Sopenharmony_ci	schedule();
227562306a36Sopenharmony_ci	finish_wait(wq, &wait.wq_entry);
227662306a36Sopenharmony_ci	spin_lock(&inode_hash_lock);
227762306a36Sopenharmony_ci}
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_cistatic __initdata unsigned long ihash_entries;
228062306a36Sopenharmony_cistatic int __init set_ihash_entries(char *str)
228162306a36Sopenharmony_ci{
228262306a36Sopenharmony_ci	if (!str)
228362306a36Sopenharmony_ci		return 0;
228462306a36Sopenharmony_ci	ihash_entries = simple_strtoul(str, &str, 0);
228562306a36Sopenharmony_ci	return 1;
228662306a36Sopenharmony_ci}
228762306a36Sopenharmony_ci__setup("ihash_entries=", set_ihash_entries);
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci/*
229062306a36Sopenharmony_ci * Initialize the waitqueues and inode hash table.
229162306a36Sopenharmony_ci */
229262306a36Sopenharmony_civoid __init inode_init_early(void)
229362306a36Sopenharmony_ci{
229462306a36Sopenharmony_ci	/* If hashes are distributed across NUMA nodes, defer
229562306a36Sopenharmony_ci	 * hash allocation until vmalloc space is available.
229662306a36Sopenharmony_ci	 */
229762306a36Sopenharmony_ci	if (hashdist)
229862306a36Sopenharmony_ci		return;
229962306a36Sopenharmony_ci
230062306a36Sopenharmony_ci	inode_hashtable =
230162306a36Sopenharmony_ci		alloc_large_system_hash("Inode-cache",
230262306a36Sopenharmony_ci					sizeof(struct hlist_head),
230362306a36Sopenharmony_ci					ihash_entries,
230462306a36Sopenharmony_ci					14,
230562306a36Sopenharmony_ci					HASH_EARLY | HASH_ZERO,
230662306a36Sopenharmony_ci					&i_hash_shift,
230762306a36Sopenharmony_ci					&i_hash_mask,
230862306a36Sopenharmony_ci					0,
230962306a36Sopenharmony_ci					0);
231062306a36Sopenharmony_ci}
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_civoid __init inode_init(void)
231362306a36Sopenharmony_ci{
231462306a36Sopenharmony_ci	/* inode slab cache */
231562306a36Sopenharmony_ci	inode_cachep = kmem_cache_create("inode_cache",
231662306a36Sopenharmony_ci					 sizeof(struct inode),
231762306a36Sopenharmony_ci					 0,
231862306a36Sopenharmony_ci					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
231962306a36Sopenharmony_ci					 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
232062306a36Sopenharmony_ci					 init_once);
232162306a36Sopenharmony_ci
232262306a36Sopenharmony_ci	/* Hash may have been set up in inode_init_early */
232362306a36Sopenharmony_ci	if (!hashdist)
232462306a36Sopenharmony_ci		return;
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_ci	inode_hashtable =
232762306a36Sopenharmony_ci		alloc_large_system_hash("Inode-cache",
232862306a36Sopenharmony_ci					sizeof(struct hlist_head),
232962306a36Sopenharmony_ci					ihash_entries,
233062306a36Sopenharmony_ci					14,
233162306a36Sopenharmony_ci					HASH_ZERO,
233262306a36Sopenharmony_ci					&i_hash_shift,
233362306a36Sopenharmony_ci					&i_hash_mask,
233462306a36Sopenharmony_ci					0,
233562306a36Sopenharmony_ci					0);
233662306a36Sopenharmony_ci}
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_civoid init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
233962306a36Sopenharmony_ci{
234062306a36Sopenharmony_ci	inode->i_mode = mode;
234162306a36Sopenharmony_ci	if (S_ISCHR(mode)) {
234262306a36Sopenharmony_ci		inode->i_fop = &def_chr_fops;
234362306a36Sopenharmony_ci		inode->i_rdev = rdev;
234462306a36Sopenharmony_ci	} else if (S_ISBLK(mode)) {
234562306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_BLOCK))
234662306a36Sopenharmony_ci			inode->i_fop = &def_blk_fops;
234762306a36Sopenharmony_ci		inode->i_rdev = rdev;
234862306a36Sopenharmony_ci	} else if (S_ISFIFO(mode))
234962306a36Sopenharmony_ci		inode->i_fop = &pipefifo_fops;
235062306a36Sopenharmony_ci	else if (S_ISSOCK(mode))
235162306a36Sopenharmony_ci		;	/* leave it no_open_fops */
235262306a36Sopenharmony_ci	else
235362306a36Sopenharmony_ci		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
235462306a36Sopenharmony_ci				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
235562306a36Sopenharmony_ci				  inode->i_ino);
235662306a36Sopenharmony_ci}
235762306a36Sopenharmony_ciEXPORT_SYMBOL(init_special_inode);
235862306a36Sopenharmony_ci
235962306a36Sopenharmony_ci/**
236062306a36Sopenharmony_ci * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
236162306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was created from
236262306a36Sopenharmony_ci * @inode: New inode
236362306a36Sopenharmony_ci * @dir: Directory inode
236462306a36Sopenharmony_ci * @mode: mode of the new inode
236562306a36Sopenharmony_ci *
236662306a36Sopenharmony_ci * If the inode has been created through an idmapped mount the idmap of
236762306a36Sopenharmony_ci * the vfsmount must be passed through @idmap. This function will then take
236862306a36Sopenharmony_ci * care to map the inode according to @idmap before checking permissions
236962306a36Sopenharmony_ci * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
237062306a36Sopenharmony_ci * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
237162306a36Sopenharmony_ci */
237262306a36Sopenharmony_civoid inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
237362306a36Sopenharmony_ci		      const struct inode *dir, umode_t mode)
237462306a36Sopenharmony_ci{
237562306a36Sopenharmony_ci	inode_fsuid_set(inode, idmap);
237662306a36Sopenharmony_ci	if (dir && dir->i_mode & S_ISGID) {
237762306a36Sopenharmony_ci		inode->i_gid = dir->i_gid;
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_ci		/* Directories are special, and always inherit S_ISGID */
238062306a36Sopenharmony_ci		if (S_ISDIR(mode))
238162306a36Sopenharmony_ci			mode |= S_ISGID;
238262306a36Sopenharmony_ci	} else
238362306a36Sopenharmony_ci		inode_fsgid_set(inode, idmap);
238462306a36Sopenharmony_ci	inode->i_mode = mode;
238562306a36Sopenharmony_ci}
238662306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_owner);
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_ci/**
238962306a36Sopenharmony_ci * inode_owner_or_capable - check current task permissions to inode
239062306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was found from
239162306a36Sopenharmony_ci * @inode: inode being checked
239262306a36Sopenharmony_ci *
239362306a36Sopenharmony_ci * Return true if current either has CAP_FOWNER in a namespace with the
239462306a36Sopenharmony_ci * inode owner uid mapped, or owns the file.
239562306a36Sopenharmony_ci *
239662306a36Sopenharmony_ci * If the inode has been found through an idmapped mount the idmap of
239762306a36Sopenharmony_ci * the vfsmount must be passed through @idmap. This function will then take
239862306a36Sopenharmony_ci * care to map the inode according to @idmap before checking permissions.
239962306a36Sopenharmony_ci * On non-idmapped mounts or if permission checking is to be performed on the
240062306a36Sopenharmony_ci * raw inode simply passs @nop_mnt_idmap.
240162306a36Sopenharmony_ci */
240262306a36Sopenharmony_cibool inode_owner_or_capable(struct mnt_idmap *idmap,
240362306a36Sopenharmony_ci			    const struct inode *inode)
240462306a36Sopenharmony_ci{
240562306a36Sopenharmony_ci	vfsuid_t vfsuid;
240662306a36Sopenharmony_ci	struct user_namespace *ns;
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci	vfsuid = i_uid_into_vfsuid(idmap, inode);
240962306a36Sopenharmony_ci	if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
241062306a36Sopenharmony_ci		return true;
241162306a36Sopenharmony_ci
241262306a36Sopenharmony_ci	ns = current_user_ns();
241362306a36Sopenharmony_ci	if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
241462306a36Sopenharmony_ci		return true;
241562306a36Sopenharmony_ci	return false;
241662306a36Sopenharmony_ci}
241762306a36Sopenharmony_ciEXPORT_SYMBOL(inode_owner_or_capable);
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci/*
242062306a36Sopenharmony_ci * Direct i/o helper functions
242162306a36Sopenharmony_ci */
242262306a36Sopenharmony_cistatic void __inode_dio_wait(struct inode *inode)
242362306a36Sopenharmony_ci{
242462306a36Sopenharmony_ci	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
242562306a36Sopenharmony_ci	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
242662306a36Sopenharmony_ci
242762306a36Sopenharmony_ci	do {
242862306a36Sopenharmony_ci		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
242962306a36Sopenharmony_ci		if (atomic_read(&inode->i_dio_count))
243062306a36Sopenharmony_ci			schedule();
243162306a36Sopenharmony_ci	} while (atomic_read(&inode->i_dio_count));
243262306a36Sopenharmony_ci	finish_wait(wq, &q.wq_entry);
243362306a36Sopenharmony_ci}
243462306a36Sopenharmony_ci
243562306a36Sopenharmony_ci/**
243662306a36Sopenharmony_ci * inode_dio_wait - wait for outstanding DIO requests to finish
243762306a36Sopenharmony_ci * @inode: inode to wait for
243862306a36Sopenharmony_ci *
243962306a36Sopenharmony_ci * Waits for all pending direct I/O requests to finish so that we can
244062306a36Sopenharmony_ci * proceed with a truncate or equivalent operation.
244162306a36Sopenharmony_ci *
244262306a36Sopenharmony_ci * Must be called under a lock that serializes taking new references
244362306a36Sopenharmony_ci * to i_dio_count, usually by inode->i_mutex.
244462306a36Sopenharmony_ci */
244562306a36Sopenharmony_civoid inode_dio_wait(struct inode *inode)
244662306a36Sopenharmony_ci{
244762306a36Sopenharmony_ci	if (atomic_read(&inode->i_dio_count))
244862306a36Sopenharmony_ci		__inode_dio_wait(inode);
244962306a36Sopenharmony_ci}
245062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_dio_wait);
245162306a36Sopenharmony_ci
245262306a36Sopenharmony_ci/*
245362306a36Sopenharmony_ci * inode_set_flags - atomically set some inode flags
245462306a36Sopenharmony_ci *
245562306a36Sopenharmony_ci * Note: the caller should be holding i_mutex, or else be sure that
245662306a36Sopenharmony_ci * they have exclusive access to the inode structure (i.e., while the
245762306a36Sopenharmony_ci * inode is being instantiated).  The reason for the cmpxchg() loop
245862306a36Sopenharmony_ci * --- which wouldn't be necessary if all code paths which modify
245962306a36Sopenharmony_ci * i_flags actually followed this rule, is that there is at least one
246062306a36Sopenharmony_ci * code path which doesn't today so we use cmpxchg() out of an abundance
246162306a36Sopenharmony_ci * of caution.
246262306a36Sopenharmony_ci *
246362306a36Sopenharmony_ci * In the long run, i_mutex is overkill, and we should probably look
246462306a36Sopenharmony_ci * at using the i_lock spinlock to protect i_flags, and then make sure
246562306a36Sopenharmony_ci * it is so documented in include/linux/fs.h and that all code follows
246662306a36Sopenharmony_ci * the locking convention!!
246762306a36Sopenharmony_ci */
246862306a36Sopenharmony_civoid inode_set_flags(struct inode *inode, unsigned int flags,
246962306a36Sopenharmony_ci		     unsigned int mask)
247062306a36Sopenharmony_ci{
247162306a36Sopenharmony_ci	WARN_ON_ONCE(flags & ~mask);
247262306a36Sopenharmony_ci	set_mask_bits(&inode->i_flags, mask, flags);
247362306a36Sopenharmony_ci}
247462306a36Sopenharmony_ciEXPORT_SYMBOL(inode_set_flags);
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_civoid inode_nohighmem(struct inode *inode)
247762306a36Sopenharmony_ci{
247862306a36Sopenharmony_ci	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
247962306a36Sopenharmony_ci}
248062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_nohighmem);
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci/**
248362306a36Sopenharmony_ci * timestamp_truncate - Truncate timespec to a granularity
248462306a36Sopenharmony_ci * @t: Timespec
248562306a36Sopenharmony_ci * @inode: inode being updated
248662306a36Sopenharmony_ci *
248762306a36Sopenharmony_ci * Truncate a timespec to the granularity supported by the fs
248862306a36Sopenharmony_ci * containing the inode. Always rounds down. gran must
248962306a36Sopenharmony_ci * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
249062306a36Sopenharmony_ci */
249162306a36Sopenharmony_cistruct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
249262306a36Sopenharmony_ci{
249362306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
249462306a36Sopenharmony_ci	unsigned int gran = sb->s_time_gran;
249562306a36Sopenharmony_ci
249662306a36Sopenharmony_ci	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
249762306a36Sopenharmony_ci	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
249862306a36Sopenharmony_ci		t.tv_nsec = 0;
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci	/* Avoid division in the common cases 1 ns and 1 s. */
250162306a36Sopenharmony_ci	if (gran == 1)
250262306a36Sopenharmony_ci		; /* nothing */
250362306a36Sopenharmony_ci	else if (gran == NSEC_PER_SEC)
250462306a36Sopenharmony_ci		t.tv_nsec = 0;
250562306a36Sopenharmony_ci	else if (gran > 1 && gran < NSEC_PER_SEC)
250662306a36Sopenharmony_ci		t.tv_nsec -= t.tv_nsec % gran;
250762306a36Sopenharmony_ci	else
250862306a36Sopenharmony_ci		WARN(1, "invalid file time granularity: %u", gran);
250962306a36Sopenharmony_ci	return t;
251062306a36Sopenharmony_ci}
251162306a36Sopenharmony_ciEXPORT_SYMBOL(timestamp_truncate);
251262306a36Sopenharmony_ci
251362306a36Sopenharmony_ci/**
251462306a36Sopenharmony_ci * current_time - Return FS time
251562306a36Sopenharmony_ci * @inode: inode.
251662306a36Sopenharmony_ci *
251762306a36Sopenharmony_ci * Return the current time truncated to the time granularity supported by
251862306a36Sopenharmony_ci * the fs.
251962306a36Sopenharmony_ci *
252062306a36Sopenharmony_ci * Note that inode and inode->sb cannot be NULL.
252162306a36Sopenharmony_ci * Otherwise, the function warns and returns time without truncation.
252262306a36Sopenharmony_ci */
252362306a36Sopenharmony_cistruct timespec64 current_time(struct inode *inode)
252462306a36Sopenharmony_ci{
252562306a36Sopenharmony_ci	struct timespec64 now;
252662306a36Sopenharmony_ci
252762306a36Sopenharmony_ci	ktime_get_coarse_real_ts64(&now);
252862306a36Sopenharmony_ci	return timestamp_truncate(now, inode);
252962306a36Sopenharmony_ci}
253062306a36Sopenharmony_ciEXPORT_SYMBOL(current_time);
253162306a36Sopenharmony_ci
253262306a36Sopenharmony_ci/**
253362306a36Sopenharmony_ci * inode_set_ctime_current - set the ctime to current_time
253462306a36Sopenharmony_ci * @inode: inode
253562306a36Sopenharmony_ci *
253662306a36Sopenharmony_ci * Set the inode->i_ctime to the current value for the inode. Returns
253762306a36Sopenharmony_ci * the current value that was assigned to i_ctime.
253862306a36Sopenharmony_ci */
253962306a36Sopenharmony_cistruct timespec64 inode_set_ctime_current(struct inode *inode)
254062306a36Sopenharmony_ci{
254162306a36Sopenharmony_ci	struct timespec64 now = current_time(inode);
254262306a36Sopenharmony_ci
254362306a36Sopenharmony_ci	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
254462306a36Sopenharmony_ci	return now;
254562306a36Sopenharmony_ci}
254662306a36Sopenharmony_ciEXPORT_SYMBOL(inode_set_ctime_current);
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci/**
254962306a36Sopenharmony_ci * in_group_or_capable - check whether caller is CAP_FSETID privileged
255062306a36Sopenharmony_ci * @idmap:	idmap of the mount @inode was found from
255162306a36Sopenharmony_ci * @inode:	inode to check
255262306a36Sopenharmony_ci * @vfsgid:	the new/current vfsgid of @inode
255362306a36Sopenharmony_ci *
255462306a36Sopenharmony_ci * Check wether @vfsgid is in the caller's group list or if the caller is
255562306a36Sopenharmony_ci * privileged with CAP_FSETID over @inode. This can be used to determine
255662306a36Sopenharmony_ci * whether the setgid bit can be kept or must be dropped.
255762306a36Sopenharmony_ci *
255862306a36Sopenharmony_ci * Return: true if the caller is sufficiently privileged, false if not.
255962306a36Sopenharmony_ci */
256062306a36Sopenharmony_cibool in_group_or_capable(struct mnt_idmap *idmap,
256162306a36Sopenharmony_ci			 const struct inode *inode, vfsgid_t vfsgid)
256262306a36Sopenharmony_ci{
256362306a36Sopenharmony_ci	if (vfsgid_in_group_p(vfsgid))
256462306a36Sopenharmony_ci		return true;
256562306a36Sopenharmony_ci	if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
256662306a36Sopenharmony_ci		return true;
256762306a36Sopenharmony_ci	return false;
256862306a36Sopenharmony_ci}
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci/**
257162306a36Sopenharmony_ci * mode_strip_sgid - handle the sgid bit for non-directories
257262306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was created from
257362306a36Sopenharmony_ci * @dir: parent directory inode
257462306a36Sopenharmony_ci * @mode: mode of the file to be created in @dir
257562306a36Sopenharmony_ci *
257662306a36Sopenharmony_ci * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
257762306a36Sopenharmony_ci * raised and @dir has the S_ISGID bit raised ensure that the caller is
257862306a36Sopenharmony_ci * either in the group of the parent directory or they have CAP_FSETID
257962306a36Sopenharmony_ci * in their user namespace and are privileged over the parent directory.
258062306a36Sopenharmony_ci * In all other cases, strip the S_ISGID bit from @mode.
258162306a36Sopenharmony_ci *
258262306a36Sopenharmony_ci * Return: the new mode to use for the file
258362306a36Sopenharmony_ci */
258462306a36Sopenharmony_ciumode_t mode_strip_sgid(struct mnt_idmap *idmap,
258562306a36Sopenharmony_ci			const struct inode *dir, umode_t mode)
258662306a36Sopenharmony_ci{
258762306a36Sopenharmony_ci	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
258862306a36Sopenharmony_ci		return mode;
258962306a36Sopenharmony_ci	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
259062306a36Sopenharmony_ci		return mode;
259162306a36Sopenharmony_ci	if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
259262306a36Sopenharmony_ci		return mode;
259362306a36Sopenharmony_ci	return mode & ~S_ISGID;
259462306a36Sopenharmony_ci}
259562306a36Sopenharmony_ciEXPORT_SYMBOL(mode_strip_sgid);
2596