18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * (C) 1997 Linus Torvalds
48c2ecf20Sopenharmony_ci * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci#include <linux/export.h>
78c2ecf20Sopenharmony_ci#include <linux/fs.h>
88c2ecf20Sopenharmony_ci#include <linux/mm.h>
98c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
108c2ecf20Sopenharmony_ci#include <linux/hash.h>
118c2ecf20Sopenharmony_ci#include <linux/swap.h>
128c2ecf20Sopenharmony_ci#include <linux/security.h>
138c2ecf20Sopenharmony_ci#include <linux/cdev.h>
148c2ecf20Sopenharmony_ci#include <linux/memblock.h>
158c2ecf20Sopenharmony_ci#include <linux/fscrypt.h>
168c2ecf20Sopenharmony_ci#include <linux/fsnotify.h>
178c2ecf20Sopenharmony_ci#include <linux/mount.h>
188c2ecf20Sopenharmony_ci#include <linux/posix_acl.h>
198c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
208c2ecf20Sopenharmony_ci#include <linux/buffer_head.h> /* for inode_has_buffers */
218c2ecf20Sopenharmony_ci#include <linux/ratelimit.h>
228c2ecf20Sopenharmony_ci#include <linux/list_lru.h>
238c2ecf20Sopenharmony_ci#include <linux/iversion.h>
248c2ecf20Sopenharmony_ci#include <linux/xpm.h>
258c2ecf20Sopenharmony_ci#include <trace/events/writeback.h>
268c2ecf20Sopenharmony_ci#include "internal.h"
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci/*
298c2ecf20Sopenharmony_ci * Inode locking rules:
308c2ecf20Sopenharmony_ci *
318c2ecf20Sopenharmony_ci * inode->i_lock protects:
328c2ecf20Sopenharmony_ci *   inode->i_state, inode->i_hash, __iget()
338c2ecf20Sopenharmony_ci * Inode LRU list locks protect:
348c2ecf20Sopenharmony_ci *   inode->i_sb->s_inode_lru, inode->i_lru
358c2ecf20Sopenharmony_ci * inode->i_sb->s_inode_list_lock protects:
368c2ecf20Sopenharmony_ci *   inode->i_sb->s_inodes, inode->i_sb_list
378c2ecf20Sopenharmony_ci * bdi->wb.list_lock protects:
388c2ecf20Sopenharmony_ci *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
398c2ecf20Sopenharmony_ci * inode_hash_lock protects:
408c2ecf20Sopenharmony_ci *   inode_hashtable, inode->i_hash
418c2ecf20Sopenharmony_ci *
428c2ecf20Sopenharmony_ci * Lock ordering:
438c2ecf20Sopenharmony_ci *
448c2ecf20Sopenharmony_ci * inode->i_sb->s_inode_list_lock
458c2ecf20Sopenharmony_ci *   inode->i_lock
468c2ecf20Sopenharmony_ci *     Inode LRU list locks
478c2ecf20Sopenharmony_ci *
488c2ecf20Sopenharmony_ci * bdi->wb.list_lock
498c2ecf20Sopenharmony_ci *   inode->i_lock
508c2ecf20Sopenharmony_ci *
518c2ecf20Sopenharmony_ci * inode_hash_lock
528c2ecf20Sopenharmony_ci *   inode->i_sb->s_inode_list_lock
538c2ecf20Sopenharmony_ci *   inode->i_lock
548c2ecf20Sopenharmony_ci *
558c2ecf20Sopenharmony_ci * iunique_lock
568c2ecf20Sopenharmony_ci *   inode_hash_lock
578c2ecf20Sopenharmony_ci */
588c2ecf20Sopenharmony_ci
/* Mask applied by hash() to reduce the mixed value to a table index. */
static unsigned int i_hash_mask __read_mostly;
/* Right-shift amount used by hash() when folding the mixed value. */
static unsigned int i_hash_shift __read_mostly;
/* Bucket array indexed by hash(); inodes are chained through inode->i_hash. */
static struct hlist_head *inode_hashtable __read_mostly;
/* Taken around every hash-chain add/remove in this file. */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
638c2ecf20Sopenharmony_ci
/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 *
 * inode_init_always() installs this table as the a_ops of every inode's
 * i_data mapping.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci/*
738c2ecf20Sopenharmony_ci * Statistics gathering..
748c2ecf20Sopenharmony_ci */
/* nr_inodes/nr_unused are refreshed by proc_nr_inodes() on each sysctl access. */
struct inodes_stat_t inodes_stat;

/* Per-CPU counter: bumped in inode_init_always(), dropped in __destroy_inode(). */
static DEFINE_PER_CPU(unsigned long, nr_inodes);
/* Per-CPU counter: tracks successful LRU adds/removes (inode_lru_list_{add,del}). */
static DEFINE_PER_CPU(unsigned long, nr_unused);

/* Slab cache used when the filesystem provides no ->alloc_inode method. */
static struct kmem_cache *inode_cachep __read_mostly;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_cistatic long get_nr_inodes(void)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	int i;
858c2ecf20Sopenharmony_ci	long sum = 0;
868c2ecf20Sopenharmony_ci	for_each_possible_cpu(i)
878c2ecf20Sopenharmony_ci		sum += per_cpu(nr_inodes, i);
888c2ecf20Sopenharmony_ci	return sum < 0 ? 0 : sum;
898c2ecf20Sopenharmony_ci}
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_cistatic inline long get_nr_inodes_unused(void)
928c2ecf20Sopenharmony_ci{
938c2ecf20Sopenharmony_ci	int i;
948c2ecf20Sopenharmony_ci	long sum = 0;
958c2ecf20Sopenharmony_ci	for_each_possible_cpu(i)
968c2ecf20Sopenharmony_ci		sum += per_cpu(nr_unused, i);
978c2ecf20Sopenharmony_ci	return sum < 0 ? 0 : sum;
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ci
/*
 * Rough estimate only: total inodes minus unused inodes, never negative.
 * This does not count inodes that are actually dirty.
 */
long get_nr_dirty_inodes(void)
{
	long in_use = get_nr_inodes() - get_nr_inodes_unused();

	if (in_use <= 0)
		return 0;
	return in_use;
}
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci/*
1088c2ecf20Sopenharmony_ci * Handle nr_inode sysctl
1098c2ecf20Sopenharmony_ci */
1108c2ecf20Sopenharmony_ci#ifdef CONFIG_SYSCTL
1118c2ecf20Sopenharmony_ciint proc_nr_inodes(struct ctl_table *table, int write,
1128c2ecf20Sopenharmony_ci		   void *buffer, size_t *lenp, loff_t *ppos)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	inodes_stat.nr_inodes = get_nr_inodes();
1158c2ecf20Sopenharmony_ci	inodes_stat.nr_unused = get_nr_inodes_unused();
1168c2ecf20Sopenharmony_ci	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
1178c2ecf20Sopenharmony_ci}
1188c2ecf20Sopenharmony_ci#endif
1198c2ecf20Sopenharmony_ci
/*
 * ->open method of the default file_operations that inode_init_always()
 * installs: always fails with -ENXIO. Both arguments are unused.
 */
static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}
1248c2ecf20Sopenharmony_ci
/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 *
 * Return: 0 on success, -ENOMEM if security_inode_alloc() fails.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	/* the reference count starts at one */
	atomic_set(&inode->i_count, 1);
	/* defaults: no inode operations, and an ->open that fails with -ENXIO */
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	/* per-filesystem-type lockdep classes for i_lock and i_rwsem */
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	/* reset the inode's own address_space (i_data) */
	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	if (sb->s_type->fs_flags & FS_THP_SUPPORT)
		__set_bit(AS_THP_SUPPORT, &mapping->flags);
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	inode->i_private = NULL;
	/* i_mapping initially points at the inode's own i_data */
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	/*
	 * Any non-zero result from the security hook is reported as -ENOMEM.
	 * The nr_inodes counter is only bumped once the hook has succeeded.
	 */
	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);
2128c2ecf20Sopenharmony_ci
/*
 * Return @inode's memory directly to inode_cachep. No call_rcu() is issued
 * here. i_callback() also uses this as the fallback when the inode has no
 * ->free_inode method set.
 */
void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_cistatic void i_callback(struct rcu_head *head)
2208c2ecf20Sopenharmony_ci{
2218c2ecf20Sopenharmony_ci	struct inode *inode = container_of(head, struct inode, i_rcu);
2228c2ecf20Sopenharmony_ci	if (inode->free_inode)
2238c2ecf20Sopenharmony_ci		inode->free_inode(inode);
2248c2ecf20Sopenharmony_ci	else
2258c2ecf20Sopenharmony_ci		free_inode_nonrcu(inode);
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_cistatic struct inode *alloc_inode(struct super_block *sb)
2298c2ecf20Sopenharmony_ci{
2308c2ecf20Sopenharmony_ci	const struct super_operations *ops = sb->s_op;
2318c2ecf20Sopenharmony_ci	struct inode *inode;
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	if (ops->alloc_inode)
2348c2ecf20Sopenharmony_ci		inode = ops->alloc_inode(sb);
2358c2ecf20Sopenharmony_ci	else
2368c2ecf20Sopenharmony_ci		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	if (!inode)
2398c2ecf20Sopenharmony_ci		return NULL;
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	if (unlikely(inode_init_always(sb, inode))) {
2428c2ecf20Sopenharmony_ci		if (ops->destroy_inode) {
2438c2ecf20Sopenharmony_ci			ops->destroy_inode(inode);
2448c2ecf20Sopenharmony_ci			if (!ops->free_inode)
2458c2ecf20Sopenharmony_ci				return NULL;
2468c2ecf20Sopenharmony_ci		}
2478c2ecf20Sopenharmony_ci		inode->free_inode = ops->free_inode;
2488c2ecf20Sopenharmony_ci		i_callback(&inode->i_rcu);
2498c2ecf20Sopenharmony_ci		return NULL;
2508c2ecf20Sopenharmony_ci	}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	return inode;
2538c2ecf20Sopenharmony_ci}
2548c2ecf20Sopenharmony_ci
/*
 * Undo the generic per-inode state before the inode's memory is released:
 * detach from writeback, free security, fsnotify and file-lock state, run
 * the xpm hook, release cached ACLs and drop the nr_inodes counter.
 * The memory itself is not freed here.
 */
void __destroy_inode(struct inode *inode)
{
	/* an inode being destroyed must not still have buffers attached */
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	xpm_delete_cache_node_hook(inode);
	/*
	 * A zero link count was accounted in s_remove_count by drop_nlink() or
	 * clear_nlink(); undo that accounting now that the inode goes away.
	 */
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	/* only real cached ACLs are released, not NULL or "uncached" markers */
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_cistatic void destroy_inode(struct inode *inode)
2798c2ecf20Sopenharmony_ci{
2808c2ecf20Sopenharmony_ci	const struct super_operations *ops = inode->i_sb->s_op;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&inode->i_lru));
2838c2ecf20Sopenharmony_ci	__destroy_inode(inode);
2848c2ecf20Sopenharmony_ci	if (ops->destroy_inode) {
2858c2ecf20Sopenharmony_ci		ops->destroy_inode(inode);
2868c2ecf20Sopenharmony_ci		if (!ops->free_inode)
2878c2ecf20Sopenharmony_ci			return;
2888c2ecf20Sopenharmony_ci	}
2898c2ecf20Sopenharmony_ci	inode->free_inode = ops->free_inode;
2908c2ecf20Sopenharmony_ci	call_rcu(&inode->i_rcu, i_callback);
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci/**
2948c2ecf20Sopenharmony_ci * drop_nlink - directly drop an inode's link count
2958c2ecf20Sopenharmony_ci * @inode: inode
2968c2ecf20Sopenharmony_ci *
2978c2ecf20Sopenharmony_ci * This is a low-level filesystem helper to replace any
2988c2ecf20Sopenharmony_ci * direct filesystem manipulation of i_nlink.  In cases
2998c2ecf20Sopenharmony_ci * where we are attempting to track writes to the
3008c2ecf20Sopenharmony_ci * filesystem, a decrement to zero means an imminent
3018c2ecf20Sopenharmony_ci * write when the file is truncated and actually unlinked
3028c2ecf20Sopenharmony_ci * on the filesystem.
3038c2ecf20Sopenharmony_ci */
3048c2ecf20Sopenharmony_civoid drop_nlink(struct inode *inode)
3058c2ecf20Sopenharmony_ci{
3068c2ecf20Sopenharmony_ci	WARN_ON(inode->i_nlink == 0);
3078c2ecf20Sopenharmony_ci	inode->__i_nlink--;
3088c2ecf20Sopenharmony_ci	if (!inode->i_nlink)
3098c2ecf20Sopenharmony_ci		atomic_long_inc(&inode->i_sb->s_remove_count);
3108c2ecf20Sopenharmony_ci}
3118c2ecf20Sopenharmony_ciEXPORT_SYMBOL(drop_nlink);
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci/**
3148c2ecf20Sopenharmony_ci * clear_nlink - directly zero an inode's link count
3158c2ecf20Sopenharmony_ci * @inode: inode
3168c2ecf20Sopenharmony_ci *
3178c2ecf20Sopenharmony_ci * This is a low-level filesystem helper to replace any
3188c2ecf20Sopenharmony_ci * direct filesystem manipulation of i_nlink.  See
3198c2ecf20Sopenharmony_ci * drop_nlink() for why we care about i_nlink hitting zero.
3208c2ecf20Sopenharmony_ci */
3218c2ecf20Sopenharmony_civoid clear_nlink(struct inode *inode)
3228c2ecf20Sopenharmony_ci{
3238c2ecf20Sopenharmony_ci	if (inode->i_nlink) {
3248c2ecf20Sopenharmony_ci		inode->__i_nlink = 0;
3258c2ecf20Sopenharmony_ci		atomic_long_inc(&inode->i_sb->s_remove_count);
3268c2ecf20Sopenharmony_ci	}
3278c2ecf20Sopenharmony_ci}
3288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(clear_nlink);
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci/**
3318c2ecf20Sopenharmony_ci * set_nlink - directly set an inode's link count
3328c2ecf20Sopenharmony_ci * @inode: inode
3338c2ecf20Sopenharmony_ci * @nlink: new nlink (should be non-zero)
3348c2ecf20Sopenharmony_ci *
3358c2ecf20Sopenharmony_ci * This is a low-level filesystem helper to replace any
3368c2ecf20Sopenharmony_ci * direct filesystem manipulation of i_nlink.
3378c2ecf20Sopenharmony_ci */
3388c2ecf20Sopenharmony_civoid set_nlink(struct inode *inode, unsigned int nlink)
3398c2ecf20Sopenharmony_ci{
3408c2ecf20Sopenharmony_ci	if (!nlink) {
3418c2ecf20Sopenharmony_ci		clear_nlink(inode);
3428c2ecf20Sopenharmony_ci	} else {
3438c2ecf20Sopenharmony_ci		/* Yes, some filesystems do change nlink from zero to one */
3448c2ecf20Sopenharmony_ci		if (inode->i_nlink == 0)
3458c2ecf20Sopenharmony_ci			atomic_long_dec(&inode->i_sb->s_remove_count);
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci		inode->__i_nlink = nlink;
3488c2ecf20Sopenharmony_ci	}
3498c2ecf20Sopenharmony_ci}
3508c2ecf20Sopenharmony_ciEXPORT_SYMBOL(set_nlink);
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci/**
3538c2ecf20Sopenharmony_ci * inc_nlink - directly increment an inode's link count
3548c2ecf20Sopenharmony_ci * @inode: inode
3558c2ecf20Sopenharmony_ci *
3568c2ecf20Sopenharmony_ci * This is a low-level filesystem helper to replace any
3578c2ecf20Sopenharmony_ci * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with drop_nlink().
3598c2ecf20Sopenharmony_ci */
3608c2ecf20Sopenharmony_civoid inc_nlink(struct inode *inode)
3618c2ecf20Sopenharmony_ci{
3628c2ecf20Sopenharmony_ci	if (unlikely(inode->i_nlink == 0)) {
3638c2ecf20Sopenharmony_ci		WARN_ON(!(inode->i_state & I_LINKABLE));
3648c2ecf20Sopenharmony_ci		atomic_long_dec(&inode->i_sb->s_remove_count);
3658c2ecf20Sopenharmony_ci	}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci	inode->__i_nlink++;
3688c2ecf20Sopenharmony_ci}
3698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inc_nlink);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_cistatic void __address_space_init_once(struct address_space *mapping)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
3748c2ecf20Sopenharmony_ci	init_rwsem(&mapping->i_mmap_rwsem);
3758c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&mapping->private_list);
3768c2ecf20Sopenharmony_ci	spin_lock_init(&mapping->private_lock);
3778c2ecf20Sopenharmony_ci	mapping->i_mmap = RB_ROOT_CACHED;
3788c2ecf20Sopenharmony_ci}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_civoid address_space_init_once(struct address_space *mapping)
3818c2ecf20Sopenharmony_ci{
3828c2ecf20Sopenharmony_ci	memset(mapping, 0, sizeof(*mapping));
3838c2ecf20Sopenharmony_ci	__address_space_init_once(mapping);
3848c2ecf20Sopenharmony_ci}
3858c2ecf20Sopenharmony_ciEXPORT_SYMBOL(address_space_init_once);
3868c2ecf20Sopenharmony_ci
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across uses
 * of the inode, so let the slab allocator know about that.
 */
3928c2ecf20Sopenharmony_civoid inode_init_once(struct inode *inode)
3938c2ecf20Sopenharmony_ci{
3948c2ecf20Sopenharmony_ci	memset(inode, 0, sizeof(*inode));
3958c2ecf20Sopenharmony_ci	INIT_HLIST_NODE(&inode->i_hash);
3968c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_devices);
3978c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_io_list);
3988c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_wb_list);
3998c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&inode->i_lru);
4008c2ecf20Sopenharmony_ci	__address_space_init_once(&inode->i_data);
4018c2ecf20Sopenharmony_ci	i_size_ordered_init(inode);
4028c2ecf20Sopenharmony_ci}
4038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_init_once);
4048c2ecf20Sopenharmony_ci
/*
 * void-pointer adapter around inode_init_once(): @foo is treated as a
 * struct inode and given its one-time initialisation.
 */
static void init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci/*
4138c2ecf20Sopenharmony_ci * inode->i_lock must be held
4148c2ecf20Sopenharmony_ci */
void __iget(struct inode *inode)
{
	/*
	 * Unconditional increment of the reference count; unlike ihold(), the
	 * previous value is not checked.
	 */
	atomic_inc(&inode->i_count);
}
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci/*
4218c2ecf20Sopenharmony_ci * get additional reference to inode; caller must already hold one.
4228c2ecf20Sopenharmony_ci */
void ihold(struct inode *inode)
{
	/*
	 * A result below 2 means the count was below 1 before the increment,
	 * i.e. the caller did not already hold a reference as required.
	 */
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_cistatic void inode_lru_list_add(struct inode *inode)
4308c2ecf20Sopenharmony_ci{
4318c2ecf20Sopenharmony_ci	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
4328c2ecf20Sopenharmony_ci		this_cpu_inc(nr_unused);
4338c2ecf20Sopenharmony_ci	else
4348c2ecf20Sopenharmony_ci		inode->i_state |= I_REFERENCED;
4358c2ecf20Sopenharmony_ci}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci/*
4388c2ecf20Sopenharmony_ci * Add inode to LRU if needed (inode is unused and clean).
4398c2ecf20Sopenharmony_ci *
4408c2ecf20Sopenharmony_ci * Needs inode->i_lock held.
4418c2ecf20Sopenharmony_ci */
4428c2ecf20Sopenharmony_civoid inode_add_lru(struct inode *inode)
4438c2ecf20Sopenharmony_ci{
4448c2ecf20Sopenharmony_ci	if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
4458c2ecf20Sopenharmony_ci				I_FREEING | I_WILL_FREE)) &&
4468c2ecf20Sopenharmony_ci	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)
4478c2ecf20Sopenharmony_ci		inode_lru_list_add(inode);
4488c2ecf20Sopenharmony_ci}
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_cistatic void inode_lru_list_del(struct inode *inode)
4528c2ecf20Sopenharmony_ci{
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
4558c2ecf20Sopenharmony_ci		this_cpu_dec(nr_unused);
4568c2ecf20Sopenharmony_ci}
4578c2ecf20Sopenharmony_ci
/*
 * Set I_LRU_ISOLATING on @inode. The caller must hold inode->i_lock
 * (checked by lockdep). Warns if the flag is already set or if the inode
 * is I_FREEING / I_WILL_FREE.
 */
static void inode_pin_lru_isolating(struct inode *inode)
{
	lockdep_assert_held(&inode->i_lock);
	WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
	inode->i_state |= I_LRU_ISOLATING;
}
4648c2ecf20Sopenharmony_ci
/*
 * Clear I_LRU_ISOLATING under inode->i_lock and wake any task sleeping on
 * that bit (see inode_wait_for_lru_isolating()). Takes and releases i_lock
 * itself. Warns if the flag was not set.
 */
static void inode_unpin_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
	inode->i_state &= ~I_LRU_ISOLATING;
	/*
	 * NOTE(review): the barrier presumably orders the flag clear before
	 * wake_up_bit() looks for waiters - confirm against wake_up_bit() docs.
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
	spin_unlock(&inode->i_lock);
}
4748c2ecf20Sopenharmony_ci
/*
 * If @inode currently has I_LRU_ISOLATING set, sleep (uninterruptibly)
 * until the bit is cleared. Takes and releases inode->i_lock itself; the
 * lock is dropped across the sleep.
 */
static void inode_wait_for_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (inode->i_state & I_LRU_ISOLATING) {
		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
		wait_queue_head_t *wqh;

		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
		/* cannot sleep holding a spinlock: drop it, wait, then retake */
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
		/* after the wait the flag is expected to be clear */
		WARN_ON(inode->i_state & I_LRU_ISOLATING);
	}
	spin_unlock(&inode->i_lock);
}
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci/**
4928c2ecf20Sopenharmony_ci * inode_sb_list_add - add inode to the superblock list of inodes
4938c2ecf20Sopenharmony_ci * @inode: inode to add
4948c2ecf20Sopenharmony_ci */
4958c2ecf20Sopenharmony_civoid inode_sb_list_add(struct inode *inode)
4968c2ecf20Sopenharmony_ci{
4978c2ecf20Sopenharmony_ci	spin_lock(&inode->i_sb->s_inode_list_lock);
4988c2ecf20Sopenharmony_ci	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
4998c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_sb->s_inode_list_lock);
5008c2ecf20Sopenharmony_ci}
5018c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(inode_sb_list_add);
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_cistatic inline void inode_sb_list_del(struct inode *inode)
5048c2ecf20Sopenharmony_ci{
5058c2ecf20Sopenharmony_ci	if (!list_empty(&inode->i_sb_list)) {
5068c2ecf20Sopenharmony_ci		spin_lock(&inode->i_sb->s_inode_list_lock);
5078c2ecf20Sopenharmony_ci		list_del_init(&inode->i_sb_list);
5088c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_sb->s_inode_list_lock);
5098c2ecf20Sopenharmony_ci	}
5108c2ecf20Sopenharmony_ci}
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_cistatic unsigned long hash(struct super_block *sb, unsigned long hashval)
5138c2ecf20Sopenharmony_ci{
5148c2ecf20Sopenharmony_ci	unsigned long tmp;
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
5178c2ecf20Sopenharmony_ci			L1_CACHE_BYTES;
5188c2ecf20Sopenharmony_ci	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
5198c2ecf20Sopenharmony_ci	return tmp & i_hash_mask;
5208c2ecf20Sopenharmony_ci}
5218c2ecf20Sopenharmony_ci
5228c2ecf20Sopenharmony_ci/**
5238c2ecf20Sopenharmony_ci *	__insert_inode_hash - hash an inode
5248c2ecf20Sopenharmony_ci *	@inode: unhashed inode
5258c2ecf20Sopenharmony_ci *	@hashval: unsigned long value used to locate this object in the
5268c2ecf20Sopenharmony_ci *		inode_hashtable.
5278c2ecf20Sopenharmony_ci *
5288c2ecf20Sopenharmony_ci *	Add an inode to the inode hash for this superblock.
5298c2ecf20Sopenharmony_ci */
5308c2ecf20Sopenharmony_civoid __insert_inode_hash(struct inode *inode, unsigned long hashval)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
5358c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
5368c2ecf20Sopenharmony_ci	hlist_add_head_rcu(&inode->i_hash, b);
5378c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
5388c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
5398c2ecf20Sopenharmony_ci}
5408c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__insert_inode_hash);
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci/**
5438c2ecf20Sopenharmony_ci *	__remove_inode_hash - remove an inode from the hash
5448c2ecf20Sopenharmony_ci *	@inode: inode to unhash
5458c2ecf20Sopenharmony_ci *
 *	Remove an inode from the inode hash table.
5478c2ecf20Sopenharmony_ci */
void __remove_inode_hash(struct inode *inode)
{
	/* lock order: inode_hash_lock first, then inode->i_lock */
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* unlink from the hash chain and reinitialise i_hash */
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);
5578c2ecf20Sopenharmony_ci
/*
 * Final consistency checks on an inode that is being freed, after which its
 * state is set to exactly I_FREEING | I_CLEAR.
 *
 * BUGs if the mapping still has pages or exceptional entries, if the
 * mapping's private_list or the inode's i_wb_list is non-empty, if
 * I_FREEING is not set, or if I_CLEAR is already set.
 */
void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __delete_from_page_cache())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	BUG_ON(inode->i_data.nrexceptional);
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci/*
5798c2ecf20Sopenharmony_ci * Free the inode passed in, removing it from the lists it is still connected
5808c2ecf20Sopenharmony_ci * to. We remove any pages still attached to the inode and wait for any IO that
5818c2ecf20Sopenharmony_ci * is still in progress before finally destroying the inode.
5828c2ecf20Sopenharmony_ci *
5838c2ecf20Sopenharmony_ci * An inode must already be marked I_FREEING so that we avoid the inode being
5848c2ecf20Sopenharmony_ci * moved back onto lists if we race with other code that manipulates the lists
5858c2ecf20Sopenharmony_ci * (e.g. writeback_single_inode). The caller is responsible for setting this.
5868c2ecf20Sopenharmony_ci *
5878c2ecf20Sopenharmony_ci * An inode must already be removed from the LRU list before being evicted from
5888c2ecf20Sopenharmony_ci * the cache. This should occur atomically with setting the I_FREEING state
5898c2ecf20Sopenharmony_ci * flag, so no inodes here should ever be on the LRU when being evicted.
5908c2ecf20Sopenharmony_ci */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	/* caller contract: I_FREEING is set and the inode is off the LRU */
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	/* unhook from the writeback io list and the superblock inode list */
	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/* sleep until any holder of I_LRU_ISOLATING has cleared it */
	inode_wait_for_lru_isolating(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode.  We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	/*
	 * Filesystem-specific teardown if provided; otherwise the default is
	 * truncate_inode_pages_final() on i_data followed by clear_inode().
	 * Either way the state must be exactly I_FREEING | I_CLEAR afterwards
	 * (checked by the BUG_ON further down).
	 */
	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	/* drop block/character device associations, if any */
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/* wake anyone waiting on the __I_NEW bit of this inode */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_ci/*
6358c2ecf20Sopenharmony_ci * dispose_list - dispose of the contents of a local list
6368c2ecf20Sopenharmony_ci * @head: the head of the list to free
6378c2ecf20Sopenharmony_ci *
6388c2ecf20Sopenharmony_ci * Dispose-list gets a local list with local inodes in it, so it doesn't
6398c2ecf20Sopenharmony_ci * need to worry about list corruption and SMP locks.
6408c2ecf20Sopenharmony_ci */
6418c2ecf20Sopenharmony_cistatic void dispose_list(struct list_head *head)
6428c2ecf20Sopenharmony_ci{
6438c2ecf20Sopenharmony_ci	while (!list_empty(head)) {
6448c2ecf20Sopenharmony_ci		struct inode *inode;
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci		inode = list_first_entry(head, struct inode, i_lru);
6478c2ecf20Sopenharmony_ci		list_del_init(&inode->i_lru);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci		evict(inode);
6508c2ecf20Sopenharmony_ci		cond_resched();
6518c2ecf20Sopenharmony_ci	}
6528c2ecf20Sopenharmony_ci}
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci/**
6558c2ecf20Sopenharmony_ci * evict_inodes	- evict all evictable inodes for a superblock
6568c2ecf20Sopenharmony_ci * @sb:		superblock to operate on
6578c2ecf20Sopenharmony_ci *
6588c2ecf20Sopenharmony_ci * Make sure that no inodes with zero refcount are retained.  This is
6598c2ecf20Sopenharmony_ci * called by superblock shutdown after having SB_ACTIVE flag removed,
6608c2ecf20Sopenharmony_ci * so any inode reaching zero refcount during or after that call will
6618c2ecf20Sopenharmony_ci * be immediately evicted.
6628c2ecf20Sopenharmony_ci */
6638c2ecf20Sopenharmony_civoid evict_inodes(struct super_block *sb)
6648c2ecf20Sopenharmony_ci{
6658c2ecf20Sopenharmony_ci	struct inode *inode, *next;
6668c2ecf20Sopenharmony_ci	LIST_HEAD(dispose);
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ciagain:
6698c2ecf20Sopenharmony_ci	spin_lock(&sb->s_inode_list_lock);
6708c2ecf20Sopenharmony_ci	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
6718c2ecf20Sopenharmony_ci		if (atomic_read(&inode->i_count))
6728c2ecf20Sopenharmony_ci			continue;
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
6758c2ecf20Sopenharmony_ci		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
6768c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
6778c2ecf20Sopenharmony_ci			continue;
6788c2ecf20Sopenharmony_ci		}
6798c2ecf20Sopenharmony_ci
6808c2ecf20Sopenharmony_ci		inode->i_state |= I_FREEING;
6818c2ecf20Sopenharmony_ci		inode_lru_list_del(inode);
6828c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
6838c2ecf20Sopenharmony_ci		list_add(&inode->i_lru, &dispose);
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci		/*
6868c2ecf20Sopenharmony_ci		 * We can have a ton of inodes to evict at unmount time given
6878c2ecf20Sopenharmony_ci		 * enough memory, check to see if we need to go to sleep for a
6888c2ecf20Sopenharmony_ci		 * bit so we don't livelock.
6898c2ecf20Sopenharmony_ci		 */
6908c2ecf20Sopenharmony_ci		if (need_resched()) {
6918c2ecf20Sopenharmony_ci			spin_unlock(&sb->s_inode_list_lock);
6928c2ecf20Sopenharmony_ci			cond_resched();
6938c2ecf20Sopenharmony_ci			dispose_list(&dispose);
6948c2ecf20Sopenharmony_ci			goto again;
6958c2ecf20Sopenharmony_ci		}
6968c2ecf20Sopenharmony_ci	}
6978c2ecf20Sopenharmony_ci	spin_unlock(&sb->s_inode_list_lock);
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	dispose_list(&dispose);
7008c2ecf20Sopenharmony_ci}
7018c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(evict_inodes);
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci/**
7048c2ecf20Sopenharmony_ci * invalidate_inodes	- attempt to free all inodes on a superblock
7058c2ecf20Sopenharmony_ci * @sb:		superblock to operate on
7068c2ecf20Sopenharmony_ci * @kill_dirty: flag to guide handling of dirty inodes
7078c2ecf20Sopenharmony_ci *
7088c2ecf20Sopenharmony_ci * Attempts to free all inodes for a given superblock.  If there were any
7098c2ecf20Sopenharmony_ci * busy inodes return a non-zero value, else zero.
7108c2ecf20Sopenharmony_ci * If @kill_dirty is set, discard dirty inodes too, otherwise treat
7118c2ecf20Sopenharmony_ci * them as busy.
7128c2ecf20Sopenharmony_ci */
7138c2ecf20Sopenharmony_ciint invalidate_inodes(struct super_block *sb, bool kill_dirty)
7148c2ecf20Sopenharmony_ci{
7158c2ecf20Sopenharmony_ci	int busy = 0;
7168c2ecf20Sopenharmony_ci	struct inode *inode, *next;
7178c2ecf20Sopenharmony_ci	LIST_HEAD(dispose);
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ciagain:
7208c2ecf20Sopenharmony_ci	spin_lock(&sb->s_inode_list_lock);
7218c2ecf20Sopenharmony_ci	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
7228c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
7238c2ecf20Sopenharmony_ci		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
7248c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
7258c2ecf20Sopenharmony_ci			continue;
7268c2ecf20Sopenharmony_ci		}
7278c2ecf20Sopenharmony_ci		if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
7288c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
7298c2ecf20Sopenharmony_ci			busy = 1;
7308c2ecf20Sopenharmony_ci			continue;
7318c2ecf20Sopenharmony_ci		}
7328c2ecf20Sopenharmony_ci		if (atomic_read(&inode->i_count)) {
7338c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
7348c2ecf20Sopenharmony_ci			busy = 1;
7358c2ecf20Sopenharmony_ci			continue;
7368c2ecf20Sopenharmony_ci		}
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci		inode->i_state |= I_FREEING;
7398c2ecf20Sopenharmony_ci		inode_lru_list_del(inode);
7408c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
7418c2ecf20Sopenharmony_ci		list_add(&inode->i_lru, &dispose);
7428c2ecf20Sopenharmony_ci		if (need_resched()) {
7438c2ecf20Sopenharmony_ci			spin_unlock(&sb->s_inode_list_lock);
7448c2ecf20Sopenharmony_ci			cond_resched();
7458c2ecf20Sopenharmony_ci			dispose_list(&dispose);
7468c2ecf20Sopenharmony_ci			goto again;
7478c2ecf20Sopenharmony_ci		}
7488c2ecf20Sopenharmony_ci	}
7498c2ecf20Sopenharmony_ci	spin_unlock(&sb->s_inode_list_lock);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci	dispose_list(&dispose);
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci	return busy;
7548c2ecf20Sopenharmony_ci}
7558c2ecf20Sopenharmony_ci
/*
 * Isolate the inode from the LRU in preparation for freeing it.
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 *
 * @item:     the inode's ->i_lru list entry
 * @lru:      the LRU list @item currently sits on
 * @lru_lock: lock of that LRU list; dropped and retaken on the LRU_RETRY path
 * @arg:      list head onto which inodes selected for freeing are moved
 *
 * Return values:
 *   LRU_SKIP    - inode->i_lock could not be taken without blocking
 *   LRU_REMOVED - the inode was taken off the LRU, either because it is in
 *                 use / in a non-idle state, or because it was marked
 *                 I_FREEING and moved to the @arg list
 *   LRU_ROTATE  - I_REFERENCED was set and has now been cleared
 *   LRU_RETRY   - the LRU lock was dropped to strip buffers/pagecache
 */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * we are inverting the lru lock/inode->i_lock here, so use a trylock.
	 * If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes that still hold a reference, or that have any state bit other
	 * than I_REFERENCED set (e.g. dirty), are still in use. We cannot
	 * reclaim them now, so drop them from the LRU and adjust nr_unused.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	if (inode_has_buffers(inode) || inode->i_data.nrpages) {
		/*
		 * NOTE(review): the pin presumably pairs with
		 * inode_wait_for_lru_isolating() in evict() so the inode
		 * stays valid while both locks are dropped - confirm against
		 * the helper's definition.
		 */
		inode_pin_lru_isolating(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			/* Account the result to kswapd or direct reclaim. */
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			if (current->reclaim_state)
				current->reclaim_state->reclaimed_slab += reap;
		}
		inode_unpin_lru_isolating(inode);
		/* Retake the LRU lock before handing control back to the walker. */
		spin_lock(lru_lock);
		return LRU_RETRY;
	}

	/* Nothing pins the inode any more: mark it and queue it for disposal. */
	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci/*
8328c2ecf20Sopenharmony_ci * Walk the superblock inode LRU for freeable inodes and attempt to free them.
8338c2ecf20Sopenharmony_ci * This is called from the superblock shrinker function with a number of inodes
8348c2ecf20Sopenharmony_ci * to trim from the LRU. Inodes to be freed are moved to a temporary list and
8358c2ecf20Sopenharmony_ci * then are freed outside inode_lock by dispose_list().
8368c2ecf20Sopenharmony_ci */
8378c2ecf20Sopenharmony_cilong prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
8388c2ecf20Sopenharmony_ci{
8398c2ecf20Sopenharmony_ci	LIST_HEAD(freeable);
8408c2ecf20Sopenharmony_ci	long freed;
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ci	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
8438c2ecf20Sopenharmony_ci				     inode_lru_isolate, &freeable);
8448c2ecf20Sopenharmony_ci	dispose_list(&freeable);
8458c2ecf20Sopenharmony_ci	return freed;
8468c2ecf20Sopenharmony_ci}
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode);
8498c2ecf20Sopenharmony_ci/*
8508c2ecf20Sopenharmony_ci * Called with the inode lock held.
8518c2ecf20Sopenharmony_ci */
8528c2ecf20Sopenharmony_cistatic struct inode *find_inode(struct super_block *sb,
8538c2ecf20Sopenharmony_ci				struct hlist_head *head,
8548c2ecf20Sopenharmony_ci				int (*test)(struct inode *, void *),
8558c2ecf20Sopenharmony_ci				void *data)
8568c2ecf20Sopenharmony_ci{
8578c2ecf20Sopenharmony_ci	struct inode *inode = NULL;
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_cirepeat:
8608c2ecf20Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
8618c2ecf20Sopenharmony_ci		if (inode->i_sb != sb)
8628c2ecf20Sopenharmony_ci			continue;
8638c2ecf20Sopenharmony_ci		if (!test(inode, data))
8648c2ecf20Sopenharmony_ci			continue;
8658c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
8668c2ecf20Sopenharmony_ci		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
8678c2ecf20Sopenharmony_ci			__wait_on_freeing_inode(inode);
8688c2ecf20Sopenharmony_ci			goto repeat;
8698c2ecf20Sopenharmony_ci		}
8708c2ecf20Sopenharmony_ci		if (unlikely(inode->i_state & I_CREATING)) {
8718c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
8728c2ecf20Sopenharmony_ci			return ERR_PTR(-ESTALE);
8738c2ecf20Sopenharmony_ci		}
8748c2ecf20Sopenharmony_ci		__iget(inode);
8758c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
8768c2ecf20Sopenharmony_ci		return inode;
8778c2ecf20Sopenharmony_ci	}
8788c2ecf20Sopenharmony_ci	return NULL;
8798c2ecf20Sopenharmony_ci}
8808c2ecf20Sopenharmony_ci
8818c2ecf20Sopenharmony_ci/*
8828c2ecf20Sopenharmony_ci * find_inode_fast is the fast path version of find_inode, see the comment at
8838c2ecf20Sopenharmony_ci * iget_locked for details.
8848c2ecf20Sopenharmony_ci */
8858c2ecf20Sopenharmony_cistatic struct inode *find_inode_fast(struct super_block *sb,
8868c2ecf20Sopenharmony_ci				struct hlist_head *head, unsigned long ino)
8878c2ecf20Sopenharmony_ci{
8888c2ecf20Sopenharmony_ci	struct inode *inode = NULL;
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_cirepeat:
8918c2ecf20Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
8928c2ecf20Sopenharmony_ci		if (inode->i_ino != ino)
8938c2ecf20Sopenharmony_ci			continue;
8948c2ecf20Sopenharmony_ci		if (inode->i_sb != sb)
8958c2ecf20Sopenharmony_ci			continue;
8968c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
8978c2ecf20Sopenharmony_ci		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
8988c2ecf20Sopenharmony_ci			__wait_on_freeing_inode(inode);
8998c2ecf20Sopenharmony_ci			goto repeat;
9008c2ecf20Sopenharmony_ci		}
9018c2ecf20Sopenharmony_ci		if (unlikely(inode->i_state & I_CREATING)) {
9028c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
9038c2ecf20Sopenharmony_ci			return ERR_PTR(-ESTALE);
9048c2ecf20Sopenharmony_ci		}
9058c2ecf20Sopenharmony_ci		__iget(inode);
9068c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
9078c2ecf20Sopenharmony_ci		return inode;
9088c2ecf20Sopenharmony_ci	}
9098c2ecf20Sopenharmony_ci	return NULL;
9108c2ecf20Sopenharmony_ci}
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci/*
9138c2ecf20Sopenharmony_ci * Each cpu owns a range of LAST_INO_BATCH numbers.
9148c2ecf20Sopenharmony_ci * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
9158c2ecf20Sopenharmony_ci * to renew the exhausted range.
9168c2ecf20Sopenharmony_ci *
9178c2ecf20Sopenharmony_ci * This does not significantly increase overflow rate because every CPU can
9188c2ecf20Sopenharmony_ci * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
9198c2ecf20Sopenharmony_ci * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
9208c2ecf20Sopenharmony_ci * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
9218c2ecf20Sopenharmony_ci * overflow rate by 2x, which does not seem too significant.
9228c2ecf20Sopenharmony_ci *
9238c2ecf20Sopenharmony_ci * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
9248c2ecf20Sopenharmony_ci * error if st_ino won't fit in target struct field. Use 32bit counter
9258c2ecf20Sopenharmony_ci * here to attempt to avoid that.
9268c2ecf20Sopenharmony_ci */
9278c2ecf20Sopenharmony_ci#define LAST_INO_BATCH 1024
9288c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(unsigned int, last_ino);
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ciunsigned int get_next_ino(void)
9318c2ecf20Sopenharmony_ci{
9328c2ecf20Sopenharmony_ci	unsigned int *p = &get_cpu_var(last_ino);
9338c2ecf20Sopenharmony_ci	unsigned int res = *p;
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP
9368c2ecf20Sopenharmony_ci	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
9378c2ecf20Sopenharmony_ci		static atomic_t shared_last_ino;
9388c2ecf20Sopenharmony_ci		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci		res = next - LAST_INO_BATCH;
9418c2ecf20Sopenharmony_ci	}
9428c2ecf20Sopenharmony_ci#endif
9438c2ecf20Sopenharmony_ci
9448c2ecf20Sopenharmony_ci	res++;
9458c2ecf20Sopenharmony_ci	/* get_next_ino should not provide a 0 inode number */
9468c2ecf20Sopenharmony_ci	if (unlikely(!res))
9478c2ecf20Sopenharmony_ci		res++;
9488c2ecf20Sopenharmony_ci	*p = res;
9498c2ecf20Sopenharmony_ci	put_cpu_var(last_ino);
9508c2ecf20Sopenharmony_ci	return res;
9518c2ecf20Sopenharmony_ci}
9528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_next_ino);
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci/**
9558c2ecf20Sopenharmony_ci *	new_inode_pseudo 	- obtain an inode
9568c2ecf20Sopenharmony_ci *	@sb: superblock
9578c2ecf20Sopenharmony_ci *
9588c2ecf20Sopenharmony_ci *	Allocates a new inode for given superblock.
9598c2ecf20Sopenharmony_ci *	Inode wont be chained in superblock s_inodes list
9608c2ecf20Sopenharmony_ci *	This means :
9618c2ecf20Sopenharmony_ci *	- fs can't be unmount
9628c2ecf20Sopenharmony_ci *	- quotas, fsnotify, writeback can't work
9638c2ecf20Sopenharmony_ci */
9648c2ecf20Sopenharmony_cistruct inode *new_inode_pseudo(struct super_block *sb)
9658c2ecf20Sopenharmony_ci{
9668c2ecf20Sopenharmony_ci	struct inode *inode = alloc_inode(sb);
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci	if (inode) {
9698c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
9708c2ecf20Sopenharmony_ci		inode->i_state = 0;
9718c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
9728c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&inode->i_sb_list);
9738c2ecf20Sopenharmony_ci	}
9748c2ecf20Sopenharmony_ci	return inode;
9758c2ecf20Sopenharmony_ci}
9768c2ecf20Sopenharmony_ci
9778c2ecf20Sopenharmony_ci/**
9788c2ecf20Sopenharmony_ci *	new_inode 	- obtain an inode
9798c2ecf20Sopenharmony_ci *	@sb: superblock
9808c2ecf20Sopenharmony_ci *
9818c2ecf20Sopenharmony_ci *	Allocates a new inode for given superblock. The default gfp_mask
9828c2ecf20Sopenharmony_ci *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
9838c2ecf20Sopenharmony_ci *	If HIGHMEM pages are unsuitable or it is known that pages allocated
9848c2ecf20Sopenharmony_ci *	for the page cache are not reclaimable or migratable,
9858c2ecf20Sopenharmony_ci *	mapping_set_gfp_mask() must be called with suitable flags on the
9868c2ecf20Sopenharmony_ci *	newly created inode's mapping
9878c2ecf20Sopenharmony_ci *
9888c2ecf20Sopenharmony_ci */
9898c2ecf20Sopenharmony_cistruct inode *new_inode(struct super_block *sb)
9908c2ecf20Sopenharmony_ci{
9918c2ecf20Sopenharmony_ci	struct inode *inode;
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci	spin_lock_prefetch(&sb->s_inode_list_lock);
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci	inode = new_inode_pseudo(sb);
9968c2ecf20Sopenharmony_ci	if (inode)
9978c2ecf20Sopenharmony_ci		inode_sb_list_add(inode);
9988c2ecf20Sopenharmony_ci	return inode;
9998c2ecf20Sopenharmony_ci}
10008c2ecf20Sopenharmony_ciEXPORT_SYMBOL(new_inode);
10018c2ecf20Sopenharmony_ci
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Give the i_rwsem of a directory inode its own lockdep class
 * (the filesystem type's i_mutex_dir_key).  Non-directories are left
 * untouched, as are directories whose filesystem has already replaced
 * the default i_mutex_key class.
 */
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	struct file_system_type *type;

	if (!S_ISDIR(inode->i_mode))
		return;

	type = inode->i_sb->s_type;

	/* Set new key only if filesystem hasn't already changed it */
	if (!lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key))
		return;

	/*
	 * The rwsem is re-initialised before the class is switched, so
	 * nobody may actually be holding it at this point.
	 */
	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key);
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci/**
10248c2ecf20Sopenharmony_ci * unlock_new_inode - clear the I_NEW state and wake up any waiters
10258c2ecf20Sopenharmony_ci * @inode:	new inode to unlock
10268c2ecf20Sopenharmony_ci *
10278c2ecf20Sopenharmony_ci * Called when the inode is fully initialised to clear the new state of the
10288c2ecf20Sopenharmony_ci * inode and wake up anyone waiting for the inode to finish initialisation.
10298c2ecf20Sopenharmony_ci */
10308c2ecf20Sopenharmony_civoid unlock_new_inode(struct inode *inode)
10318c2ecf20Sopenharmony_ci{
10328c2ecf20Sopenharmony_ci	lockdep_annotate_inode_mutex_key(inode);
10338c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
10348c2ecf20Sopenharmony_ci	WARN_ON(!(inode->i_state & I_NEW));
10358c2ecf20Sopenharmony_ci	inode->i_state &= ~I_NEW & ~I_CREATING;
10368c2ecf20Sopenharmony_ci	smp_mb();
10378c2ecf20Sopenharmony_ci	wake_up_bit(&inode->i_state, __I_NEW);
10388c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
10398c2ecf20Sopenharmony_ci}
10408c2ecf20Sopenharmony_ciEXPORT_SYMBOL(unlock_new_inode);
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_civoid discard_new_inode(struct inode *inode)
10438c2ecf20Sopenharmony_ci{
10448c2ecf20Sopenharmony_ci	lockdep_annotate_inode_mutex_key(inode);
10458c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
10468c2ecf20Sopenharmony_ci	WARN_ON(!(inode->i_state & I_NEW));
10478c2ecf20Sopenharmony_ci	inode->i_state &= ~I_NEW;
10488c2ecf20Sopenharmony_ci	smp_mb();
10498c2ecf20Sopenharmony_ci	wake_up_bit(&inode->i_state, __I_NEW);
10508c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
10518c2ecf20Sopenharmony_ci	iput(inode);
10528c2ecf20Sopenharmony_ci}
10538c2ecf20Sopenharmony_ciEXPORT_SYMBOL(discard_new_inode);
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci/**
10568c2ecf20Sopenharmony_ci * lock_two_inodes - lock two inodes (may be regular files but also dirs)
10578c2ecf20Sopenharmony_ci *
10588c2ecf20Sopenharmony_ci * Lock any non-NULL argument. The caller must make sure that if he is passing
10598c2ecf20Sopenharmony_ci * in two directories, one is not ancestor of the other.  Zero, one or two
10608c2ecf20Sopenharmony_ci * objects may be locked by this function.
10618c2ecf20Sopenharmony_ci *
10628c2ecf20Sopenharmony_ci * @inode1: first inode to lock
10638c2ecf20Sopenharmony_ci * @inode2: second inode to lock
10648c2ecf20Sopenharmony_ci * @subclass1: inode lock subclass for the first lock obtained
10658c2ecf20Sopenharmony_ci * @subclass2: inode lock subclass for the second lock obtained
10668c2ecf20Sopenharmony_ci */
10678c2ecf20Sopenharmony_civoid lock_two_inodes(struct inode *inode1, struct inode *inode2,
10688c2ecf20Sopenharmony_ci		     unsigned subclass1, unsigned subclass2)
10698c2ecf20Sopenharmony_ci{
10708c2ecf20Sopenharmony_ci	if (!inode1 || !inode2) {
10718c2ecf20Sopenharmony_ci		/*
10728c2ecf20Sopenharmony_ci		 * Make sure @subclass1 will be used for the acquired lock.
10738c2ecf20Sopenharmony_ci		 * This is not strictly necessary (no current caller cares) but
10748c2ecf20Sopenharmony_ci		 * let's keep things consistent.
10758c2ecf20Sopenharmony_ci		 */
10768c2ecf20Sopenharmony_ci		if (!inode1)
10778c2ecf20Sopenharmony_ci			swap(inode1, inode2);
10788c2ecf20Sopenharmony_ci		goto lock;
10798c2ecf20Sopenharmony_ci	}
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci	/*
10828c2ecf20Sopenharmony_ci	 * If one object is directory and the other is not, we must make sure
10838c2ecf20Sopenharmony_ci	 * to lock directory first as the other object may be its child.
10848c2ecf20Sopenharmony_ci	 */
10858c2ecf20Sopenharmony_ci	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
10868c2ecf20Sopenharmony_ci		if (inode1 > inode2)
10878c2ecf20Sopenharmony_ci			swap(inode1, inode2);
10888c2ecf20Sopenharmony_ci	} else if (!S_ISDIR(inode1->i_mode))
10898c2ecf20Sopenharmony_ci		swap(inode1, inode2);
10908c2ecf20Sopenharmony_cilock:
10918c2ecf20Sopenharmony_ci	if (inode1)
10928c2ecf20Sopenharmony_ci		inode_lock_nested(inode1, subclass1);
10938c2ecf20Sopenharmony_ci	if (inode2 && inode2 != inode1)
10948c2ecf20Sopenharmony_ci		inode_lock_nested(inode2, subclass2);
10958c2ecf20Sopenharmony_ci}
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci/**
10988c2ecf20Sopenharmony_ci * lock_two_nondirectories - take two i_mutexes on non-directory objects
10998c2ecf20Sopenharmony_ci *
11008c2ecf20Sopenharmony_ci * Lock any non-NULL argument that is not a directory.
11018c2ecf20Sopenharmony_ci * Zero, one or two objects may be locked by this function.
11028c2ecf20Sopenharmony_ci *
11038c2ecf20Sopenharmony_ci * @inode1: first inode to lock
11048c2ecf20Sopenharmony_ci * @inode2: second inode to lock
11058c2ecf20Sopenharmony_ci */
11068c2ecf20Sopenharmony_civoid lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
11078c2ecf20Sopenharmony_ci{
11088c2ecf20Sopenharmony_ci	if (inode1 > inode2)
11098c2ecf20Sopenharmony_ci		swap(inode1, inode2);
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci	if (inode1 && !S_ISDIR(inode1->i_mode))
11128c2ecf20Sopenharmony_ci		inode_lock(inode1);
11138c2ecf20Sopenharmony_ci	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
11148c2ecf20Sopenharmony_ci		inode_lock_nested(inode2, I_MUTEX_NONDIR2);
11158c2ecf20Sopenharmony_ci}
11168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(lock_two_nondirectories);
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ci/**
11198c2ecf20Sopenharmony_ci * unlock_two_nondirectories - release locks from lock_two_nondirectories()
11208c2ecf20Sopenharmony_ci * @inode1: first inode to unlock
11218c2ecf20Sopenharmony_ci * @inode2: second inode to unlock
11228c2ecf20Sopenharmony_ci */
11238c2ecf20Sopenharmony_civoid unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
11248c2ecf20Sopenharmony_ci{
11258c2ecf20Sopenharmony_ci	if (inode1 && !S_ISDIR(inode1->i_mode))
11268c2ecf20Sopenharmony_ci		inode_unlock(inode1);
11278c2ecf20Sopenharmony_ci	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
11288c2ecf20Sopenharmony_ci		inode_unlock(inode2);
11298c2ecf20Sopenharmony_ci}
11308c2ecf20Sopenharmony_ciEXPORT_SYMBOL(unlock_two_nondirectories);
11318c2ecf20Sopenharmony_ci
/**
 * inode_insert5 - obtain an inode from a mounted file system
 * @inode:	pre-allocated inode to use for insert to cache
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count. This is
 * a variant of iget5_locked() for callers that don't want to fail on memory
 * allocation of inode.
 *
 * If the inode is not in cache, insert the pre-allocated inode to cache and
 * return it locked, hashed, and with the I_NEW flag set. The file system gets
 * to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so can't
 * sleep.
 *
 * Return: the already cached inode, or @inode after it has been inserted,
 * or NULL if the lookup hit an inode that is still being created or @set
 * returned non-zero.
 */
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
			    int (*test)(struct inode *, void *),
			    int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	struct inode *old;
	/* Sampled once up front; decides below whether to add to the sb list. */
	bool creating = inode->i_state & I_CREATING;

again:
	spin_lock(&inode_hash_lock);
	old = find_inode(inode->i_sb, head, test, data);
	if (unlikely(old)) {
		/*
		 * Uhhuh, somebody else created the same inode under us.
		 * Use the old inode instead of the preallocated one.
		 */
		spin_unlock(&inode_hash_lock);
		/* find_inode() reports an I_CREATING match as ERR_PTR(-ESTALE). */
		if (IS_ERR(old))
			return NULL;
		wait_on_inode(old);
		/* Unhashed while we waited: drop it and redo the lookup. */
		if (unlikely(inode_unhashed(old))) {
			iput(old);
			goto again;
		}
		return old;
	}

	if (set && unlikely(set(inode, data))) {
		inode = NULL;
		goto unlock;
	}

	/*
	 * Return the locked inode with I_NEW set, the
	 * caller is responsible for filling in the contents
	 */
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head_rcu(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);
	/*
	 * NOTE(review): an inode that came in with I_CREATING is presumably
	 * already on the superblock list - confirm against the callers.
	 */
	if (!creating)
		inode_sb_list_add(inode);
unlock:
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(inode_insert5);
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci/**
12028c2ecf20Sopenharmony_ci * iget5_locked - obtain an inode from a mounted file system
12038c2ecf20Sopenharmony_ci * @sb:		super block of file system
12048c2ecf20Sopenharmony_ci * @hashval:	hash value (usually inode number) to get
12058c2ecf20Sopenharmony_ci * @test:	callback used for comparisons between inodes
12068c2ecf20Sopenharmony_ci * @set:	callback used to initialize a new struct inode
12078c2ecf20Sopenharmony_ci * @data:	opaque data pointer to pass to @test and @set
12088c2ecf20Sopenharmony_ci *
12098c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
12108c2ecf20Sopenharmony_ci * and if present it is return it with an increased reference count. This is
12118c2ecf20Sopenharmony_ci * a generalized version of iget_locked() for file systems where the inode
12128c2ecf20Sopenharmony_ci * number is not sufficient for unique identification of an inode.
12138c2ecf20Sopenharmony_ci *
12148c2ecf20Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked,
12158c2ecf20Sopenharmony_ci * hashed, and with the I_NEW flag set. The file system gets to fill it in
12168c2ecf20Sopenharmony_ci * before unlocking it via unlock_new_inode().
12178c2ecf20Sopenharmony_ci *
12188c2ecf20Sopenharmony_ci * Note both @test and @set are called with the inode_hash_lock held, so can't
12198c2ecf20Sopenharmony_ci * sleep.
12208c2ecf20Sopenharmony_ci */
12218c2ecf20Sopenharmony_cistruct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
12228c2ecf20Sopenharmony_ci		int (*test)(struct inode *, void *),
12238c2ecf20Sopenharmony_ci		int (*set)(struct inode *, void *), void *data)
12248c2ecf20Sopenharmony_ci{
12258c2ecf20Sopenharmony_ci	struct inode *inode = ilookup5(sb, hashval, test, data);
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci	if (!inode) {
12288c2ecf20Sopenharmony_ci		struct inode *new = alloc_inode(sb);
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ci		if (new) {
12318c2ecf20Sopenharmony_ci			new->i_state = 0;
12328c2ecf20Sopenharmony_ci			inode = inode_insert5(new, hashval, test, set, data);
12338c2ecf20Sopenharmony_ci			if (unlikely(inode != new))
12348c2ecf20Sopenharmony_ci				destroy_inode(new);
12358c2ecf20Sopenharmony_ci		}
12368c2ecf20Sopenharmony_ci	}
12378c2ecf20Sopenharmony_ci	return inode;
12388c2ecf20Sopenharmony_ci}
12398c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iget5_locked);
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_ci/**
12428c2ecf20Sopenharmony_ci * iget_locked - obtain an inode from a mounted file system
12438c2ecf20Sopenharmony_ci * @sb:		super block of file system
12448c2ecf20Sopenharmony_ci * @ino:	inode number to get
12458c2ecf20Sopenharmony_ci *
12468c2ecf20Sopenharmony_ci * Search for the inode specified by @ino in the inode cache and if present
12478c2ecf20Sopenharmony_ci * return it with an increased reference count. This is for file systems
12488c2ecf20Sopenharmony_ci * where the inode number is sufficient for unique identification of an inode.
12498c2ecf20Sopenharmony_ci *
12508c2ecf20Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked,
12518c2ecf20Sopenharmony_ci * hashed, and with the I_NEW flag set.  The file system gets to fill it in
12528c2ecf20Sopenharmony_ci * before unlocking it via unlock_new_inode().
12538c2ecf20Sopenharmony_ci */
12548c2ecf20Sopenharmony_cistruct inode *iget_locked(struct super_block *sb, unsigned long ino)
12558c2ecf20Sopenharmony_ci{
12568c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
12578c2ecf20Sopenharmony_ci	struct inode *inode;
12588c2ecf20Sopenharmony_ciagain:
12598c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
12608c2ecf20Sopenharmony_ci	inode = find_inode_fast(sb, head, ino);
12618c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
12628c2ecf20Sopenharmony_ci	if (inode) {
12638c2ecf20Sopenharmony_ci		if (IS_ERR(inode))
12648c2ecf20Sopenharmony_ci			return NULL;
12658c2ecf20Sopenharmony_ci		wait_on_inode(inode);
12668c2ecf20Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
12678c2ecf20Sopenharmony_ci			iput(inode);
12688c2ecf20Sopenharmony_ci			goto again;
12698c2ecf20Sopenharmony_ci		}
12708c2ecf20Sopenharmony_ci		return inode;
12718c2ecf20Sopenharmony_ci	}
12728c2ecf20Sopenharmony_ci
12738c2ecf20Sopenharmony_ci	inode = alloc_inode(sb);
12748c2ecf20Sopenharmony_ci	if (inode) {
12758c2ecf20Sopenharmony_ci		struct inode *old;
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci		spin_lock(&inode_hash_lock);
12788c2ecf20Sopenharmony_ci		/* We released the lock, so.. */
12798c2ecf20Sopenharmony_ci		old = find_inode_fast(sb, head, ino);
12808c2ecf20Sopenharmony_ci		if (!old) {
12818c2ecf20Sopenharmony_ci			inode->i_ino = ino;
12828c2ecf20Sopenharmony_ci			spin_lock(&inode->i_lock);
12838c2ecf20Sopenharmony_ci			inode->i_state = I_NEW;
12848c2ecf20Sopenharmony_ci			hlist_add_head_rcu(&inode->i_hash, head);
12858c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
12868c2ecf20Sopenharmony_ci			inode_sb_list_add(inode);
12878c2ecf20Sopenharmony_ci			spin_unlock(&inode_hash_lock);
12888c2ecf20Sopenharmony_ci
12898c2ecf20Sopenharmony_ci			/* Return the locked inode with I_NEW set, the
12908c2ecf20Sopenharmony_ci			 * caller is responsible for filling in the contents
12918c2ecf20Sopenharmony_ci			 */
12928c2ecf20Sopenharmony_ci			return inode;
12938c2ecf20Sopenharmony_ci		}
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci		/*
12968c2ecf20Sopenharmony_ci		 * Uhhuh, somebody else created the same inode under
12978c2ecf20Sopenharmony_ci		 * us. Use the old inode instead of the one we just
12988c2ecf20Sopenharmony_ci		 * allocated.
12998c2ecf20Sopenharmony_ci		 */
13008c2ecf20Sopenharmony_ci		spin_unlock(&inode_hash_lock);
13018c2ecf20Sopenharmony_ci		destroy_inode(inode);
13028c2ecf20Sopenharmony_ci		if (IS_ERR(old))
13038c2ecf20Sopenharmony_ci			return NULL;
13048c2ecf20Sopenharmony_ci		inode = old;
13058c2ecf20Sopenharmony_ci		wait_on_inode(inode);
13068c2ecf20Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
13078c2ecf20Sopenharmony_ci			iput(inode);
13088c2ecf20Sopenharmony_ci			goto again;
13098c2ecf20Sopenharmony_ci		}
13108c2ecf20Sopenharmony_ci	}
13118c2ecf20Sopenharmony_ci	return inode;
13128c2ecf20Sopenharmony_ci}
13138c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iget_locked);
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci/*
13168c2ecf20Sopenharmony_ci * search the inode cache for a matching inode number.
13178c2ecf20Sopenharmony_ci * If we find one, then the inode number we are trying to
13188c2ecf20Sopenharmony_ci * allocate is not unique and so we should not use it.
13198c2ecf20Sopenharmony_ci *
13208c2ecf20Sopenharmony_ci * Returns 1 if the inode number is unique, 0 if it is not.
13218c2ecf20Sopenharmony_ci */
13228c2ecf20Sopenharmony_cistatic int test_inode_iunique(struct super_block *sb, unsigned long ino)
13238c2ecf20Sopenharmony_ci{
13248c2ecf20Sopenharmony_ci	struct hlist_head *b = inode_hashtable + hash(sb, ino);
13258c2ecf20Sopenharmony_ci	struct inode *inode;
13268c2ecf20Sopenharmony_ci
13278c2ecf20Sopenharmony_ci	hlist_for_each_entry_rcu(inode, b, i_hash) {
13288c2ecf20Sopenharmony_ci		if (inode->i_ino == ino && inode->i_sb == sb)
13298c2ecf20Sopenharmony_ci			return 0;
13308c2ecf20Sopenharmony_ci	}
13318c2ecf20Sopenharmony_ci	return 1;
13328c2ecf20Sopenharmony_ci}
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci/**
13358c2ecf20Sopenharmony_ci *	iunique - get a unique inode number
13368c2ecf20Sopenharmony_ci *	@sb: superblock
13378c2ecf20Sopenharmony_ci *	@max_reserved: highest reserved inode number
13388c2ecf20Sopenharmony_ci *
13398c2ecf20Sopenharmony_ci *	Obtain an inode number that is unique on the system for a given
13408c2ecf20Sopenharmony_ci *	superblock. This is used by file systems that have no natural
13418c2ecf20Sopenharmony_ci *	permanent inode numbering system. An inode number is returned that
13428c2ecf20Sopenharmony_ci *	is higher than the reserved limit but unique.
13438c2ecf20Sopenharmony_ci *
13448c2ecf20Sopenharmony_ci *	BUGS:
13458c2ecf20Sopenharmony_ci *	With a large number of inodes live on the file system this function
13468c2ecf20Sopenharmony_ci *	currently becomes quite slow.
13478c2ecf20Sopenharmony_ci */
13488c2ecf20Sopenharmony_ciino_t iunique(struct super_block *sb, ino_t max_reserved)
13498c2ecf20Sopenharmony_ci{
13508c2ecf20Sopenharmony_ci	/*
13518c2ecf20Sopenharmony_ci	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
13528c2ecf20Sopenharmony_ci	 * error if st_ino won't fit in target struct field. Use 32bit counter
13538c2ecf20Sopenharmony_ci	 * here to attempt to avoid that.
13548c2ecf20Sopenharmony_ci	 */
13558c2ecf20Sopenharmony_ci	static DEFINE_SPINLOCK(iunique_lock);
13568c2ecf20Sopenharmony_ci	static unsigned int counter;
13578c2ecf20Sopenharmony_ci	ino_t res;
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_ci	rcu_read_lock();
13608c2ecf20Sopenharmony_ci	spin_lock(&iunique_lock);
13618c2ecf20Sopenharmony_ci	do {
13628c2ecf20Sopenharmony_ci		if (counter <= max_reserved)
13638c2ecf20Sopenharmony_ci			counter = max_reserved + 1;
13648c2ecf20Sopenharmony_ci		res = counter++;
13658c2ecf20Sopenharmony_ci	} while (!test_inode_iunique(sb, res));
13668c2ecf20Sopenharmony_ci	spin_unlock(&iunique_lock);
13678c2ecf20Sopenharmony_ci	rcu_read_unlock();
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci	return res;
13708c2ecf20Sopenharmony_ci}
13718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iunique);
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_cistruct inode *igrab(struct inode *inode)
13748c2ecf20Sopenharmony_ci{
13758c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
13768c2ecf20Sopenharmony_ci	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
13778c2ecf20Sopenharmony_ci		__iget(inode);
13788c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
13798c2ecf20Sopenharmony_ci	} else {
13808c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
13818c2ecf20Sopenharmony_ci		/*
13828c2ecf20Sopenharmony_ci		 * Handle the case where s_op->clear_inode is not been
13838c2ecf20Sopenharmony_ci		 * called yet, and somebody is calling igrab
13848c2ecf20Sopenharmony_ci		 * while the inode is getting freed.
13858c2ecf20Sopenharmony_ci		 */
13868c2ecf20Sopenharmony_ci		inode = NULL;
13878c2ecf20Sopenharmony_ci	}
13888c2ecf20Sopenharmony_ci	return inode;
13898c2ecf20Sopenharmony_ci}
13908c2ecf20Sopenharmony_ciEXPORT_SYMBOL(igrab);
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_ci/**
13938c2ecf20Sopenharmony_ci * ilookup5_nowait - search for an inode in the inode cache
13948c2ecf20Sopenharmony_ci * @sb:		super block of file system to search
13958c2ecf20Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
13968c2ecf20Sopenharmony_ci * @test:	callback used for comparisons between inodes
13978c2ecf20Sopenharmony_ci * @data:	opaque data pointer to pass to @test
13988c2ecf20Sopenharmony_ci *
13998c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache.
14008c2ecf20Sopenharmony_ci * If the inode is in the cache, the inode is returned with an incremented
14018c2ecf20Sopenharmony_ci * reference count.
14028c2ecf20Sopenharmony_ci *
14038c2ecf20Sopenharmony_ci * Note: I_NEW is not waited upon so you have to be very careful what you do
14048c2ecf20Sopenharmony_ci * with the returned inode.  You probably should be using ilookup5() instead.
14058c2ecf20Sopenharmony_ci *
14068c2ecf20Sopenharmony_ci * Note2: @test is called with the inode_hash_lock held, so can't sleep.
14078c2ecf20Sopenharmony_ci */
14088c2ecf20Sopenharmony_cistruct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
14098c2ecf20Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
14108c2ecf20Sopenharmony_ci{
14118c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
14128c2ecf20Sopenharmony_ci	struct inode *inode;
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
14158c2ecf20Sopenharmony_ci	inode = find_inode(sb, head, test, data);
14168c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
14178c2ecf20Sopenharmony_ci
14188c2ecf20Sopenharmony_ci	return IS_ERR(inode) ? NULL : inode;
14198c2ecf20Sopenharmony_ci}
14208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ilookup5_nowait);
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci/**
14238c2ecf20Sopenharmony_ci * ilookup5 - search for an inode in the inode cache
14248c2ecf20Sopenharmony_ci * @sb:		super block of file system to search
14258c2ecf20Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
14268c2ecf20Sopenharmony_ci * @test:	callback used for comparisons between inodes
14278c2ecf20Sopenharmony_ci * @data:	opaque data pointer to pass to @test
14288c2ecf20Sopenharmony_ci *
14298c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
14308c2ecf20Sopenharmony_ci * and if the inode is in the cache, return the inode with an incremented
14318c2ecf20Sopenharmony_ci * reference count.  Waits on I_NEW before returning the inode.
14328c2ecf20Sopenharmony_ci * returned with an incremented reference count.
14338c2ecf20Sopenharmony_ci *
14348c2ecf20Sopenharmony_ci * This is a generalized version of ilookup() for file systems where the
14358c2ecf20Sopenharmony_ci * inode number is not sufficient for unique identification of an inode.
14368c2ecf20Sopenharmony_ci *
14378c2ecf20Sopenharmony_ci * Note: @test is called with the inode_hash_lock held, so can't sleep.
14388c2ecf20Sopenharmony_ci */
14398c2ecf20Sopenharmony_cistruct inode *ilookup5(struct super_block *sb, unsigned long hashval,
14408c2ecf20Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
14418c2ecf20Sopenharmony_ci{
14428c2ecf20Sopenharmony_ci	struct inode *inode;
14438c2ecf20Sopenharmony_ciagain:
14448c2ecf20Sopenharmony_ci	inode = ilookup5_nowait(sb, hashval, test, data);
14458c2ecf20Sopenharmony_ci	if (inode) {
14468c2ecf20Sopenharmony_ci		wait_on_inode(inode);
14478c2ecf20Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
14488c2ecf20Sopenharmony_ci			iput(inode);
14498c2ecf20Sopenharmony_ci			goto again;
14508c2ecf20Sopenharmony_ci		}
14518c2ecf20Sopenharmony_ci	}
14528c2ecf20Sopenharmony_ci	return inode;
14538c2ecf20Sopenharmony_ci}
14548c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ilookup5);
14558c2ecf20Sopenharmony_ci
14568c2ecf20Sopenharmony_ci/**
14578c2ecf20Sopenharmony_ci * ilookup - search for an inode in the inode cache
14588c2ecf20Sopenharmony_ci * @sb:		super block of file system to search
14598c2ecf20Sopenharmony_ci * @ino:	inode number to search for
14608c2ecf20Sopenharmony_ci *
14618c2ecf20Sopenharmony_ci * Search for the inode @ino in the inode cache, and if the inode is in the
14628c2ecf20Sopenharmony_ci * cache, the inode is returned with an incremented reference count.
14638c2ecf20Sopenharmony_ci */
14648c2ecf20Sopenharmony_cistruct inode *ilookup(struct super_block *sb, unsigned long ino)
14658c2ecf20Sopenharmony_ci{
14668c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
14678c2ecf20Sopenharmony_ci	struct inode *inode;
14688c2ecf20Sopenharmony_ciagain:
14698c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
14708c2ecf20Sopenharmony_ci	inode = find_inode_fast(sb, head, ino);
14718c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci	if (inode) {
14748c2ecf20Sopenharmony_ci		if (IS_ERR(inode))
14758c2ecf20Sopenharmony_ci			return NULL;
14768c2ecf20Sopenharmony_ci		wait_on_inode(inode);
14778c2ecf20Sopenharmony_ci		if (unlikely(inode_unhashed(inode))) {
14788c2ecf20Sopenharmony_ci			iput(inode);
14798c2ecf20Sopenharmony_ci			goto again;
14808c2ecf20Sopenharmony_ci		}
14818c2ecf20Sopenharmony_ci	}
14828c2ecf20Sopenharmony_ci	return inode;
14838c2ecf20Sopenharmony_ci}
14848c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ilookup);
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci/**
14878c2ecf20Sopenharmony_ci * find_inode_nowait - find an inode in the inode cache
14888c2ecf20Sopenharmony_ci * @sb:		super block of file system to search
14898c2ecf20Sopenharmony_ci * @hashval:	hash value (usually inode number) to search for
14908c2ecf20Sopenharmony_ci * @match:	callback used for comparisons between inodes
14918c2ecf20Sopenharmony_ci * @data:	opaque data pointer to pass to @match
14928c2ecf20Sopenharmony_ci *
14938c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode
14948c2ecf20Sopenharmony_ci * cache, where the helper function @match will return 0 if the inode
14958c2ecf20Sopenharmony_ci * does not match, 1 if the inode does match, and -1 if the search
14968c2ecf20Sopenharmony_ci * should be stopped.  The @match function must be responsible for
14978c2ecf20Sopenharmony_ci * taking the i_lock spin_lock and checking i_state for an inode being
14988c2ecf20Sopenharmony_ci * freed or being initialized, and incrementing the reference count
14998c2ecf20Sopenharmony_ci * before returning 1.  It also must not sleep, since it is called with
15008c2ecf20Sopenharmony_ci * the inode_hash_lock spinlock held.
15018c2ecf20Sopenharmony_ci *
15028c2ecf20Sopenharmony_ci * This is a even more generalized version of ilookup5() when the
15038c2ecf20Sopenharmony_ci * function must never block --- find_inode() can block in
15048c2ecf20Sopenharmony_ci * __wait_on_freeing_inode() --- or when the caller can not increment
15058c2ecf20Sopenharmony_ci * the reference count because the resulting iput() might cause an
15068c2ecf20Sopenharmony_ci * inode eviction.  The tradeoff is that the @match funtion must be
15078c2ecf20Sopenharmony_ci * very carefully implemented.
15088c2ecf20Sopenharmony_ci */
15098c2ecf20Sopenharmony_cistruct inode *find_inode_nowait(struct super_block *sb,
15108c2ecf20Sopenharmony_ci				unsigned long hashval,
15118c2ecf20Sopenharmony_ci				int (*match)(struct inode *, unsigned long,
15128c2ecf20Sopenharmony_ci					     void *),
15138c2ecf20Sopenharmony_ci				void *data)
15148c2ecf20Sopenharmony_ci{
15158c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
15168c2ecf20Sopenharmony_ci	struct inode *inode, *ret_inode = NULL;
15178c2ecf20Sopenharmony_ci	int mval;
15188c2ecf20Sopenharmony_ci
15198c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
15208c2ecf20Sopenharmony_ci	hlist_for_each_entry(inode, head, i_hash) {
15218c2ecf20Sopenharmony_ci		if (inode->i_sb != sb)
15228c2ecf20Sopenharmony_ci			continue;
15238c2ecf20Sopenharmony_ci		mval = match(inode, hashval, data);
15248c2ecf20Sopenharmony_ci		if (mval == 0)
15258c2ecf20Sopenharmony_ci			continue;
15268c2ecf20Sopenharmony_ci		if (mval == 1)
15278c2ecf20Sopenharmony_ci			ret_inode = inode;
15288c2ecf20Sopenharmony_ci		goto out;
15298c2ecf20Sopenharmony_ci	}
15308c2ecf20Sopenharmony_ciout:
15318c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
15328c2ecf20Sopenharmony_ci	return ret_inode;
15338c2ecf20Sopenharmony_ci}
15348c2ecf20Sopenharmony_ciEXPORT_SYMBOL(find_inode_nowait);
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci/**
15378c2ecf20Sopenharmony_ci * find_inode_rcu - find an inode in the inode cache
15388c2ecf20Sopenharmony_ci * @sb:		Super block of file system to search
15398c2ecf20Sopenharmony_ci * @hashval:	Key to hash
15408c2ecf20Sopenharmony_ci * @test:	Function to test match on an inode
15418c2ecf20Sopenharmony_ci * @data:	Data for test function
15428c2ecf20Sopenharmony_ci *
15438c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
15448c2ecf20Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match
15458c2ecf20Sopenharmony_ci * and 1 if it does.  The @test function must be responsible for taking the
15468c2ecf20Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being
15478c2ecf20Sopenharmony_ci * initialized.
15488c2ecf20Sopenharmony_ci *
15498c2ecf20Sopenharmony_ci * If successful, this will return the inode for which the @test function
15508c2ecf20Sopenharmony_ci * returned 1 and NULL otherwise.
15518c2ecf20Sopenharmony_ci *
15528c2ecf20Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented.
15538c2ecf20Sopenharmony_ci * It is also not permitted to sleep.
15548c2ecf20Sopenharmony_ci *
15558c2ecf20Sopenharmony_ci * The caller must hold the RCU read lock.
15568c2ecf20Sopenharmony_ci */
15578c2ecf20Sopenharmony_cistruct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
15588c2ecf20Sopenharmony_ci			     int (*test)(struct inode *, void *), void *data)
15598c2ecf20Sopenharmony_ci{
15608c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
15618c2ecf20Sopenharmony_ci	struct inode *inode;
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
15648c2ecf20Sopenharmony_ci			 "suspicious find_inode_rcu() usage");
15658c2ecf20Sopenharmony_ci
15668c2ecf20Sopenharmony_ci	hlist_for_each_entry_rcu(inode, head, i_hash) {
15678c2ecf20Sopenharmony_ci		if (inode->i_sb == sb &&
15688c2ecf20Sopenharmony_ci		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
15698c2ecf20Sopenharmony_ci		    test(inode, data))
15708c2ecf20Sopenharmony_ci			return inode;
15718c2ecf20Sopenharmony_ci	}
15728c2ecf20Sopenharmony_ci	return NULL;
15738c2ecf20Sopenharmony_ci}
15748c2ecf20Sopenharmony_ciEXPORT_SYMBOL(find_inode_rcu);
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_ci/**
15778c2ecf20Sopenharmony_ci * find_inode_by_rcu - Find an inode in the inode cache
15788c2ecf20Sopenharmony_ci * @sb:		Super block of file system to search
15798c2ecf20Sopenharmony_ci * @ino:	The inode number to match
15808c2ecf20Sopenharmony_ci *
15818c2ecf20Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache,
15828c2ecf20Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match
15838c2ecf20Sopenharmony_ci * and 1 if it does.  The @test function must be responsible for taking the
15848c2ecf20Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being
15858c2ecf20Sopenharmony_ci * initialized.
15868c2ecf20Sopenharmony_ci *
15878c2ecf20Sopenharmony_ci * If successful, this will return the inode for which the @test function
15888c2ecf20Sopenharmony_ci * returned 1 and NULL otherwise.
15898c2ecf20Sopenharmony_ci *
15908c2ecf20Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented.
15918c2ecf20Sopenharmony_ci * It is also not permitted to sleep.
15928c2ecf20Sopenharmony_ci *
15938c2ecf20Sopenharmony_ci * The caller must hold the RCU read lock.
15948c2ecf20Sopenharmony_ci */
15958c2ecf20Sopenharmony_cistruct inode *find_inode_by_ino_rcu(struct super_block *sb,
15968c2ecf20Sopenharmony_ci				    unsigned long ino)
15978c2ecf20Sopenharmony_ci{
15988c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
15998c2ecf20Sopenharmony_ci	struct inode *inode;
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16028c2ecf20Sopenharmony_ci			 "suspicious find_inode_by_ino_rcu() usage");
16038c2ecf20Sopenharmony_ci
16048c2ecf20Sopenharmony_ci	hlist_for_each_entry_rcu(inode, head, i_hash) {
16058c2ecf20Sopenharmony_ci		if (inode->i_ino == ino &&
16068c2ecf20Sopenharmony_ci		    inode->i_sb == sb &&
16078c2ecf20Sopenharmony_ci		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
16088c2ecf20Sopenharmony_ci		    return inode;
16098c2ecf20Sopenharmony_ci	}
16108c2ecf20Sopenharmony_ci	return NULL;
16118c2ecf20Sopenharmony_ci}
16128c2ecf20Sopenharmony_ciEXPORT_SYMBOL(find_inode_by_ino_rcu);
16138c2ecf20Sopenharmony_ci
16148c2ecf20Sopenharmony_ciint insert_inode_locked(struct inode *inode)
16158c2ecf20Sopenharmony_ci{
16168c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
16178c2ecf20Sopenharmony_ci	ino_t ino = inode->i_ino;
16188c2ecf20Sopenharmony_ci	struct hlist_head *head = inode_hashtable + hash(sb, ino);
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	while (1) {
16218c2ecf20Sopenharmony_ci		struct inode *old = NULL;
16228c2ecf20Sopenharmony_ci		spin_lock(&inode_hash_lock);
16238c2ecf20Sopenharmony_ci		hlist_for_each_entry(old, head, i_hash) {
16248c2ecf20Sopenharmony_ci			if (old->i_ino != ino)
16258c2ecf20Sopenharmony_ci				continue;
16268c2ecf20Sopenharmony_ci			if (old->i_sb != sb)
16278c2ecf20Sopenharmony_ci				continue;
16288c2ecf20Sopenharmony_ci			spin_lock(&old->i_lock);
16298c2ecf20Sopenharmony_ci			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
16308c2ecf20Sopenharmony_ci				spin_unlock(&old->i_lock);
16318c2ecf20Sopenharmony_ci				continue;
16328c2ecf20Sopenharmony_ci			}
16338c2ecf20Sopenharmony_ci			break;
16348c2ecf20Sopenharmony_ci		}
16358c2ecf20Sopenharmony_ci		if (likely(!old)) {
16368c2ecf20Sopenharmony_ci			spin_lock(&inode->i_lock);
16378c2ecf20Sopenharmony_ci			inode->i_state |= I_NEW | I_CREATING;
16388c2ecf20Sopenharmony_ci			hlist_add_head_rcu(&inode->i_hash, head);
16398c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
16408c2ecf20Sopenharmony_ci			spin_unlock(&inode_hash_lock);
16418c2ecf20Sopenharmony_ci			return 0;
16428c2ecf20Sopenharmony_ci		}
16438c2ecf20Sopenharmony_ci		if (unlikely(old->i_state & I_CREATING)) {
16448c2ecf20Sopenharmony_ci			spin_unlock(&old->i_lock);
16458c2ecf20Sopenharmony_ci			spin_unlock(&inode_hash_lock);
16468c2ecf20Sopenharmony_ci			return -EBUSY;
16478c2ecf20Sopenharmony_ci		}
16488c2ecf20Sopenharmony_ci		__iget(old);
16498c2ecf20Sopenharmony_ci		spin_unlock(&old->i_lock);
16508c2ecf20Sopenharmony_ci		spin_unlock(&inode_hash_lock);
16518c2ecf20Sopenharmony_ci		wait_on_inode(old);
16528c2ecf20Sopenharmony_ci		if (unlikely(!inode_unhashed(old))) {
16538c2ecf20Sopenharmony_ci			iput(old);
16548c2ecf20Sopenharmony_ci			return -EBUSY;
16558c2ecf20Sopenharmony_ci		}
16568c2ecf20Sopenharmony_ci		iput(old);
16578c2ecf20Sopenharmony_ci	}
16588c2ecf20Sopenharmony_ci}
16598c2ecf20Sopenharmony_ciEXPORT_SYMBOL(insert_inode_locked);
16608c2ecf20Sopenharmony_ci
16618c2ecf20Sopenharmony_ciint insert_inode_locked4(struct inode *inode, unsigned long hashval,
16628c2ecf20Sopenharmony_ci		int (*test)(struct inode *, void *), void *data)
16638c2ecf20Sopenharmony_ci{
16648c2ecf20Sopenharmony_ci	struct inode *old;
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_ci	inode->i_state |= I_CREATING;
16678c2ecf20Sopenharmony_ci	old = inode_insert5(inode, hashval, test, NULL, data);
16688c2ecf20Sopenharmony_ci
16698c2ecf20Sopenharmony_ci	if (old != inode) {
16708c2ecf20Sopenharmony_ci		iput(old);
16718c2ecf20Sopenharmony_ci		return -EBUSY;
16728c2ecf20Sopenharmony_ci	}
16738c2ecf20Sopenharmony_ci	return 0;
16748c2ecf20Sopenharmony_ci}
16758c2ecf20Sopenharmony_ciEXPORT_SYMBOL(insert_inode_locked4);
16768c2ecf20Sopenharmony_ci
16778c2ecf20Sopenharmony_ci
/*
 * Unconditionally returns 1 and never looks at @inode.
 *
 * NOTE(review): the signature matches the ->drop_inode() hook called from
 * iput_final(), where a non-zero result leads to eviction; presumably this
 * is meant to be plugged in there -- confirm against the users.
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
16838c2ecf20Sopenharmony_ci
16848c2ecf20Sopenharmony_ci/*
16858c2ecf20Sopenharmony_ci * Called when we're dropping the last reference
16868c2ecf20Sopenharmony_ci * to an inode.
16878c2ecf20Sopenharmony_ci *
16888c2ecf20Sopenharmony_ci * Call the FS "drop_inode()" function, defaulting to
16898c2ecf20Sopenharmony_ci * the legacy UNIX filesystem behaviour.  If it tells
16908c2ecf20Sopenharmony_ci * us to evict inode, do so.  Otherwise, retain inode
16918c2ecf20Sopenharmony_ci * in cache if fs is alive, sync and evict if fs is
16928c2ecf20Sopenharmony_ci * shutting down.
16938c2ecf20Sopenharmony_ci */
16948c2ecf20Sopenharmony_cistatic void iput_final(struct inode *inode)
16958c2ecf20Sopenharmony_ci{
16968c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
16978c2ecf20Sopenharmony_ci	const struct super_operations *op = inode->i_sb->s_op;
16988c2ecf20Sopenharmony_ci	unsigned long state;
16998c2ecf20Sopenharmony_ci	int drop;
17008c2ecf20Sopenharmony_ci
17018c2ecf20Sopenharmony_ci	WARN_ON(inode->i_state & I_NEW);
17028c2ecf20Sopenharmony_ci
17038c2ecf20Sopenharmony_ci	if (op->drop_inode)
17048c2ecf20Sopenharmony_ci		drop = op->drop_inode(inode);
17058c2ecf20Sopenharmony_ci	else
17068c2ecf20Sopenharmony_ci		drop = generic_drop_inode(inode);
17078c2ecf20Sopenharmony_ci
17088c2ecf20Sopenharmony_ci	if (!drop &&
17098c2ecf20Sopenharmony_ci	    !(inode->i_state & I_DONTCACHE) &&
17108c2ecf20Sopenharmony_ci	    (sb->s_flags & SB_ACTIVE)) {
17118c2ecf20Sopenharmony_ci		inode_add_lru(inode);
17128c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
17138c2ecf20Sopenharmony_ci		return;
17148c2ecf20Sopenharmony_ci	}
17158c2ecf20Sopenharmony_ci
17168c2ecf20Sopenharmony_ci	state = inode->i_state;
17178c2ecf20Sopenharmony_ci	if (!drop) {
17188c2ecf20Sopenharmony_ci		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
17198c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
17208c2ecf20Sopenharmony_ci
17218c2ecf20Sopenharmony_ci		write_inode_now(inode, 1);
17228c2ecf20Sopenharmony_ci
17238c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
17248c2ecf20Sopenharmony_ci		state = inode->i_state;
17258c2ecf20Sopenharmony_ci		WARN_ON(state & I_NEW);
17268c2ecf20Sopenharmony_ci		state &= ~I_WILL_FREE;
17278c2ecf20Sopenharmony_ci	}
17288c2ecf20Sopenharmony_ci
17298c2ecf20Sopenharmony_ci	WRITE_ONCE(inode->i_state, state | I_FREEING);
17308c2ecf20Sopenharmony_ci	if (!list_empty(&inode->i_lru))
17318c2ecf20Sopenharmony_ci		inode_lru_list_del(inode);
17328c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
17338c2ecf20Sopenharmony_ci
17348c2ecf20Sopenharmony_ci	evict(inode);
17358c2ecf20Sopenharmony_ci}
17368c2ecf20Sopenharmony_ci
17378c2ecf20Sopenharmony_ci/**
17388c2ecf20Sopenharmony_ci *	iput	- put an inode
17398c2ecf20Sopenharmony_ci *	@inode: inode to put
17408c2ecf20Sopenharmony_ci *
17418c2ecf20Sopenharmony_ci *	Puts an inode, dropping its usage count. If the inode use count hits
17428c2ecf20Sopenharmony_ci *	zero, the inode is then freed and may also be destroyed.
17438c2ecf20Sopenharmony_ci *
17448c2ecf20Sopenharmony_ci *	Consequently, iput() can sleep.
17458c2ecf20Sopenharmony_ci */
17468c2ecf20Sopenharmony_civoid iput(struct inode *inode)
17478c2ecf20Sopenharmony_ci{
17488c2ecf20Sopenharmony_ci	if (!inode)
17498c2ecf20Sopenharmony_ci		return;
17508c2ecf20Sopenharmony_ci	BUG_ON(inode->i_state & I_CLEAR);
17518c2ecf20Sopenharmony_ciretry:
17528c2ecf20Sopenharmony_ci	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
17538c2ecf20Sopenharmony_ci		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
17548c2ecf20Sopenharmony_ci			atomic_inc(&inode->i_count);
17558c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
17568c2ecf20Sopenharmony_ci			trace_writeback_lazytime_iput(inode);
17578c2ecf20Sopenharmony_ci			mark_inode_dirty_sync(inode);
17588c2ecf20Sopenharmony_ci			goto retry;
17598c2ecf20Sopenharmony_ci		}
17608c2ecf20Sopenharmony_ci		iput_final(inode);
17618c2ecf20Sopenharmony_ci	}
17628c2ecf20Sopenharmony_ci}
17638c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iput);
17648c2ecf20Sopenharmony_ci
#ifdef CONFIG_BLOCK
/**
 *	bmap	- find a block number in a file
 *	@inode:  inode owning the block number being requested
 *	@block: pointer containing the block to find
 *
 *	Replaces the value in ``*block`` with the block number on the device
 *	that corresponds to the requested block number in the file.
 *	That is, asked for block 4 of inode 1 the function will replace the
 *	4 in ``*block`` with the disk block, relative to the disk start, that
 *	holds that block of the file.
 *
 *	Returns -EINVAL if the mapping provides no ->bmap operation, 0
 *	otherwise. If mapping falls into a hole, returns 0 and ``*block`` is
 *	also set to 0.
 */
int bmap(struct inode *inode, sector_t *block)
{
	struct address_space *mapping = inode->i_mapping;

	if (!mapping->a_ops->bmap)
		return -EINVAL;

	*block = mapping->a_ops->bmap(mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
#endif
17908c2ecf20Sopenharmony_ci
17918c2ecf20Sopenharmony_ci/*
17928c2ecf20Sopenharmony_ci * With relative atime, only update atime if the previous atime is
17938c2ecf20Sopenharmony_ci * earlier than either the ctime or mtime or if at least a day has
17948c2ecf20Sopenharmony_ci * passed since the last atime update.
17958c2ecf20Sopenharmony_ci */
17968c2ecf20Sopenharmony_cistatic int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
17978c2ecf20Sopenharmony_ci			     struct timespec64 now)
17988c2ecf20Sopenharmony_ci{
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_ci	if (!(mnt->mnt_flags & MNT_RELATIME))
18018c2ecf20Sopenharmony_ci		return 1;
18028c2ecf20Sopenharmony_ci	/*
18038c2ecf20Sopenharmony_ci	 * Is mtime younger than atime? If yes, update atime:
18048c2ecf20Sopenharmony_ci	 */
18058c2ecf20Sopenharmony_ci	if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
18068c2ecf20Sopenharmony_ci		return 1;
18078c2ecf20Sopenharmony_ci	/*
18088c2ecf20Sopenharmony_ci	 * Is ctime younger than atime? If yes, update atime:
18098c2ecf20Sopenharmony_ci	 */
18108c2ecf20Sopenharmony_ci	if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
18118c2ecf20Sopenharmony_ci		return 1;
18128c2ecf20Sopenharmony_ci
18138c2ecf20Sopenharmony_ci	/*
18148c2ecf20Sopenharmony_ci	 * Is the previous atime value older than a day? If yes,
18158c2ecf20Sopenharmony_ci	 * update atime:
18168c2ecf20Sopenharmony_ci	 */
18178c2ecf20Sopenharmony_ci	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
18188c2ecf20Sopenharmony_ci		return 1;
18198c2ecf20Sopenharmony_ci	/*
18208c2ecf20Sopenharmony_ci	 * Good, we can skip the atime update:
18218c2ecf20Sopenharmony_ci	 */
18228c2ecf20Sopenharmony_ci	return 0;
18238c2ecf20Sopenharmony_ci}
18248c2ecf20Sopenharmony_ci
18258c2ecf20Sopenharmony_ciint generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
18268c2ecf20Sopenharmony_ci{
18278c2ecf20Sopenharmony_ci	int iflags = I_DIRTY_TIME;
18288c2ecf20Sopenharmony_ci	bool dirty = false;
18298c2ecf20Sopenharmony_ci
18308c2ecf20Sopenharmony_ci	if (flags & S_ATIME)
18318c2ecf20Sopenharmony_ci		inode->i_atime = *time;
18328c2ecf20Sopenharmony_ci	if (flags & S_VERSION)
18338c2ecf20Sopenharmony_ci		dirty = inode_maybe_inc_iversion(inode, false);
18348c2ecf20Sopenharmony_ci	if (flags & S_CTIME)
18358c2ecf20Sopenharmony_ci		inode->i_ctime = *time;
18368c2ecf20Sopenharmony_ci	if (flags & S_MTIME)
18378c2ecf20Sopenharmony_ci		inode->i_mtime = *time;
18388c2ecf20Sopenharmony_ci	if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
18398c2ecf20Sopenharmony_ci	    !(inode->i_sb->s_flags & SB_LAZYTIME))
18408c2ecf20Sopenharmony_ci		dirty = true;
18418c2ecf20Sopenharmony_ci
18428c2ecf20Sopenharmony_ci	if (dirty)
18438c2ecf20Sopenharmony_ci		iflags |= I_DIRTY_SYNC;
18448c2ecf20Sopenharmony_ci	__mark_inode_dirty(inode, iflags);
18458c2ecf20Sopenharmony_ci	return 0;
18468c2ecf20Sopenharmony_ci}
18478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_update_time);
18488c2ecf20Sopenharmony_ci
18498c2ecf20Sopenharmony_ci/*
18508c2ecf20Sopenharmony_ci * This does the actual work of updating an inodes time or version.  Must have
18518c2ecf20Sopenharmony_ci * had called mnt_want_write() before calling this.
18528c2ecf20Sopenharmony_ci */
18538c2ecf20Sopenharmony_ciint inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
18548c2ecf20Sopenharmony_ci{
18558c2ecf20Sopenharmony_ci	if (inode->i_op->update_time)
18568c2ecf20Sopenharmony_ci		return inode->i_op->update_time(inode, time, flags);
18578c2ecf20Sopenharmony_ci	return generic_update_time(inode, time, flags);
18588c2ecf20Sopenharmony_ci}
18598c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_update_time);
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_ci/**
18628c2ecf20Sopenharmony_ci *	touch_atime	-	update the access time
18638c2ecf20Sopenharmony_ci *	@path: the &struct path to update
18648c2ecf20Sopenharmony_ci *	@inode: inode to update
18658c2ecf20Sopenharmony_ci *
18668c2ecf20Sopenharmony_ci *	Update the accessed time on an inode and mark it for writeback.
18678c2ecf20Sopenharmony_ci *	This function automatically handles read only file systems and media,
18688c2ecf20Sopenharmony_ci *	as well as the "noatime" flag and inode specific "noatime" markers.
18698c2ecf20Sopenharmony_ci */
18708c2ecf20Sopenharmony_cibool atime_needs_update(const struct path *path, struct inode *inode)
18718c2ecf20Sopenharmony_ci{
18728c2ecf20Sopenharmony_ci	struct vfsmount *mnt = path->mnt;
18738c2ecf20Sopenharmony_ci	struct timespec64 now;
18748c2ecf20Sopenharmony_ci
18758c2ecf20Sopenharmony_ci	if (inode->i_flags & S_NOATIME)
18768c2ecf20Sopenharmony_ci		return false;
18778c2ecf20Sopenharmony_ci
18788c2ecf20Sopenharmony_ci	/* Atime updates will likely cause i_uid and i_gid to be written
18798c2ecf20Sopenharmony_ci	 * back improprely if their true value is unknown to the vfs.
18808c2ecf20Sopenharmony_ci	 */
18818c2ecf20Sopenharmony_ci	if (HAS_UNMAPPED_ID(inode))
18828c2ecf20Sopenharmony_ci		return false;
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_ci	if (IS_NOATIME(inode))
18858c2ecf20Sopenharmony_ci		return false;
18868c2ecf20Sopenharmony_ci	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
18878c2ecf20Sopenharmony_ci		return false;
18888c2ecf20Sopenharmony_ci
18898c2ecf20Sopenharmony_ci	if (mnt->mnt_flags & MNT_NOATIME)
18908c2ecf20Sopenharmony_ci		return false;
18918c2ecf20Sopenharmony_ci	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
18928c2ecf20Sopenharmony_ci		return false;
18938c2ecf20Sopenharmony_ci
18948c2ecf20Sopenharmony_ci	now = current_time(inode);
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci	if (!relatime_need_update(mnt, inode, now))
18978c2ecf20Sopenharmony_ci		return false;
18988c2ecf20Sopenharmony_ci
18998c2ecf20Sopenharmony_ci	if (timespec64_equal(&inode->i_atime, &now))
19008c2ecf20Sopenharmony_ci		return false;
19018c2ecf20Sopenharmony_ci
19028c2ecf20Sopenharmony_ci	return true;
19038c2ecf20Sopenharmony_ci}
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_civoid touch_atime(const struct path *path)
19068c2ecf20Sopenharmony_ci{
19078c2ecf20Sopenharmony_ci	struct vfsmount *mnt = path->mnt;
19088c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
19098c2ecf20Sopenharmony_ci	struct timespec64 now;
19108c2ecf20Sopenharmony_ci
19118c2ecf20Sopenharmony_ci	if (!atime_needs_update(path, inode))
19128c2ecf20Sopenharmony_ci		return;
19138c2ecf20Sopenharmony_ci
19148c2ecf20Sopenharmony_ci	if (!sb_start_write_trylock(inode->i_sb))
19158c2ecf20Sopenharmony_ci		return;
19168c2ecf20Sopenharmony_ci
19178c2ecf20Sopenharmony_ci	if (__mnt_want_write(mnt) != 0)
19188c2ecf20Sopenharmony_ci		goto skip_update;
19198c2ecf20Sopenharmony_ci	/*
19208c2ecf20Sopenharmony_ci	 * File systems can error out when updating inodes if they need to
19218c2ecf20Sopenharmony_ci	 * allocate new space to modify an inode (such is the case for
19228c2ecf20Sopenharmony_ci	 * Btrfs), but since we touch atime while walking down the path we
19238c2ecf20Sopenharmony_ci	 * really don't care if we failed to update the atime of the file,
19248c2ecf20Sopenharmony_ci	 * so just ignore the return value.
19258c2ecf20Sopenharmony_ci	 * We may also fail on filesystems that have the ability to make parts
19268c2ecf20Sopenharmony_ci	 * of the fs read only, e.g. subvolumes in Btrfs.
19278c2ecf20Sopenharmony_ci	 */
19288c2ecf20Sopenharmony_ci	now = current_time(inode);
19298c2ecf20Sopenharmony_ci	inode_update_time(inode, &now, S_ATIME);
19308c2ecf20Sopenharmony_ci	__mnt_drop_write(mnt);
19318c2ecf20Sopenharmony_ciskip_update:
19328c2ecf20Sopenharmony_ci	sb_end_write(inode->i_sb);
19338c2ecf20Sopenharmony_ci}
19348c2ecf20Sopenharmony_ciEXPORT_SYMBOL(touch_atime);
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci/*
19378c2ecf20Sopenharmony_ci * Return mask of changes for notify_change() that need to be done as a
19388c2ecf20Sopenharmony_ci * response to write or truncate. Return 0 if nothing has to be changed.
19398c2ecf20Sopenharmony_ci * Negative value on error (change should be denied).
19408c2ecf20Sopenharmony_ci */
19418c2ecf20Sopenharmony_ciint dentry_needs_remove_privs(struct dentry *dentry)
19428c2ecf20Sopenharmony_ci{
19438c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(dentry);
19448c2ecf20Sopenharmony_ci	int mask = 0;
19458c2ecf20Sopenharmony_ci	int ret;
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	if (IS_NOSEC(inode))
19488c2ecf20Sopenharmony_ci		return 0;
19498c2ecf20Sopenharmony_ci
19508c2ecf20Sopenharmony_ci	mask = setattr_should_drop_suidgid(inode);
19518c2ecf20Sopenharmony_ci	ret = security_inode_need_killpriv(dentry);
19528c2ecf20Sopenharmony_ci	if (ret < 0)
19538c2ecf20Sopenharmony_ci		return ret;
19548c2ecf20Sopenharmony_ci	if (ret)
19558c2ecf20Sopenharmony_ci		mask |= ATTR_KILL_PRIV;
19568c2ecf20Sopenharmony_ci	return mask;
19578c2ecf20Sopenharmony_ci}
19588c2ecf20Sopenharmony_ci
19598c2ecf20Sopenharmony_cistatic int __remove_privs(struct dentry *dentry, int kill)
19608c2ecf20Sopenharmony_ci{
19618c2ecf20Sopenharmony_ci	struct iattr newattrs;
19628c2ecf20Sopenharmony_ci
19638c2ecf20Sopenharmony_ci	newattrs.ia_valid = ATTR_FORCE | kill;
19648c2ecf20Sopenharmony_ci	/*
19658c2ecf20Sopenharmony_ci	 * Note we call this on write, so notify_change will not
19668c2ecf20Sopenharmony_ci	 * encounter any conflicting delegations:
19678c2ecf20Sopenharmony_ci	 */
19688c2ecf20Sopenharmony_ci	return notify_change(dentry, &newattrs, NULL);
19698c2ecf20Sopenharmony_ci}
19708c2ecf20Sopenharmony_ci
19718c2ecf20Sopenharmony_ci/*
19728c2ecf20Sopenharmony_ci * Remove special file priviledges (suid, capabilities) when file is written
19738c2ecf20Sopenharmony_ci * to or truncated.
19748c2ecf20Sopenharmony_ci */
19758c2ecf20Sopenharmony_ciint file_remove_privs(struct file *file)
19768c2ecf20Sopenharmony_ci{
19778c2ecf20Sopenharmony_ci	struct dentry *dentry = file_dentry(file);
19788c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
19798c2ecf20Sopenharmony_ci	int kill;
19808c2ecf20Sopenharmony_ci	int error = 0;
19818c2ecf20Sopenharmony_ci
19828c2ecf20Sopenharmony_ci	/*
19838c2ecf20Sopenharmony_ci	 * Fast path for nothing security related.
19848c2ecf20Sopenharmony_ci	 * As well for non-regular files, e.g. blkdev inodes.
19858c2ecf20Sopenharmony_ci	 * For example, blkdev_write_iter() might get here
19868c2ecf20Sopenharmony_ci	 * trying to remove privs which it is not allowed to.
19878c2ecf20Sopenharmony_ci	 */
19888c2ecf20Sopenharmony_ci	if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
19898c2ecf20Sopenharmony_ci		return 0;
19908c2ecf20Sopenharmony_ci
19918c2ecf20Sopenharmony_ci	kill = dentry_needs_remove_privs(dentry);
19928c2ecf20Sopenharmony_ci	if (kill < 0)
19938c2ecf20Sopenharmony_ci		return kill;
19948c2ecf20Sopenharmony_ci	if (kill)
19958c2ecf20Sopenharmony_ci		error = __remove_privs(dentry, kill);
19968c2ecf20Sopenharmony_ci	if (!error)
19978c2ecf20Sopenharmony_ci		inode_has_no_xattr(inode);
19988c2ecf20Sopenharmony_ci
19998c2ecf20Sopenharmony_ci	return error;
20008c2ecf20Sopenharmony_ci}
20018c2ecf20Sopenharmony_ciEXPORT_SYMBOL(file_remove_privs);
20028c2ecf20Sopenharmony_ci
20038c2ecf20Sopenharmony_ci/**
20048c2ecf20Sopenharmony_ci *	file_update_time	-	update mtime and ctime time
20058c2ecf20Sopenharmony_ci *	@file: file accessed
20068c2ecf20Sopenharmony_ci *
20078c2ecf20Sopenharmony_ci *	Update the mtime and ctime members of an inode and mark the inode
20088c2ecf20Sopenharmony_ci *	for writeback.  Note that this function is meant exclusively for
20098c2ecf20Sopenharmony_ci *	usage in the file write path of filesystems, and filesystems may
20108c2ecf20Sopenharmony_ci *	choose to explicitly ignore update via this function with the
20118c2ecf20Sopenharmony_ci *	S_NOCMTIME inode flag, e.g. for network filesystem where these
20128c2ecf20Sopenharmony_ci *	timestamps are handled by the server.  This can return an error for
20138c2ecf20Sopenharmony_ci *	file systems who need to allocate space in order to update an inode.
20148c2ecf20Sopenharmony_ci */
20158c2ecf20Sopenharmony_ci
20168c2ecf20Sopenharmony_ciint file_update_time(struct file *file)
20178c2ecf20Sopenharmony_ci{
20188c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
20198c2ecf20Sopenharmony_ci	struct timespec64 now;
20208c2ecf20Sopenharmony_ci	int sync_it = 0;
20218c2ecf20Sopenharmony_ci	int ret;
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	/* First try to exhaust all avenues to not sync */
20248c2ecf20Sopenharmony_ci	if (IS_NOCMTIME(inode))
20258c2ecf20Sopenharmony_ci		return 0;
20268c2ecf20Sopenharmony_ci
20278c2ecf20Sopenharmony_ci	now = current_time(inode);
20288c2ecf20Sopenharmony_ci	if (!timespec64_equal(&inode->i_mtime, &now))
20298c2ecf20Sopenharmony_ci		sync_it = S_MTIME;
20308c2ecf20Sopenharmony_ci
20318c2ecf20Sopenharmony_ci	if (!timespec64_equal(&inode->i_ctime, &now))
20328c2ecf20Sopenharmony_ci		sync_it |= S_CTIME;
20338c2ecf20Sopenharmony_ci
20348c2ecf20Sopenharmony_ci	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
20358c2ecf20Sopenharmony_ci		sync_it |= S_VERSION;
20368c2ecf20Sopenharmony_ci
20378c2ecf20Sopenharmony_ci	if (!sync_it)
20388c2ecf20Sopenharmony_ci		return 0;
20398c2ecf20Sopenharmony_ci
20408c2ecf20Sopenharmony_ci	/* Finally allowed to write? Takes lock. */
20418c2ecf20Sopenharmony_ci	if (__mnt_want_write_file(file))
20428c2ecf20Sopenharmony_ci		return 0;
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci	ret = inode_update_time(inode, &now, sync_it);
20458c2ecf20Sopenharmony_ci	__mnt_drop_write_file(file);
20468c2ecf20Sopenharmony_ci
20478c2ecf20Sopenharmony_ci	return ret;
20488c2ecf20Sopenharmony_ci}
20498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(file_update_time);
20508c2ecf20Sopenharmony_ci
20518c2ecf20Sopenharmony_ci/* Caller must hold the file's inode lock */
20528c2ecf20Sopenharmony_ciint file_modified(struct file *file)
20538c2ecf20Sopenharmony_ci{
20548c2ecf20Sopenharmony_ci	int err;
20558c2ecf20Sopenharmony_ci
20568c2ecf20Sopenharmony_ci	/*
20578c2ecf20Sopenharmony_ci	 * Clear the security bits if the process is not being run by root.
20588c2ecf20Sopenharmony_ci	 * This keeps people from modifying setuid and setgid binaries.
20598c2ecf20Sopenharmony_ci	 */
20608c2ecf20Sopenharmony_ci	err = file_remove_privs(file);
20618c2ecf20Sopenharmony_ci	if (err)
20628c2ecf20Sopenharmony_ci		return err;
20638c2ecf20Sopenharmony_ci
20648c2ecf20Sopenharmony_ci	if (unlikely(file->f_mode & FMODE_NOCMTIME))
20658c2ecf20Sopenharmony_ci		return 0;
20668c2ecf20Sopenharmony_ci
20678c2ecf20Sopenharmony_ci	return file_update_time(file);
20688c2ecf20Sopenharmony_ci}
20698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(file_modified);
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ciint inode_needs_sync(struct inode *inode)
20728c2ecf20Sopenharmony_ci{
20738c2ecf20Sopenharmony_ci	if (IS_SYNC(inode))
20748c2ecf20Sopenharmony_ci		return 1;
20758c2ecf20Sopenharmony_ci	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
20768c2ecf20Sopenharmony_ci		return 1;
20778c2ecf20Sopenharmony_ci	return 0;
20788c2ecf20Sopenharmony_ci}
20798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_needs_sync);
20808c2ecf20Sopenharmony_ci
20818c2ecf20Sopenharmony_ci/*
20828c2ecf20Sopenharmony_ci * If we try to find an inode in the inode hash while it is being
20838c2ecf20Sopenharmony_ci * deleted, we have to wait until the filesystem completes its
20848c2ecf20Sopenharmony_ci * deletion before reporting that it isn't found.  This function waits
20858c2ecf20Sopenharmony_ci * until the deletion _might_ have completed.  Callers are responsible
20868c2ecf20Sopenharmony_ci * to recheck inode state.
20878c2ecf20Sopenharmony_ci *
20888c2ecf20Sopenharmony_ci * It doesn't matter if I_NEW is not set initially, a call to
20898c2ecf20Sopenharmony_ci * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
20908c2ecf20Sopenharmony_ci * will DTRT.
20918c2ecf20Sopenharmony_ci */
20928c2ecf20Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode)
20938c2ecf20Sopenharmony_ci{
20948c2ecf20Sopenharmony_ci	wait_queue_head_t *wq;
20958c2ecf20Sopenharmony_ci	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
20968c2ecf20Sopenharmony_ci	wq = bit_waitqueue(&inode->i_state, __I_NEW);
20978c2ecf20Sopenharmony_ci	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
20988c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
20998c2ecf20Sopenharmony_ci	spin_unlock(&inode_hash_lock);
21008c2ecf20Sopenharmony_ci	schedule();
21018c2ecf20Sopenharmony_ci	finish_wait(wq, &wait.wq_entry);
21028c2ecf20Sopenharmony_ci	spin_lock(&inode_hash_lock);
21038c2ecf20Sopenharmony_ci}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_cistatic __initdata unsigned long ihash_entries;
21068c2ecf20Sopenharmony_cistatic int __init set_ihash_entries(char *str)
21078c2ecf20Sopenharmony_ci{
21088c2ecf20Sopenharmony_ci	if (!str)
21098c2ecf20Sopenharmony_ci		return 0;
21108c2ecf20Sopenharmony_ci	ihash_entries = simple_strtoul(str, &str, 0);
21118c2ecf20Sopenharmony_ci	return 1;
21128c2ecf20Sopenharmony_ci}
21138c2ecf20Sopenharmony_ci__setup("ihash_entries=", set_ihash_entries);
21148c2ecf20Sopenharmony_ci
21158c2ecf20Sopenharmony_ci/*
21168c2ecf20Sopenharmony_ci * Initialize the waitqueues and inode hash table.
21178c2ecf20Sopenharmony_ci */
21188c2ecf20Sopenharmony_civoid __init inode_init_early(void)
21198c2ecf20Sopenharmony_ci{
21208c2ecf20Sopenharmony_ci	/* If hashes are distributed across NUMA nodes, defer
21218c2ecf20Sopenharmony_ci	 * hash allocation until vmalloc space is available.
21228c2ecf20Sopenharmony_ci	 */
21238c2ecf20Sopenharmony_ci	if (hashdist)
21248c2ecf20Sopenharmony_ci		return;
21258c2ecf20Sopenharmony_ci
21268c2ecf20Sopenharmony_ci	inode_hashtable =
21278c2ecf20Sopenharmony_ci		alloc_large_system_hash("Inode-cache",
21288c2ecf20Sopenharmony_ci					sizeof(struct hlist_head),
21298c2ecf20Sopenharmony_ci					ihash_entries,
21308c2ecf20Sopenharmony_ci					14,
21318c2ecf20Sopenharmony_ci					HASH_EARLY | HASH_ZERO,
21328c2ecf20Sopenharmony_ci					&i_hash_shift,
21338c2ecf20Sopenharmony_ci					&i_hash_mask,
21348c2ecf20Sopenharmony_ci					0,
21358c2ecf20Sopenharmony_ci					0);
21368c2ecf20Sopenharmony_ci}
21378c2ecf20Sopenharmony_ci
21388c2ecf20Sopenharmony_civoid __init inode_init(void)
21398c2ecf20Sopenharmony_ci{
21408c2ecf20Sopenharmony_ci	/* inode slab cache */
21418c2ecf20Sopenharmony_ci	inode_cachep = kmem_cache_create("inode_cache",
21428c2ecf20Sopenharmony_ci					 sizeof(struct inode),
21438c2ecf20Sopenharmony_ci					 0,
21448c2ecf20Sopenharmony_ci					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
21458c2ecf20Sopenharmony_ci					 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
21468c2ecf20Sopenharmony_ci					 init_once);
21478c2ecf20Sopenharmony_ci
21488c2ecf20Sopenharmony_ci	/* Hash may have been set up in inode_init_early */
21498c2ecf20Sopenharmony_ci	if (!hashdist)
21508c2ecf20Sopenharmony_ci		return;
21518c2ecf20Sopenharmony_ci
21528c2ecf20Sopenharmony_ci	inode_hashtable =
21538c2ecf20Sopenharmony_ci		alloc_large_system_hash("Inode-cache",
21548c2ecf20Sopenharmony_ci					sizeof(struct hlist_head),
21558c2ecf20Sopenharmony_ci					ihash_entries,
21568c2ecf20Sopenharmony_ci					14,
21578c2ecf20Sopenharmony_ci					HASH_ZERO,
21588c2ecf20Sopenharmony_ci					&i_hash_shift,
21598c2ecf20Sopenharmony_ci					&i_hash_mask,
21608c2ecf20Sopenharmony_ci					0,
21618c2ecf20Sopenharmony_ci					0);
21628c2ecf20Sopenharmony_ci}
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_civoid init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
21658c2ecf20Sopenharmony_ci{
21668c2ecf20Sopenharmony_ci	inode->i_mode = mode;
21678c2ecf20Sopenharmony_ci	if (S_ISCHR(mode)) {
21688c2ecf20Sopenharmony_ci		inode->i_fop = &def_chr_fops;
21698c2ecf20Sopenharmony_ci		inode->i_rdev = rdev;
21708c2ecf20Sopenharmony_ci	} else if (S_ISBLK(mode)) {
21718c2ecf20Sopenharmony_ci		inode->i_fop = &def_blk_fops;
21728c2ecf20Sopenharmony_ci		inode->i_rdev = rdev;
21738c2ecf20Sopenharmony_ci	} else if (S_ISFIFO(mode))
21748c2ecf20Sopenharmony_ci		inode->i_fop = &pipefifo_fops;
21758c2ecf20Sopenharmony_ci	else if (S_ISSOCK(mode))
21768c2ecf20Sopenharmony_ci		;	/* leave it no_open_fops */
21778c2ecf20Sopenharmony_ci	else
21788c2ecf20Sopenharmony_ci		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
21798c2ecf20Sopenharmony_ci				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
21808c2ecf20Sopenharmony_ci				  inode->i_ino);
21818c2ecf20Sopenharmony_ci}
21828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(init_special_inode);
21838c2ecf20Sopenharmony_ci
21848c2ecf20Sopenharmony_ci/**
21858c2ecf20Sopenharmony_ci * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
21868c2ecf20Sopenharmony_ci * @inode: New inode
21878c2ecf20Sopenharmony_ci * @dir: Directory inode
21888c2ecf20Sopenharmony_ci * @mode: mode of the new inode
21898c2ecf20Sopenharmony_ci */
21908c2ecf20Sopenharmony_civoid inode_init_owner(struct inode *inode, const struct inode *dir,
21918c2ecf20Sopenharmony_ci			umode_t mode)
21928c2ecf20Sopenharmony_ci{
21938c2ecf20Sopenharmony_ci	inode->i_uid = current_fsuid();
21948c2ecf20Sopenharmony_ci	if (dir && dir->i_mode & S_ISGID) {
21958c2ecf20Sopenharmony_ci		inode->i_gid = dir->i_gid;
21968c2ecf20Sopenharmony_ci
21978c2ecf20Sopenharmony_ci		/* Directories are special, and always inherit S_ISGID */
21988c2ecf20Sopenharmony_ci		if (S_ISDIR(mode))
21998c2ecf20Sopenharmony_ci			mode |= S_ISGID;
22008c2ecf20Sopenharmony_ci	} else
22018c2ecf20Sopenharmony_ci		inode->i_gid = current_fsgid();
22028c2ecf20Sopenharmony_ci	inode->i_mode = mode;
22038c2ecf20Sopenharmony_ci}
22048c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_init_owner);
22058c2ecf20Sopenharmony_ci
22068c2ecf20Sopenharmony_ci/**
22078c2ecf20Sopenharmony_ci * inode_owner_or_capable - check current task permissions to inode
22088c2ecf20Sopenharmony_ci * @inode: inode being checked
22098c2ecf20Sopenharmony_ci *
22108c2ecf20Sopenharmony_ci * Return true if current either has CAP_FOWNER in a namespace with the
22118c2ecf20Sopenharmony_ci * inode owner uid mapped, or owns the file.
22128c2ecf20Sopenharmony_ci */
22138c2ecf20Sopenharmony_cibool inode_owner_or_capable(const struct inode *inode)
22148c2ecf20Sopenharmony_ci{
22158c2ecf20Sopenharmony_ci	struct user_namespace *ns;
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci	if (uid_eq(current_fsuid(), inode->i_uid))
22188c2ecf20Sopenharmony_ci		return true;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	ns = current_user_ns();
22218c2ecf20Sopenharmony_ci	if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
22228c2ecf20Sopenharmony_ci		return true;
22238c2ecf20Sopenharmony_ci	return false;
22248c2ecf20Sopenharmony_ci}
22258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_owner_or_capable);
22268c2ecf20Sopenharmony_ci
22278c2ecf20Sopenharmony_ci/*
22288c2ecf20Sopenharmony_ci * Direct i/o helper functions
22298c2ecf20Sopenharmony_ci */
22308c2ecf20Sopenharmony_cistatic void __inode_dio_wait(struct inode *inode)
22318c2ecf20Sopenharmony_ci{
22328c2ecf20Sopenharmony_ci	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
22338c2ecf20Sopenharmony_ci	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	do {
22368c2ecf20Sopenharmony_ci		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
22378c2ecf20Sopenharmony_ci		if (atomic_read(&inode->i_dio_count))
22388c2ecf20Sopenharmony_ci			schedule();
22398c2ecf20Sopenharmony_ci	} while (atomic_read(&inode->i_dio_count));
22408c2ecf20Sopenharmony_ci	finish_wait(wq, &q.wq_entry);
22418c2ecf20Sopenharmony_ci}
22428c2ecf20Sopenharmony_ci
22438c2ecf20Sopenharmony_ci/**
22448c2ecf20Sopenharmony_ci * inode_dio_wait - wait for outstanding DIO requests to finish
22458c2ecf20Sopenharmony_ci * @inode: inode to wait for
22468c2ecf20Sopenharmony_ci *
22478c2ecf20Sopenharmony_ci * Waits for all pending direct I/O requests to finish so that we can
22488c2ecf20Sopenharmony_ci * proceed with a truncate or equivalent operation.
22498c2ecf20Sopenharmony_ci *
22508c2ecf20Sopenharmony_ci * Must be called under a lock that serializes taking new references
22518c2ecf20Sopenharmony_ci * to i_dio_count, usually by inode->i_mutex.
22528c2ecf20Sopenharmony_ci */
22538c2ecf20Sopenharmony_civoid inode_dio_wait(struct inode *inode)
22548c2ecf20Sopenharmony_ci{
22558c2ecf20Sopenharmony_ci	if (atomic_read(&inode->i_dio_count))
22568c2ecf20Sopenharmony_ci		__inode_dio_wait(inode);
22578c2ecf20Sopenharmony_ci}
22588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_dio_wait);
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_ci/*
22618c2ecf20Sopenharmony_ci * inode_set_flags - atomically set some inode flags
22628c2ecf20Sopenharmony_ci *
22638c2ecf20Sopenharmony_ci * Note: the caller should be holding i_mutex, or else be sure that
22648c2ecf20Sopenharmony_ci * they have exclusive access to the inode structure (i.e., while the
22658c2ecf20Sopenharmony_ci * inode is being instantiated).  The reason for the cmpxchg() loop
22668c2ecf20Sopenharmony_ci * --- which wouldn't be necessary if all code paths which modify
22678c2ecf20Sopenharmony_ci * i_flags actually followed this rule, is that there is at least one
22688c2ecf20Sopenharmony_ci * code path which doesn't today so we use cmpxchg() out of an abundance
22698c2ecf20Sopenharmony_ci * of caution.
22708c2ecf20Sopenharmony_ci *
22718c2ecf20Sopenharmony_ci * In the long run, i_mutex is overkill, and we should probably look
22728c2ecf20Sopenharmony_ci * at using the i_lock spinlock to protect i_flags, and then make sure
22738c2ecf20Sopenharmony_ci * it is so documented in include/linux/fs.h and that all code follows
22748c2ecf20Sopenharmony_ci * the locking convention!!
22758c2ecf20Sopenharmony_ci */
22768c2ecf20Sopenharmony_civoid inode_set_flags(struct inode *inode, unsigned int flags,
22778c2ecf20Sopenharmony_ci		     unsigned int mask)
22788c2ecf20Sopenharmony_ci{
22798c2ecf20Sopenharmony_ci	WARN_ON_ONCE(flags & ~mask);
22808c2ecf20Sopenharmony_ci	set_mask_bits(&inode->i_flags, mask, flags);
22818c2ecf20Sopenharmony_ci}
22828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_set_flags);
22838c2ecf20Sopenharmony_ci
22848c2ecf20Sopenharmony_civoid inode_nohighmem(struct inode *inode)
22858c2ecf20Sopenharmony_ci{
22868c2ecf20Sopenharmony_ci	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
22878c2ecf20Sopenharmony_ci}
22888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_nohighmem);
22898c2ecf20Sopenharmony_ci
22908c2ecf20Sopenharmony_ci/**
22918c2ecf20Sopenharmony_ci * timestamp_truncate - Truncate timespec to a granularity
22928c2ecf20Sopenharmony_ci * @t: Timespec
22938c2ecf20Sopenharmony_ci * @inode: inode being updated
22948c2ecf20Sopenharmony_ci *
22958c2ecf20Sopenharmony_ci * Truncate a timespec to the granularity supported by the fs
22968c2ecf20Sopenharmony_ci * containing the inode. Always rounds down. gran must
22978c2ecf20Sopenharmony_ci * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
22988c2ecf20Sopenharmony_ci */
22998c2ecf20Sopenharmony_cistruct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
23008c2ecf20Sopenharmony_ci{
23018c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
23028c2ecf20Sopenharmony_ci	unsigned int gran = sb->s_time_gran;
23038c2ecf20Sopenharmony_ci
23048c2ecf20Sopenharmony_ci	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
23058c2ecf20Sopenharmony_ci	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
23068c2ecf20Sopenharmony_ci		t.tv_nsec = 0;
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	/* Avoid division in the common cases 1 ns and 1 s. */
23098c2ecf20Sopenharmony_ci	if (gran == 1)
23108c2ecf20Sopenharmony_ci		; /* nothing */
23118c2ecf20Sopenharmony_ci	else if (gran == NSEC_PER_SEC)
23128c2ecf20Sopenharmony_ci		t.tv_nsec = 0;
23138c2ecf20Sopenharmony_ci	else if (gran > 1 && gran < NSEC_PER_SEC)
23148c2ecf20Sopenharmony_ci		t.tv_nsec -= t.tv_nsec % gran;
23158c2ecf20Sopenharmony_ci	else
23168c2ecf20Sopenharmony_ci		WARN(1, "invalid file time granularity: %u", gran);
23178c2ecf20Sopenharmony_ci	return t;
23188c2ecf20Sopenharmony_ci}
23198c2ecf20Sopenharmony_ciEXPORT_SYMBOL(timestamp_truncate);
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci/**
23228c2ecf20Sopenharmony_ci * current_time - Return FS time
23238c2ecf20Sopenharmony_ci * @inode: inode.
23248c2ecf20Sopenharmony_ci *
23258c2ecf20Sopenharmony_ci * Return the current time truncated to the time granularity supported by
23268c2ecf20Sopenharmony_ci * the fs.
23278c2ecf20Sopenharmony_ci *
23288c2ecf20Sopenharmony_ci * Note that inode and inode->sb cannot be NULL.
23298c2ecf20Sopenharmony_ci * Otherwise, the function warns and returns time without truncation.
23308c2ecf20Sopenharmony_ci */
23318c2ecf20Sopenharmony_cistruct timespec64 current_time(struct inode *inode)
23328c2ecf20Sopenharmony_ci{
23338c2ecf20Sopenharmony_ci	struct timespec64 now;
23348c2ecf20Sopenharmony_ci
23358c2ecf20Sopenharmony_ci	ktime_get_coarse_real_ts64(&now);
23368c2ecf20Sopenharmony_ci
23378c2ecf20Sopenharmony_ci	if (unlikely(!inode->i_sb)) {
23388c2ecf20Sopenharmony_ci		WARN(1, "current_time() called with uninitialized super_block in the inode");
23398c2ecf20Sopenharmony_ci		return now;
23408c2ecf20Sopenharmony_ci	}
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ci	return timestamp_truncate(now, inode);
23438c2ecf20Sopenharmony_ci}
23448c2ecf20Sopenharmony_ciEXPORT_SYMBOL(current_time);
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci/*
23478c2ecf20Sopenharmony_ci * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
23488c2ecf20Sopenharmony_ci * configurations.
23498c2ecf20Sopenharmony_ci *
23508c2ecf20Sopenharmony_ci * Note: the caller should be holding i_mutex, or else be sure that they have
23518c2ecf20Sopenharmony_ci * exclusive access to the inode structure.
23528c2ecf20Sopenharmony_ci */
23538c2ecf20Sopenharmony_ciint vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
23548c2ecf20Sopenharmony_ci			     unsigned int flags)
23558c2ecf20Sopenharmony_ci{
23568c2ecf20Sopenharmony_ci	/*
23578c2ecf20Sopenharmony_ci	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
23588c2ecf20Sopenharmony_ci	 * the relevant capability.
23598c2ecf20Sopenharmony_ci	 *
23608c2ecf20Sopenharmony_ci	 * This test looks nicer. Thanks to Pauline Middelink
23618c2ecf20Sopenharmony_ci	 */
23628c2ecf20Sopenharmony_ci	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
23638c2ecf20Sopenharmony_ci	    !capable(CAP_LINUX_IMMUTABLE))
23648c2ecf20Sopenharmony_ci		return -EPERM;
23658c2ecf20Sopenharmony_ci
23668c2ecf20Sopenharmony_ci	return fscrypt_prepare_setflags(inode, oldflags, flags);
23678c2ecf20Sopenharmony_ci}
23688c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vfs_ioc_setflags_prepare);
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci/*
23718c2ecf20Sopenharmony_ci * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid
23728c2ecf20Sopenharmony_ci * configurations.
23738c2ecf20Sopenharmony_ci *
23748c2ecf20Sopenharmony_ci * Note: the caller should be holding i_mutex, or else be sure that they have
23758c2ecf20Sopenharmony_ci * exclusive access to the inode structure.
23768c2ecf20Sopenharmony_ci */
23778c2ecf20Sopenharmony_ciint vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
23788c2ecf20Sopenharmony_ci			     struct fsxattr *fa)
23798c2ecf20Sopenharmony_ci{
23808c2ecf20Sopenharmony_ci	/*
23818c2ecf20Sopenharmony_ci	 * Can't modify an immutable/append-only file unless we have
23828c2ecf20Sopenharmony_ci	 * appropriate permission.
23838c2ecf20Sopenharmony_ci	 */
23848c2ecf20Sopenharmony_ci	if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
23858c2ecf20Sopenharmony_ci			(FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) &&
23868c2ecf20Sopenharmony_ci	    !capable(CAP_LINUX_IMMUTABLE))
23878c2ecf20Sopenharmony_ci		return -EPERM;
23888c2ecf20Sopenharmony_ci
23898c2ecf20Sopenharmony_ci	/*
23908c2ecf20Sopenharmony_ci	 * Project Quota ID state is only allowed to change from within the init
23918c2ecf20Sopenharmony_ci	 * namespace. Enforce that restriction only if we are trying to change
23928c2ecf20Sopenharmony_ci	 * the quota ID state. Everything else is allowed in user namespaces.
23938c2ecf20Sopenharmony_ci	 */
23948c2ecf20Sopenharmony_ci	if (current_user_ns() != &init_user_ns) {
23958c2ecf20Sopenharmony_ci		if (old_fa->fsx_projid != fa->fsx_projid)
23968c2ecf20Sopenharmony_ci			return -EINVAL;
23978c2ecf20Sopenharmony_ci		if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
23988c2ecf20Sopenharmony_ci				FS_XFLAG_PROJINHERIT)
23998c2ecf20Sopenharmony_ci			return -EINVAL;
24008c2ecf20Sopenharmony_ci	}
24018c2ecf20Sopenharmony_ci
24028c2ecf20Sopenharmony_ci	/* Check extent size hints. */
24038c2ecf20Sopenharmony_ci	if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
24048c2ecf20Sopenharmony_ci		return -EINVAL;
24058c2ecf20Sopenharmony_ci
24068c2ecf20Sopenharmony_ci	if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
24078c2ecf20Sopenharmony_ci			!S_ISDIR(inode->i_mode))
24088c2ecf20Sopenharmony_ci		return -EINVAL;
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci	if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
24118c2ecf20Sopenharmony_ci	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
24128c2ecf20Sopenharmony_ci		return -EINVAL;
24138c2ecf20Sopenharmony_ci
24148c2ecf20Sopenharmony_ci	/*
24158c2ecf20Sopenharmony_ci	 * It is only valid to set the DAX flag on regular files and
24168c2ecf20Sopenharmony_ci	 * directories on filesystems.
24178c2ecf20Sopenharmony_ci	 */
24188c2ecf20Sopenharmony_ci	if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
24198c2ecf20Sopenharmony_ci	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
24208c2ecf20Sopenharmony_ci		return -EINVAL;
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci	/* Extent size hints of zero turn off the flags. */
24238c2ecf20Sopenharmony_ci	if (fa->fsx_extsize == 0)
24248c2ecf20Sopenharmony_ci		fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
24258c2ecf20Sopenharmony_ci	if (fa->fsx_cowextsize == 0)
24268c2ecf20Sopenharmony_ci		fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
24278c2ecf20Sopenharmony_ci
24288c2ecf20Sopenharmony_ci	return 0;
24298c2ecf20Sopenharmony_ci}
24308c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
24318c2ecf20Sopenharmony_ci
24328c2ecf20Sopenharmony_ci/**
24338c2ecf20Sopenharmony_ci * inode_set_ctime_current - set the ctime to current_time
24348c2ecf20Sopenharmony_ci * @inode: inode
24358c2ecf20Sopenharmony_ci *
24368c2ecf20Sopenharmony_ci * Set the inode->i_ctime to the current value for the inode. Returns
24378c2ecf20Sopenharmony_ci * the current value that was assigned to i_ctime.
24388c2ecf20Sopenharmony_ci */
24398c2ecf20Sopenharmony_cistruct timespec64 inode_set_ctime_current(struct inode *inode)
24408c2ecf20Sopenharmony_ci{
24418c2ecf20Sopenharmony_ci	struct timespec64 now = current_time(inode);
24428c2ecf20Sopenharmony_ci
24438c2ecf20Sopenharmony_ci	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
24448c2ecf20Sopenharmony_ci	return now;
24458c2ecf20Sopenharmony_ci}
24468c2ecf20Sopenharmony_ciEXPORT_SYMBOL(inode_set_ctime_current);
24478c2ecf20Sopenharmony_ci
24488c2ecf20Sopenharmony_ci/**
24498c2ecf20Sopenharmony_ci * in_group_or_capable - check whether caller is CAP_FSETID privileged
24508c2ecf20Sopenharmony_ci * @inode:	inode to check
24518c2ecf20Sopenharmony_ci * @gid:	the new/current gid of @inode
24528c2ecf20Sopenharmony_ci *
24538c2ecf20Sopenharmony_ci * Check wether @gid is in the caller's group list or if the caller is
24548c2ecf20Sopenharmony_ci * privileged with CAP_FSETID over @inode. This can be used to determine
24558c2ecf20Sopenharmony_ci * whether the setgid bit can be kept or must be dropped.
24568c2ecf20Sopenharmony_ci *
24578c2ecf20Sopenharmony_ci * Return: true if the caller is sufficiently privileged, false if not.
24588c2ecf20Sopenharmony_ci */
24598c2ecf20Sopenharmony_cibool in_group_or_capable(const struct inode *inode, kgid_t gid)
24608c2ecf20Sopenharmony_ci{
24618c2ecf20Sopenharmony_ci	if (in_group_p(gid))
24628c2ecf20Sopenharmony_ci		return true;
24638c2ecf20Sopenharmony_ci	if (capable_wrt_inode_uidgid(inode, CAP_FSETID))
24648c2ecf20Sopenharmony_ci		return true;
24658c2ecf20Sopenharmony_ci	return false;
24668c2ecf20Sopenharmony_ci}
24678c2ecf20Sopenharmony_ci
24688c2ecf20Sopenharmony_ci/**
24698c2ecf20Sopenharmony_ci * mode_strip_sgid - handle the sgid bit for non-directories
24708c2ecf20Sopenharmony_ci * @dir: parent directory inode
24718c2ecf20Sopenharmony_ci * @mode: mode of the file to be created in @dir
24728c2ecf20Sopenharmony_ci *
24738c2ecf20Sopenharmony_ci * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
24748c2ecf20Sopenharmony_ci * raised and @dir has the S_ISGID bit raised ensure that the caller is
24758c2ecf20Sopenharmony_ci * either in the group of the parent directory or they have CAP_FSETID
24768c2ecf20Sopenharmony_ci * in their user namespace and are privileged over the parent directory.
24778c2ecf20Sopenharmony_ci * In all other cases, strip the S_ISGID bit from @mode.
24788c2ecf20Sopenharmony_ci *
24798c2ecf20Sopenharmony_ci * Return: the new mode to use for the file
24808c2ecf20Sopenharmony_ci */
24818c2ecf20Sopenharmony_ciumode_t mode_strip_sgid(const struct inode *dir, umode_t mode)
24828c2ecf20Sopenharmony_ci{
24838c2ecf20Sopenharmony_ci	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
24848c2ecf20Sopenharmony_ci		return mode;
24858c2ecf20Sopenharmony_ci	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
24868c2ecf20Sopenharmony_ci		return mode;
24878c2ecf20Sopenharmony_ci	if (in_group_or_capable(dir, dir->i_gid))
24888c2ecf20Sopenharmony_ci		return mode;
24898c2ecf20Sopenharmony_ci	return mode & ~S_ISGID;
24908c2ecf20Sopenharmony_ci}
24918c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mode_strip_sgid);
2492