162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * (C) 1997 Linus Torvalds 462306a36Sopenharmony_ci * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include <linux/export.h> 762306a36Sopenharmony_ci#include <linux/fs.h> 862306a36Sopenharmony_ci#include <linux/filelock.h> 962306a36Sopenharmony_ci#include <linux/mm.h> 1062306a36Sopenharmony_ci#include <linux/backing-dev.h> 1162306a36Sopenharmony_ci#include <linux/hash.h> 1262306a36Sopenharmony_ci#include <linux/swap.h> 1362306a36Sopenharmony_ci#include <linux/security.h> 1462306a36Sopenharmony_ci#include <linux/cdev.h> 1562306a36Sopenharmony_ci#include <linux/memblock.h> 1662306a36Sopenharmony_ci#include <linux/fsnotify.h> 1762306a36Sopenharmony_ci#include <linux/mount.h> 1862306a36Sopenharmony_ci#include <linux/posix_acl.h> 1962306a36Sopenharmony_ci#include <linux/buffer_head.h> /* for inode_has_buffers */ 2062306a36Sopenharmony_ci#include <linux/ratelimit.h> 2162306a36Sopenharmony_ci#include <linux/list_lru.h> 2262306a36Sopenharmony_ci#include <linux/iversion.h> 2362306a36Sopenharmony_ci#include <trace/events/writeback.h> 2462306a36Sopenharmony_ci#include "internal.h" 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/* 2762306a36Sopenharmony_ci * Inode locking rules: 2862306a36Sopenharmony_ci * 2962306a36Sopenharmony_ci * inode->i_lock protects: 3062306a36Sopenharmony_ci * inode->i_state, inode->i_hash, __iget(), inode->i_io_list 3162306a36Sopenharmony_ci * Inode LRU list locks protect: 3262306a36Sopenharmony_ci * inode->i_sb->s_inode_lru, inode->i_lru 3362306a36Sopenharmony_ci * inode->i_sb->s_inode_list_lock protects: 3462306a36Sopenharmony_ci * inode->i_sb->s_inodes, inode->i_sb_list 3562306a36Sopenharmony_ci * bdi->wb.list_lock protects: 3662306a36Sopenharmony_ci * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list 3762306a36Sopenharmony_ci * inode_hash_lock protects: 3862306a36Sopenharmony_ci * inode_hashtable, inode->i_hash 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * Lock ordering: 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * inode->i_sb->s_inode_list_lock 4362306a36Sopenharmony_ci * inode->i_lock 4462306a36Sopenharmony_ci * Inode LRU list locks 4562306a36Sopenharmony_ci * 4662306a36Sopenharmony_ci * bdi->wb.list_lock 4762306a36Sopenharmony_ci * inode->i_lock 4862306a36Sopenharmony_ci * 4962306a36Sopenharmony_ci * inode_hash_lock 5062306a36Sopenharmony_ci * inode->i_sb->s_inode_list_lock 5162306a36Sopenharmony_ci * inode->i_lock 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * iunique_lock 5462306a36Sopenharmony_ci * inode_hash_lock 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cistatic unsigned int i_hash_mask __read_mostly; 5862306a36Sopenharmony_cistatic unsigned int i_hash_shift __read_mostly; 5962306a36Sopenharmony_cistatic struct hlist_head *inode_hashtable __read_mostly; 6062306a36Sopenharmony_cistatic __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci/* 6362306a36Sopenharmony_ci * Empty aops. Can be used for the cases where the user does not 6462306a36Sopenharmony_ci * define any of the address_space operations. 6562306a36Sopenharmony_ci */ 6662306a36Sopenharmony_ciconst struct address_space_operations empty_aops = { 6762306a36Sopenharmony_ci}; 6862306a36Sopenharmony_ciEXPORT_SYMBOL(empty_aops); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, nr_inodes); 7162306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, nr_unused); 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_cistatic struct kmem_cache *inode_cachep __read_mostly; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_cistatic long get_nr_inodes(void) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci int i; 7862306a36Sopenharmony_ci long sum = 0; 7962306a36Sopenharmony_ci for_each_possible_cpu(i) 8062306a36Sopenharmony_ci sum += per_cpu(nr_inodes, i); 8162306a36Sopenharmony_ci return sum < 0 ? 0 : sum; 8262306a36Sopenharmony_ci} 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cistatic inline long get_nr_inodes_unused(void) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci int i; 8762306a36Sopenharmony_ci long sum = 0; 8862306a36Sopenharmony_ci for_each_possible_cpu(i) 8962306a36Sopenharmony_ci sum += per_cpu(nr_unused, i); 9062306a36Sopenharmony_ci return sum < 0 ? 0 : sum; 9162306a36Sopenharmony_ci} 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_cilong get_nr_dirty_inodes(void) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci /* not actually dirty inodes, but a wild approximation */ 9662306a36Sopenharmony_ci long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 9762306a36Sopenharmony_ci return nr_dirty > 0 ? nr_dirty : 0; 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci/* 10162306a36Sopenharmony_ci * Handle nr_inode sysctl 10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 10462306a36Sopenharmony_ci/* 10562306a36Sopenharmony_ci * Statistics gathering.. 10662306a36Sopenharmony_ci */ 10762306a36Sopenharmony_cistatic struct inodes_stat_t inodes_stat; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cistatic int proc_nr_inodes(struct ctl_table *table, int write, void *buffer, 11062306a36Sopenharmony_ci size_t *lenp, loff_t *ppos) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci inodes_stat.nr_inodes = get_nr_inodes(); 11362306a36Sopenharmony_ci inodes_stat.nr_unused = get_nr_inodes_unused(); 11462306a36Sopenharmony_ci return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_cistatic struct ctl_table inodes_sysctls[] = { 11862306a36Sopenharmony_ci { 11962306a36Sopenharmony_ci .procname = "inode-nr", 12062306a36Sopenharmony_ci .data = &inodes_stat, 12162306a36Sopenharmony_ci .maxlen = 2*sizeof(long), 12262306a36Sopenharmony_ci .mode = 0444, 12362306a36Sopenharmony_ci .proc_handler = proc_nr_inodes, 12462306a36Sopenharmony_ci }, 12562306a36Sopenharmony_ci { 12662306a36Sopenharmony_ci .procname = "inode-state", 12762306a36Sopenharmony_ci .data = &inodes_stat, 12862306a36Sopenharmony_ci .maxlen = 7*sizeof(long), 12962306a36Sopenharmony_ci .mode = 0444, 13062306a36Sopenharmony_ci .proc_handler = proc_nr_inodes, 13162306a36Sopenharmony_ci }, 13262306a36Sopenharmony_ci { } 13362306a36Sopenharmony_ci}; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic int __init init_fs_inode_sysctls(void) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci register_sysctl_init("fs", inodes_sysctls); 13862306a36Sopenharmony_ci return 0; 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ciearly_initcall(init_fs_inode_sysctls); 14162306a36Sopenharmony_ci#endif 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_cistatic int no_open(struct inode *inode, struct file *file) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci return -ENXIO; 14662306a36Sopenharmony_ci} 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci/** 14962306a36Sopenharmony_ci * inode_init_always - perform inode structure initialisation 15062306a36Sopenharmony_ci * @sb: superblock inode belongs to 15162306a36Sopenharmony_ci * @inode: inode to initialise 15262306a36Sopenharmony_ci * 15362306a36Sopenharmony_ci * These are initializations that need to be done on every inode 15462306a36Sopenharmony_ci * allocation as the fields are not initialised by slab allocation. 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_ciint inode_init_always(struct super_block *sb, struct inode *inode) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci static const struct inode_operations empty_iops; 15962306a36Sopenharmony_ci static const struct file_operations no_open_fops = {.open = no_open}; 16062306a36Sopenharmony_ci struct address_space *const mapping = &inode->i_data; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci inode->i_sb = sb; 16362306a36Sopenharmony_ci inode->i_blkbits = sb->s_blocksize_bits; 16462306a36Sopenharmony_ci inode->i_flags = 0; 16562306a36Sopenharmony_ci atomic64_set(&inode->i_sequence, 0); 16662306a36Sopenharmony_ci atomic_set(&inode->i_count, 1); 16762306a36Sopenharmony_ci inode->i_op = &empty_iops; 16862306a36Sopenharmony_ci inode->i_fop = &no_open_fops; 16962306a36Sopenharmony_ci inode->i_ino = 0; 17062306a36Sopenharmony_ci inode->__i_nlink = 1; 17162306a36Sopenharmony_ci inode->i_opflags = 0; 17262306a36Sopenharmony_ci if (sb->s_xattr) 17362306a36Sopenharmony_ci inode->i_opflags |= IOP_XATTR; 17462306a36Sopenharmony_ci i_uid_write(inode, 0); 17562306a36Sopenharmony_ci i_gid_write(inode, 0); 17662306a36Sopenharmony_ci atomic_set(&inode->i_writecount, 0); 17762306a36Sopenharmony_ci inode->i_size = 0; 17862306a36Sopenharmony_ci inode->i_write_hint = WRITE_LIFE_NOT_SET; 17962306a36Sopenharmony_ci inode->i_blocks = 0; 18062306a36Sopenharmony_ci inode->i_bytes = 0; 18162306a36Sopenharmony_ci inode->i_generation = 0; 18262306a36Sopenharmony_ci inode->i_pipe = NULL; 18362306a36Sopenharmony_ci inode->i_cdev = NULL; 18462306a36Sopenharmony_ci inode->i_link = NULL; 18562306a36Sopenharmony_ci inode->i_dir_seq = 0; 18662306a36Sopenharmony_ci inode->i_rdev = 0; 18762306a36Sopenharmony_ci inode->dirtied_when = 0; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci#ifdef CONFIG_CGROUP_WRITEBACK 19062306a36Sopenharmony_ci inode->i_wb_frn_winner = 0; 19162306a36Sopenharmony_ci inode->i_wb_frn_avg_time = 0; 19262306a36Sopenharmony_ci inode->i_wb_frn_history = 0; 19362306a36Sopenharmony_ci#endif 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci spin_lock_init(&inode->i_lock); 19662306a36Sopenharmony_ci lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci init_rwsem(&inode->i_rwsem); 19962306a36Sopenharmony_ci lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci atomic_set(&inode->i_dio_count, 0); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci mapping->a_ops = &empty_aops; 20462306a36Sopenharmony_ci mapping->host = inode; 20562306a36Sopenharmony_ci mapping->flags = 0; 20662306a36Sopenharmony_ci mapping->wb_err = 0; 20762306a36Sopenharmony_ci atomic_set(&mapping->i_mmap_writable, 0); 20862306a36Sopenharmony_ci#ifdef CONFIG_READ_ONLY_THP_FOR_FS 20962306a36Sopenharmony_ci atomic_set(&mapping->nr_thps, 0); 21062306a36Sopenharmony_ci#endif 21162306a36Sopenharmony_ci mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); 21262306a36Sopenharmony_ci mapping->private_data = NULL; 21362306a36Sopenharmony_ci mapping->writeback_index = 0; 21462306a36Sopenharmony_ci init_rwsem(&mapping->invalidate_lock); 21562306a36Sopenharmony_ci lockdep_set_class_and_name(&mapping->invalidate_lock, 21662306a36Sopenharmony_ci &sb->s_type->invalidate_lock_key, 21762306a36Sopenharmony_ci "mapping.invalidate_lock"); 21862306a36Sopenharmony_ci if (sb->s_iflags & SB_I_STABLE_WRITES) 21962306a36Sopenharmony_ci mapping_set_stable_writes(mapping); 22062306a36Sopenharmony_ci inode->i_private = NULL; 22162306a36Sopenharmony_ci inode->i_mapping = mapping; 22262306a36Sopenharmony_ci INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ 22362306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL 22462306a36Sopenharmony_ci inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; 22562306a36Sopenharmony_ci#endif 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci#ifdef CONFIG_FSNOTIFY 22862306a36Sopenharmony_ci inode->i_fsnotify_mask = 0; 22962306a36Sopenharmony_ci#endif 23062306a36Sopenharmony_ci inode->i_flctx = NULL; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci if (unlikely(security_inode_alloc(inode))) 23362306a36Sopenharmony_ci return -ENOMEM; 23462306a36Sopenharmony_ci this_cpu_inc(nr_inodes); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return 0; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_always); 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_civoid free_inode_nonrcu(struct inode *inode) 24162306a36Sopenharmony_ci{ 24262306a36Sopenharmony_ci kmem_cache_free(inode_cachep, inode); 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ciEXPORT_SYMBOL(free_inode_nonrcu); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic void i_callback(struct rcu_head *head) 24762306a36Sopenharmony_ci{ 24862306a36Sopenharmony_ci struct inode *inode = container_of(head, struct inode, i_rcu); 24962306a36Sopenharmony_ci if (inode->free_inode) 25062306a36Sopenharmony_ci inode->free_inode(inode); 25162306a36Sopenharmony_ci else 25262306a36Sopenharmony_ci free_inode_nonrcu(inode); 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic struct inode *alloc_inode(struct super_block *sb) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci const struct super_operations *ops = sb->s_op; 25862306a36Sopenharmony_ci struct inode *inode; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci if (ops->alloc_inode) 26162306a36Sopenharmony_ci inode = ops->alloc_inode(sb); 26262306a36Sopenharmony_ci else 26362306a36Sopenharmony_ci inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (!inode) 26662306a36Sopenharmony_ci return NULL; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci if (unlikely(inode_init_always(sb, inode))) { 26962306a36Sopenharmony_ci if (ops->destroy_inode) { 27062306a36Sopenharmony_ci ops->destroy_inode(inode); 27162306a36Sopenharmony_ci if (!ops->free_inode) 27262306a36Sopenharmony_ci return NULL; 27362306a36Sopenharmony_ci } 27462306a36Sopenharmony_ci inode->free_inode = ops->free_inode; 27562306a36Sopenharmony_ci i_callback(&inode->i_rcu); 27662306a36Sopenharmony_ci return NULL; 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci return inode; 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_civoid __destroy_inode(struct inode *inode) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci BUG_ON(inode_has_buffers(inode)); 28562306a36Sopenharmony_ci inode_detach_wb(inode); 28662306a36Sopenharmony_ci security_inode_free(inode); 28762306a36Sopenharmony_ci fsnotify_inode_delete(inode); 28862306a36Sopenharmony_ci locks_free_lock_context(inode); 28962306a36Sopenharmony_ci if (!inode->i_nlink) { 29062306a36Sopenharmony_ci WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); 29162306a36Sopenharmony_ci atomic_long_dec(&inode->i_sb->s_remove_count); 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL 29562306a36Sopenharmony_ci if (inode->i_acl && !is_uncached_acl(inode->i_acl)) 29662306a36Sopenharmony_ci posix_acl_release(inode->i_acl); 29762306a36Sopenharmony_ci if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl)) 29862306a36Sopenharmony_ci posix_acl_release(inode->i_default_acl); 29962306a36Sopenharmony_ci#endif 30062306a36Sopenharmony_ci this_cpu_dec(nr_inodes); 30162306a36Sopenharmony_ci} 30262306a36Sopenharmony_ciEXPORT_SYMBOL(__destroy_inode); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic void destroy_inode(struct inode *inode) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci const struct super_operations *ops = inode->i_sb->s_op; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci BUG_ON(!list_empty(&inode->i_lru)); 30962306a36Sopenharmony_ci __destroy_inode(inode); 31062306a36Sopenharmony_ci if (ops->destroy_inode) { 31162306a36Sopenharmony_ci ops->destroy_inode(inode); 31262306a36Sopenharmony_ci if (!ops->free_inode) 31362306a36Sopenharmony_ci return; 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci inode->free_inode = ops->free_inode; 31662306a36Sopenharmony_ci call_rcu(&inode->i_rcu, i_callback); 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci/** 32062306a36Sopenharmony_ci * drop_nlink - directly drop an inode's link count 32162306a36Sopenharmony_ci * @inode: inode 32262306a36Sopenharmony_ci * 32362306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any 32462306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink. In cases 32562306a36Sopenharmony_ci * where we are attempting to track writes to the 32662306a36Sopenharmony_ci * filesystem, a decrement to zero means an imminent 32762306a36Sopenharmony_ci * write when the file is truncated and actually unlinked 32862306a36Sopenharmony_ci * on the filesystem. 32962306a36Sopenharmony_ci */ 33062306a36Sopenharmony_civoid drop_nlink(struct inode *inode) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci WARN_ON(inode->i_nlink == 0); 33362306a36Sopenharmony_ci inode->__i_nlink--; 33462306a36Sopenharmony_ci if (!inode->i_nlink) 33562306a36Sopenharmony_ci atomic_long_inc(&inode->i_sb->s_remove_count); 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ciEXPORT_SYMBOL(drop_nlink); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci/** 34062306a36Sopenharmony_ci * clear_nlink - directly zero an inode's link count 34162306a36Sopenharmony_ci * @inode: inode 34262306a36Sopenharmony_ci * 34362306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any 34462306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink. See 34562306a36Sopenharmony_ci * drop_nlink() for why we care about i_nlink hitting zero. 34662306a36Sopenharmony_ci */ 34762306a36Sopenharmony_civoid clear_nlink(struct inode *inode) 34862306a36Sopenharmony_ci{ 34962306a36Sopenharmony_ci if (inode->i_nlink) { 35062306a36Sopenharmony_ci inode->__i_nlink = 0; 35162306a36Sopenharmony_ci atomic_long_inc(&inode->i_sb->s_remove_count); 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci} 35462306a36Sopenharmony_ciEXPORT_SYMBOL(clear_nlink); 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci/** 35762306a36Sopenharmony_ci * set_nlink - directly set an inode's link count 35862306a36Sopenharmony_ci * @inode: inode 35962306a36Sopenharmony_ci * @nlink: new nlink (should be non-zero) 36062306a36Sopenharmony_ci * 36162306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any 36262306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink. 36362306a36Sopenharmony_ci */ 36462306a36Sopenharmony_civoid set_nlink(struct inode *inode, unsigned int nlink) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci if (!nlink) { 36762306a36Sopenharmony_ci clear_nlink(inode); 36862306a36Sopenharmony_ci } else { 36962306a36Sopenharmony_ci /* Yes, some filesystems do change nlink from zero to one */ 37062306a36Sopenharmony_ci if (inode->i_nlink == 0) 37162306a36Sopenharmony_ci atomic_long_dec(&inode->i_sb->s_remove_count); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci inode->__i_nlink = nlink; 37462306a36Sopenharmony_ci } 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ciEXPORT_SYMBOL(set_nlink); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci/** 37962306a36Sopenharmony_ci * inc_nlink - directly increment an inode's link count 38062306a36Sopenharmony_ci * @inode: inode 38162306a36Sopenharmony_ci * 38262306a36Sopenharmony_ci * This is a low-level filesystem helper to replace any 38362306a36Sopenharmony_ci * direct filesystem manipulation of i_nlink. Currently, 38462306a36Sopenharmony_ci * it is only here for parity with dec_nlink(). 38562306a36Sopenharmony_ci */ 38662306a36Sopenharmony_civoid inc_nlink(struct inode *inode) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci if (unlikely(inode->i_nlink == 0)) { 38962306a36Sopenharmony_ci WARN_ON(!(inode->i_state & I_LINKABLE)); 39062306a36Sopenharmony_ci atomic_long_dec(&inode->i_sb->s_remove_count); 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci inode->__i_nlink++; 39462306a36Sopenharmony_ci} 39562306a36Sopenharmony_ciEXPORT_SYMBOL(inc_nlink); 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_cistatic void __address_space_init_once(struct address_space *mapping) 39862306a36Sopenharmony_ci{ 39962306a36Sopenharmony_ci xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); 40062306a36Sopenharmony_ci init_rwsem(&mapping->i_mmap_rwsem); 40162306a36Sopenharmony_ci INIT_LIST_HEAD(&mapping->private_list); 40262306a36Sopenharmony_ci spin_lock_init(&mapping->private_lock); 40362306a36Sopenharmony_ci mapping->i_mmap = RB_ROOT_CACHED; 40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_civoid address_space_init_once(struct address_space *mapping) 40762306a36Sopenharmony_ci{ 40862306a36Sopenharmony_ci memset(mapping, 0, sizeof(*mapping)); 40962306a36Sopenharmony_ci __address_space_init_once(mapping); 41062306a36Sopenharmony_ci} 41162306a36Sopenharmony_ciEXPORT_SYMBOL(address_space_init_once); 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci/* 41462306a36Sopenharmony_ci * These are initializations that only need to be done 41562306a36Sopenharmony_ci * once, because the fields are idempotent across use 41662306a36Sopenharmony_ci * of the inode, so let the slab aware of that. 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_civoid inode_init_once(struct inode *inode) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci memset(inode, 0, sizeof(*inode)); 42162306a36Sopenharmony_ci INIT_HLIST_NODE(&inode->i_hash); 42262306a36Sopenharmony_ci INIT_LIST_HEAD(&inode->i_devices); 42362306a36Sopenharmony_ci INIT_LIST_HEAD(&inode->i_io_list); 42462306a36Sopenharmony_ci INIT_LIST_HEAD(&inode->i_wb_list); 42562306a36Sopenharmony_ci INIT_LIST_HEAD(&inode->i_lru); 42662306a36Sopenharmony_ci INIT_LIST_HEAD(&inode->i_sb_list); 42762306a36Sopenharmony_ci __address_space_init_once(&inode->i_data); 42862306a36Sopenharmony_ci i_size_ordered_init(inode); 42962306a36Sopenharmony_ci} 43062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_once); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cistatic void init_once(void *foo) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci struct inode *inode = (struct inode *) foo; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci inode_init_once(inode); 43762306a36Sopenharmony_ci} 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci/* 44062306a36Sopenharmony_ci * inode->i_lock must be held 44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_civoid __iget(struct inode *inode) 44362306a36Sopenharmony_ci{ 44462306a36Sopenharmony_ci atomic_inc(&inode->i_count); 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci/* 44862306a36Sopenharmony_ci * get additional reference to inode; caller must already hold one. 44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_civoid ihold(struct inode *inode) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci WARN_ON(atomic_inc_return(&inode->i_count) < 2); 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ciEXPORT_SYMBOL(ihold); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_cistatic void __inode_add_lru(struct inode *inode, bool rotate) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) 45962306a36Sopenharmony_ci return; 46062306a36Sopenharmony_ci if (atomic_read(&inode->i_count)) 46162306a36Sopenharmony_ci return; 46262306a36Sopenharmony_ci if (!(inode->i_sb->s_flags & SB_ACTIVE)) 46362306a36Sopenharmony_ci return; 46462306a36Sopenharmony_ci if (!mapping_shrinkable(&inode->i_data)) 46562306a36Sopenharmony_ci return; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) 46862306a36Sopenharmony_ci this_cpu_inc(nr_unused); 46962306a36Sopenharmony_ci else if (rotate) 47062306a36Sopenharmony_ci inode->i_state |= I_REFERENCED; 47162306a36Sopenharmony_ci} 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci/* 47462306a36Sopenharmony_ci * Add inode to LRU if needed (inode is unused and clean). 47562306a36Sopenharmony_ci * 47662306a36Sopenharmony_ci * Needs inode->i_lock held. 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_civoid inode_add_lru(struct inode *inode) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci __inode_add_lru(inode, false); 48162306a36Sopenharmony_ci} 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cistatic void inode_lru_list_del(struct inode *inode) 48462306a36Sopenharmony_ci{ 48562306a36Sopenharmony_ci if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) 48662306a36Sopenharmony_ci this_cpu_dec(nr_unused); 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci/** 49062306a36Sopenharmony_ci * inode_sb_list_add - add inode to the superblock list of inodes 49162306a36Sopenharmony_ci * @inode: inode to add 49262306a36Sopenharmony_ci */ 49362306a36Sopenharmony_civoid inode_sb_list_add(struct inode *inode) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci spin_lock(&inode->i_sb->s_inode_list_lock); 49662306a36Sopenharmony_ci list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); 49762306a36Sopenharmony_ci spin_unlock(&inode->i_sb->s_inode_list_lock); 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inode_sb_list_add); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_cistatic inline void inode_sb_list_del(struct inode *inode) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci if (!list_empty(&inode->i_sb_list)) { 50462306a36Sopenharmony_ci spin_lock(&inode->i_sb->s_inode_list_lock); 50562306a36Sopenharmony_ci list_del_init(&inode->i_sb_list); 50662306a36Sopenharmony_ci spin_unlock(&inode->i_sb->s_inode_list_lock); 50762306a36Sopenharmony_ci } 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_cistatic unsigned long hash(struct super_block *sb, unsigned long hashval) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci unsigned long tmp; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / 51562306a36Sopenharmony_ci L1_CACHE_BYTES; 51662306a36Sopenharmony_ci tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); 51762306a36Sopenharmony_ci return tmp & i_hash_mask; 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci/** 52162306a36Sopenharmony_ci * __insert_inode_hash - hash an inode 52262306a36Sopenharmony_ci * @inode: unhashed inode 52362306a36Sopenharmony_ci * @hashval: unsigned long value used to locate this object in the 52462306a36Sopenharmony_ci * inode_hashtable. 52562306a36Sopenharmony_ci * 52662306a36Sopenharmony_ci * Add an inode to the inode hash for this superblock. 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_civoid __insert_inode_hash(struct inode *inode, unsigned long hashval) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 53362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 53462306a36Sopenharmony_ci hlist_add_head_rcu(&inode->i_hash, b); 53562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 53662306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ciEXPORT_SYMBOL(__insert_inode_hash); 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci/** 54162306a36Sopenharmony_ci * __remove_inode_hash - remove an inode from the hash 54262306a36Sopenharmony_ci * @inode: inode to unhash 54362306a36Sopenharmony_ci * 54462306a36Sopenharmony_ci * Remove an inode from the superblock. 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_civoid __remove_inode_hash(struct inode *inode) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 54962306a36Sopenharmony_ci spin_lock(&inode->i_lock); 55062306a36Sopenharmony_ci hlist_del_init_rcu(&inode->i_hash); 55162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 55262306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 55362306a36Sopenharmony_ci} 55462306a36Sopenharmony_ciEXPORT_SYMBOL(__remove_inode_hash); 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_civoid dump_mapping(const struct address_space *mapping) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci struct inode *host; 55962306a36Sopenharmony_ci const struct address_space_operations *a_ops; 56062306a36Sopenharmony_ci struct hlist_node *dentry_first; 56162306a36Sopenharmony_ci struct dentry *dentry_ptr; 56262306a36Sopenharmony_ci struct dentry dentry; 56362306a36Sopenharmony_ci unsigned long ino; 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci /* 56662306a36Sopenharmony_ci * If mapping is an invalid pointer, we don't want to crash 56762306a36Sopenharmony_ci * accessing it, so probe everything depending on it carefully. 56862306a36Sopenharmony_ci */ 56962306a36Sopenharmony_ci if (get_kernel_nofault(host, &mapping->host) || 57062306a36Sopenharmony_ci get_kernel_nofault(a_ops, &mapping->a_ops)) { 57162306a36Sopenharmony_ci pr_warn("invalid mapping:%px\n", mapping); 57262306a36Sopenharmony_ci return; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (!host) { 57662306a36Sopenharmony_ci pr_warn("aops:%ps\n", a_ops); 57762306a36Sopenharmony_ci return; 57862306a36Sopenharmony_ci } 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || 58162306a36Sopenharmony_ci get_kernel_nofault(ino, &host->i_ino)) { 58262306a36Sopenharmony_ci pr_warn("aops:%ps invalid inode:%px\n", a_ops, host); 58362306a36Sopenharmony_ci return; 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci if (!dentry_first) { 58762306a36Sopenharmony_ci pr_warn("aops:%ps ino:%lx\n", a_ops, ino); 58862306a36Sopenharmony_ci return; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); 59262306a36Sopenharmony_ci if (get_kernel_nofault(dentry, dentry_ptr)) { 59362306a36Sopenharmony_ci pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", 59462306a36Sopenharmony_ci a_ops, ino, dentry_ptr); 59562306a36Sopenharmony_ci return; 59662306a36Sopenharmony_ci } 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci /* 59962306a36Sopenharmony_ci * if dentry is corrupted, the %pd handler may still crash, 60062306a36Sopenharmony_ci * but it's unlikely that we reach here with a corrupt mapping 60162306a36Sopenharmony_ci */ 60262306a36Sopenharmony_ci pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry); 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_civoid clear_inode(struct inode *inode) 60662306a36Sopenharmony_ci{ 60762306a36Sopenharmony_ci /* 60862306a36Sopenharmony_ci * We have to cycle the i_pages lock here because reclaim can be in the 60962306a36Sopenharmony_ci * process of removing the last page (in __filemap_remove_folio()) 61062306a36Sopenharmony_ci * and we must not free the mapping under it. 61162306a36Sopenharmony_ci */ 61262306a36Sopenharmony_ci xa_lock_irq(&inode->i_data.i_pages); 61362306a36Sopenharmony_ci BUG_ON(inode->i_data.nrpages); 61462306a36Sopenharmony_ci /* 61562306a36Sopenharmony_ci * Almost always, mapping_empty(&inode->i_data) here; but there are 61662306a36Sopenharmony_ci * two known and long-standing ways in which nodes may get left behind 61762306a36Sopenharmony_ci * (when deep radix-tree node allocation failed partway; or when THP 61862306a36Sopenharmony_ci * collapse_file() failed). Until those two known cases are cleaned up, 61962306a36Sopenharmony_ci * or a cleanup function is called here, do not BUG_ON(!mapping_empty), 62062306a36Sopenharmony_ci * nor even WARN_ON(!mapping_empty). 62162306a36Sopenharmony_ci */ 62262306a36Sopenharmony_ci xa_unlock_irq(&inode->i_data.i_pages); 62362306a36Sopenharmony_ci BUG_ON(!list_empty(&inode->i_data.private_list)); 62462306a36Sopenharmony_ci BUG_ON(!(inode->i_state & I_FREEING)); 62562306a36Sopenharmony_ci BUG_ON(inode->i_state & I_CLEAR); 62662306a36Sopenharmony_ci BUG_ON(!list_empty(&inode->i_wb_list)); 62762306a36Sopenharmony_ci /* don't need i_lock here, no concurrent mods to i_state */ 62862306a36Sopenharmony_ci inode->i_state = I_FREEING | I_CLEAR; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ciEXPORT_SYMBOL(clear_inode); 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci/* 63362306a36Sopenharmony_ci * Free the inode passed in, removing it from the lists it is still connected 63462306a36Sopenharmony_ci * to. We remove any pages still attached to the inode and wait for any IO that 63562306a36Sopenharmony_ci * is still in progress before finally destroying the inode. 63662306a36Sopenharmony_ci * 63762306a36Sopenharmony_ci * An inode must already be marked I_FREEING so that we avoid the inode being 63862306a36Sopenharmony_ci * moved back onto lists if we race with other code that manipulates the lists 63962306a36Sopenharmony_ci * (e.g. writeback_single_inode). The caller is responsible for setting this. 64062306a36Sopenharmony_ci * 64162306a36Sopenharmony_ci * An inode must already be removed from the LRU list before being evicted from 64262306a36Sopenharmony_ci * the cache. This should occur atomically with setting the I_FREEING state 64362306a36Sopenharmony_ci * flag, so no inodes here should ever be on the LRU when being evicted. 64462306a36Sopenharmony_ci */ 64562306a36Sopenharmony_cistatic void evict(struct inode *inode) 64662306a36Sopenharmony_ci{ 64762306a36Sopenharmony_ci const struct super_operations *op = inode->i_sb->s_op; 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci BUG_ON(!(inode->i_state & I_FREEING)); 65062306a36Sopenharmony_ci BUG_ON(!list_empty(&inode->i_lru)); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci if (!list_empty(&inode->i_io_list)) 65362306a36Sopenharmony_ci inode_io_list_del(inode); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci inode_sb_list_del(inode); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci /* 65862306a36Sopenharmony_ci * Wait for flusher thread to be done with the inode so that filesystem 65962306a36Sopenharmony_ci * does not start destroying it while writeback is still running. Since 66062306a36Sopenharmony_ci * the inode has I_FREEING set, flusher thread won't start new work on 66162306a36Sopenharmony_ci * the inode. We just have to wait for running writeback to finish. 66262306a36Sopenharmony_ci */ 66362306a36Sopenharmony_ci inode_wait_for_writeback(inode); 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci if (op->evict_inode) { 66662306a36Sopenharmony_ci op->evict_inode(inode); 66762306a36Sopenharmony_ci } else { 66862306a36Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 66962306a36Sopenharmony_ci clear_inode(inode); 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci if (S_ISCHR(inode->i_mode) && inode->i_cdev) 67262306a36Sopenharmony_ci cd_forget(inode); 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci remove_inode_hash(inode); 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 67762306a36Sopenharmony_ci wake_up_bit(&inode->i_state, __I_NEW); 67862306a36Sopenharmony_ci BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 67962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci destroy_inode(inode); 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci/* 68562306a36Sopenharmony_ci * dispose_list - dispose of the contents of a local list 68662306a36Sopenharmony_ci * @head: the head of the list to free 68762306a36Sopenharmony_ci * 68862306a36Sopenharmony_ci * Dispose-list gets a local list with local inodes in it, so it doesn't 68962306a36Sopenharmony_ci * need to worry about list corruption and SMP locks. 69062306a36Sopenharmony_ci */ 69162306a36Sopenharmony_cistatic void dispose_list(struct list_head *head) 69262306a36Sopenharmony_ci{ 69362306a36Sopenharmony_ci while (!list_empty(head)) { 69462306a36Sopenharmony_ci struct inode *inode; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci inode = list_first_entry(head, struct inode, i_lru); 69762306a36Sopenharmony_ci list_del_init(&inode->i_lru); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci evict(inode); 70062306a36Sopenharmony_ci cond_resched(); 70162306a36Sopenharmony_ci } 70262306a36Sopenharmony_ci} 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci/** 70562306a36Sopenharmony_ci * evict_inodes - evict all evictable inodes for a superblock 70662306a36Sopenharmony_ci * @sb: superblock to operate on 70762306a36Sopenharmony_ci * 70862306a36Sopenharmony_ci * Make sure that no inodes with zero refcount are retained. This is 70962306a36Sopenharmony_ci * called by superblock shutdown after having SB_ACTIVE flag removed, 71062306a36Sopenharmony_ci * so any inode reaching zero refcount during or after that call will 71162306a36Sopenharmony_ci * be immediately evicted. 71262306a36Sopenharmony_ci */ 71362306a36Sopenharmony_civoid evict_inodes(struct super_block *sb) 71462306a36Sopenharmony_ci{ 71562306a36Sopenharmony_ci struct inode *inode, *next; 71662306a36Sopenharmony_ci LIST_HEAD(dispose); 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ciagain: 71962306a36Sopenharmony_ci spin_lock(&sb->s_inode_list_lock); 72062306a36Sopenharmony_ci list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 72162306a36Sopenharmony_ci if (atomic_read(&inode->i_count)) 72262306a36Sopenharmony_ci continue; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci spin_lock(&inode->i_lock); 72562306a36Sopenharmony_ci if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 72662306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 72762306a36Sopenharmony_ci continue; 72862306a36Sopenharmony_ci } 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci inode->i_state |= I_FREEING; 73162306a36Sopenharmony_ci inode_lru_list_del(inode); 73262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 73362306a36Sopenharmony_ci list_add(&inode->i_lru, &dispose); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci /* 73662306a36Sopenharmony_ci * We can have a ton of inodes to evict at unmount time given 73762306a36Sopenharmony_ci * enough memory, check to see if we need to go to sleep for a 73862306a36Sopenharmony_ci * bit so we don't livelock. 73962306a36Sopenharmony_ci */ 74062306a36Sopenharmony_ci if (need_resched()) { 74162306a36Sopenharmony_ci spin_unlock(&sb->s_inode_list_lock); 74262306a36Sopenharmony_ci cond_resched(); 74362306a36Sopenharmony_ci dispose_list(&dispose); 74462306a36Sopenharmony_ci goto again; 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci } 74762306a36Sopenharmony_ci spin_unlock(&sb->s_inode_list_lock); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci dispose_list(&dispose); 75062306a36Sopenharmony_ci} 75162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(evict_inodes); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci/** 75462306a36Sopenharmony_ci * invalidate_inodes - attempt to free all inodes on a superblock 75562306a36Sopenharmony_ci * @sb: superblock to operate on 75662306a36Sopenharmony_ci * 75762306a36Sopenharmony_ci * Attempts to free all inodes (including dirty inodes) for a given superblock. 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_civoid invalidate_inodes(struct super_block *sb) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci struct inode *inode, *next; 76262306a36Sopenharmony_ci LIST_HEAD(dispose); 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ciagain: 76562306a36Sopenharmony_ci spin_lock(&sb->s_inode_list_lock); 76662306a36Sopenharmony_ci list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 76762306a36Sopenharmony_ci spin_lock(&inode->i_lock); 76862306a36Sopenharmony_ci if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 76962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 77062306a36Sopenharmony_ci continue; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci if (atomic_read(&inode->i_count)) { 77362306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 77462306a36Sopenharmony_ci continue; 77562306a36Sopenharmony_ci } 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci inode->i_state |= I_FREEING; 77862306a36Sopenharmony_ci inode_lru_list_del(inode); 77962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 78062306a36Sopenharmony_ci list_add(&inode->i_lru, &dispose); 78162306a36Sopenharmony_ci if (need_resched()) { 78262306a36Sopenharmony_ci spin_unlock(&sb->s_inode_list_lock); 78362306a36Sopenharmony_ci cond_resched(); 78462306a36Sopenharmony_ci dispose_list(&dispose); 78562306a36Sopenharmony_ci goto again; 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci } 78862306a36Sopenharmony_ci spin_unlock(&sb->s_inode_list_lock); 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci dispose_list(&dispose); 79162306a36Sopenharmony_ci} 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci/* 79462306a36Sopenharmony_ci * Isolate the inode from the LRU in preparation for freeing it. 79562306a36Sopenharmony_ci * 79662306a36Sopenharmony_ci * If the inode has the I_REFERENCED flag set, then it means that it has been 79762306a36Sopenharmony_ci * used recently - the flag is set in iput_final(). When we encounter such an 79862306a36Sopenharmony_ci * inode, clear the flag and move it to the back of the LRU so it gets another 79962306a36Sopenharmony_ci * pass through the LRU before it gets reclaimed. This is necessary because of 80062306a36Sopenharmony_ci * the fact we are doing lazy LRU updates to minimise lock contention so the 80162306a36Sopenharmony_ci * LRU does not have strict ordering. Hence we don't want to reclaim inodes 80262306a36Sopenharmony_ci * with this flag set because they are the inodes that are out of order. 80362306a36Sopenharmony_ci */ 80462306a36Sopenharmony_cistatic enum lru_status inode_lru_isolate(struct list_head *item, 80562306a36Sopenharmony_ci struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) 80662306a36Sopenharmony_ci{ 80762306a36Sopenharmony_ci struct list_head *freeable = arg; 80862306a36Sopenharmony_ci struct inode *inode = container_of(item, struct inode, i_lru); 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci /* 81162306a36Sopenharmony_ci * We are inverting the lru lock/inode->i_lock here, so use a 81262306a36Sopenharmony_ci * trylock. If we fail to get the lock, just skip it. 81362306a36Sopenharmony_ci */ 81462306a36Sopenharmony_ci if (!spin_trylock(&inode->i_lock)) 81562306a36Sopenharmony_ci return LRU_SKIP; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci /* 81862306a36Sopenharmony_ci * Inodes can get referenced, redirtied, or repopulated while 81962306a36Sopenharmony_ci * they're already on the LRU, and this can make them 82062306a36Sopenharmony_ci * unreclaimable for a while. Remove them lazily here; iput, 82162306a36Sopenharmony_ci * sync, or the last page cache deletion will requeue them. 82262306a36Sopenharmony_ci */ 82362306a36Sopenharmony_ci if (atomic_read(&inode->i_count) || 82462306a36Sopenharmony_ci (inode->i_state & ~I_REFERENCED) || 82562306a36Sopenharmony_ci !mapping_shrinkable(&inode->i_data)) { 82662306a36Sopenharmony_ci list_lru_isolate(lru, &inode->i_lru); 82762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 82862306a36Sopenharmony_ci this_cpu_dec(nr_unused); 82962306a36Sopenharmony_ci return LRU_REMOVED; 83062306a36Sopenharmony_ci } 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci /* Recently referenced inodes get one more pass */ 83362306a36Sopenharmony_ci if (inode->i_state & I_REFERENCED) { 83462306a36Sopenharmony_ci inode->i_state &= ~I_REFERENCED; 83562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 83662306a36Sopenharmony_ci return LRU_ROTATE; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci /* 84062306a36Sopenharmony_ci * On highmem systems, mapping_shrinkable() permits dropping 84162306a36Sopenharmony_ci * page cache in order to free up struct inodes: lowmem might 84262306a36Sopenharmony_ci * be under pressure before the cache inside the highmem zone. 84362306a36Sopenharmony_ci */ 84462306a36Sopenharmony_ci if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { 84562306a36Sopenharmony_ci __iget(inode); 84662306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 84762306a36Sopenharmony_ci spin_unlock(lru_lock); 84862306a36Sopenharmony_ci if (remove_inode_buffers(inode)) { 84962306a36Sopenharmony_ci unsigned long reap; 85062306a36Sopenharmony_ci reap = invalidate_mapping_pages(&inode->i_data, 0, -1); 85162306a36Sopenharmony_ci if (current_is_kswapd()) 85262306a36Sopenharmony_ci __count_vm_events(KSWAPD_INODESTEAL, reap); 85362306a36Sopenharmony_ci else 85462306a36Sopenharmony_ci __count_vm_events(PGINODESTEAL, reap); 85562306a36Sopenharmony_ci mm_account_reclaimed_pages(reap); 85662306a36Sopenharmony_ci } 85762306a36Sopenharmony_ci iput(inode); 85862306a36Sopenharmony_ci spin_lock(lru_lock); 85962306a36Sopenharmony_ci return LRU_RETRY; 86062306a36Sopenharmony_ci } 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci WARN_ON(inode->i_state & I_NEW); 86362306a36Sopenharmony_ci inode->i_state |= I_FREEING; 86462306a36Sopenharmony_ci list_lru_isolate_move(lru, &inode->i_lru, freeable); 86562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci this_cpu_dec(nr_unused); 86862306a36Sopenharmony_ci return LRU_REMOVED; 86962306a36Sopenharmony_ci} 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci/* 87262306a36Sopenharmony_ci * Walk the superblock inode LRU for freeable inodes and attempt to free them. 87362306a36Sopenharmony_ci * This is called from the superblock shrinker function with a number of inodes 87462306a36Sopenharmony_ci * to trim from the LRU. Inodes to be freed are moved to a temporary list and 87562306a36Sopenharmony_ci * then are freed outside inode_lock by dispose_list(). 87662306a36Sopenharmony_ci */ 87762306a36Sopenharmony_cilong prune_icache_sb(struct super_block *sb, struct shrink_control *sc) 87862306a36Sopenharmony_ci{ 87962306a36Sopenharmony_ci LIST_HEAD(freeable); 88062306a36Sopenharmony_ci long freed; 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci freed = list_lru_shrink_walk(&sb->s_inode_lru, sc, 88362306a36Sopenharmony_ci inode_lru_isolate, &freeable); 88462306a36Sopenharmony_ci dispose_list(&freeable); 88562306a36Sopenharmony_ci return freed; 88662306a36Sopenharmony_ci} 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode); 88962306a36Sopenharmony_ci/* 89062306a36Sopenharmony_ci * Called with the inode lock held. 89162306a36Sopenharmony_ci */ 89262306a36Sopenharmony_cistatic struct inode *find_inode(struct super_block *sb, 89362306a36Sopenharmony_ci struct hlist_head *head, 89462306a36Sopenharmony_ci int (*test)(struct inode *, void *), 89562306a36Sopenharmony_ci void *data) 89662306a36Sopenharmony_ci{ 89762306a36Sopenharmony_ci struct inode *inode = NULL; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_cirepeat: 90062306a36Sopenharmony_ci hlist_for_each_entry(inode, head, i_hash) { 90162306a36Sopenharmony_ci if (inode->i_sb != sb) 90262306a36Sopenharmony_ci continue; 90362306a36Sopenharmony_ci if (!test(inode, data)) 90462306a36Sopenharmony_ci continue; 90562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 90662306a36Sopenharmony_ci if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 90762306a36Sopenharmony_ci __wait_on_freeing_inode(inode); 90862306a36Sopenharmony_ci goto repeat; 90962306a36Sopenharmony_ci } 91062306a36Sopenharmony_ci if (unlikely(inode->i_state & I_CREATING)) { 91162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 91262306a36Sopenharmony_ci return ERR_PTR(-ESTALE); 91362306a36Sopenharmony_ci } 91462306a36Sopenharmony_ci __iget(inode); 91562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 91662306a36Sopenharmony_ci return inode; 91762306a36Sopenharmony_ci } 91862306a36Sopenharmony_ci return NULL; 91962306a36Sopenharmony_ci} 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci/* 92262306a36Sopenharmony_ci * find_inode_fast is the fast path version of find_inode, see the comment at 92362306a36Sopenharmony_ci * iget_locked for details. 92462306a36Sopenharmony_ci */ 92562306a36Sopenharmony_cistatic struct inode *find_inode_fast(struct super_block *sb, 92662306a36Sopenharmony_ci struct hlist_head *head, unsigned long ino) 92762306a36Sopenharmony_ci{ 92862306a36Sopenharmony_ci struct inode *inode = NULL; 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_cirepeat: 93162306a36Sopenharmony_ci hlist_for_each_entry(inode, head, i_hash) { 93262306a36Sopenharmony_ci if (inode->i_ino != ino) 93362306a36Sopenharmony_ci continue; 93462306a36Sopenharmony_ci if (inode->i_sb != sb) 93562306a36Sopenharmony_ci continue; 93662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 93762306a36Sopenharmony_ci if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 93862306a36Sopenharmony_ci __wait_on_freeing_inode(inode); 93962306a36Sopenharmony_ci goto repeat; 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci if (unlikely(inode->i_state & I_CREATING)) { 94262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 94362306a36Sopenharmony_ci return ERR_PTR(-ESTALE); 94462306a36Sopenharmony_ci } 94562306a36Sopenharmony_ci __iget(inode); 94662306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 94762306a36Sopenharmony_ci return inode; 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci return NULL; 95062306a36Sopenharmony_ci} 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci/* 95362306a36Sopenharmony_ci * Each cpu owns a range of LAST_INO_BATCH numbers. 95462306a36Sopenharmony_ci * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, 95562306a36Sopenharmony_ci * to renew the exhausted range. 95662306a36Sopenharmony_ci * 95762306a36Sopenharmony_ci * This does not significantly increase overflow rate because every CPU can 95862306a36Sopenharmony_ci * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is 95962306a36Sopenharmony_ci * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the 96062306a36Sopenharmony_ci * 2^32 range, and is a worst-case. Even a 50% wastage would only increase 96162306a36Sopenharmony_ci * overflow rate by 2x, which does not seem too significant. 96262306a36Sopenharmony_ci * 96362306a36Sopenharmony_ci * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW 96462306a36Sopenharmony_ci * error if st_ino won't fit in target struct field. Use 32bit counter 96562306a36Sopenharmony_ci * here to attempt to avoid that. 96662306a36Sopenharmony_ci */ 96762306a36Sopenharmony_ci#define LAST_INO_BATCH 1024 96862306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned int, last_ino); 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ciunsigned int get_next_ino(void) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci unsigned int *p = &get_cpu_var(last_ino); 97362306a36Sopenharmony_ci unsigned int res = *p; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci#ifdef CONFIG_SMP 97662306a36Sopenharmony_ci if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { 97762306a36Sopenharmony_ci static atomic_t shared_last_ino; 97862306a36Sopenharmony_ci int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci res = next - LAST_INO_BATCH; 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci#endif 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci res++; 98562306a36Sopenharmony_ci /* get_next_ino should not provide a 0 inode number */ 98662306a36Sopenharmony_ci if (unlikely(!res)) 98762306a36Sopenharmony_ci res++; 98862306a36Sopenharmony_ci *p = res; 98962306a36Sopenharmony_ci put_cpu_var(last_ino); 99062306a36Sopenharmony_ci return res; 99162306a36Sopenharmony_ci} 99262306a36Sopenharmony_ciEXPORT_SYMBOL(get_next_ino); 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci/** 99562306a36Sopenharmony_ci * new_inode_pseudo - obtain an inode 99662306a36Sopenharmony_ci * @sb: superblock 99762306a36Sopenharmony_ci * 99862306a36Sopenharmony_ci * Allocates a new inode for given superblock. 99962306a36Sopenharmony_ci * Inode wont be chained in superblock s_inodes list 100062306a36Sopenharmony_ci * This means : 100162306a36Sopenharmony_ci * - fs can't be unmount 100262306a36Sopenharmony_ci * - quotas, fsnotify, writeback can't work 100362306a36Sopenharmony_ci */ 100462306a36Sopenharmony_cistruct inode *new_inode_pseudo(struct super_block *sb) 100562306a36Sopenharmony_ci{ 100662306a36Sopenharmony_ci struct inode *inode = alloc_inode(sb); 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci if (inode) { 100962306a36Sopenharmony_ci spin_lock(&inode->i_lock); 101062306a36Sopenharmony_ci inode->i_state = 0; 101162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 101262306a36Sopenharmony_ci } 101362306a36Sopenharmony_ci return inode; 101462306a36Sopenharmony_ci} 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci/** 101762306a36Sopenharmony_ci * new_inode - obtain an inode 101862306a36Sopenharmony_ci * @sb: superblock 101962306a36Sopenharmony_ci * 102062306a36Sopenharmony_ci * Allocates a new inode for given superblock. The default gfp_mask 102162306a36Sopenharmony_ci * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE. 102262306a36Sopenharmony_ci * If HIGHMEM pages are unsuitable or it is known that pages allocated 102362306a36Sopenharmony_ci * for the page cache are not reclaimable or migratable, 102462306a36Sopenharmony_ci * mapping_set_gfp_mask() must be called with suitable flags on the 102562306a36Sopenharmony_ci * newly created inode's mapping 102662306a36Sopenharmony_ci * 102762306a36Sopenharmony_ci */ 102862306a36Sopenharmony_cistruct inode *new_inode(struct super_block *sb) 102962306a36Sopenharmony_ci{ 103062306a36Sopenharmony_ci struct inode *inode; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci inode = new_inode_pseudo(sb); 103362306a36Sopenharmony_ci if (inode) 103462306a36Sopenharmony_ci inode_sb_list_add(inode); 103562306a36Sopenharmony_ci return inode; 103662306a36Sopenharmony_ci} 103762306a36Sopenharmony_ciEXPORT_SYMBOL(new_inode); 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_LOCK_ALLOC 104062306a36Sopenharmony_civoid lockdep_annotate_inode_mutex_key(struct inode *inode) 104162306a36Sopenharmony_ci{ 104262306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 104362306a36Sopenharmony_ci struct file_system_type *type = inode->i_sb->s_type; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci /* Set new key only if filesystem hasn't already changed it */ 104662306a36Sopenharmony_ci if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { 104762306a36Sopenharmony_ci /* 104862306a36Sopenharmony_ci * ensure nobody is actually holding i_mutex 104962306a36Sopenharmony_ci */ 105062306a36Sopenharmony_ci // mutex_destroy(&inode->i_mutex); 105162306a36Sopenharmony_ci init_rwsem(&inode->i_rwsem); 105262306a36Sopenharmony_ci lockdep_set_class(&inode->i_rwsem, 105362306a36Sopenharmony_ci &type->i_mutex_dir_key); 105462306a36Sopenharmony_ci } 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci} 105762306a36Sopenharmony_ciEXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); 105862306a36Sopenharmony_ci#endif 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci/** 106162306a36Sopenharmony_ci * unlock_new_inode - clear the I_NEW state and wake up any waiters 106262306a36Sopenharmony_ci * @inode: new inode to unlock 106362306a36Sopenharmony_ci * 106462306a36Sopenharmony_ci * Called when the inode is fully initialised to clear the new state of the 106562306a36Sopenharmony_ci * inode and wake up anyone waiting for the inode to finish initialisation. 106662306a36Sopenharmony_ci */ 106762306a36Sopenharmony_civoid unlock_new_inode(struct inode *inode) 106862306a36Sopenharmony_ci{ 106962306a36Sopenharmony_ci lockdep_annotate_inode_mutex_key(inode); 107062306a36Sopenharmony_ci spin_lock(&inode->i_lock); 107162306a36Sopenharmony_ci WARN_ON(!(inode->i_state & I_NEW)); 107262306a36Sopenharmony_ci inode->i_state &= ~I_NEW & ~I_CREATING; 107362306a36Sopenharmony_ci smp_mb(); 107462306a36Sopenharmony_ci wake_up_bit(&inode->i_state, __I_NEW); 107562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_new_inode); 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_civoid discard_new_inode(struct inode *inode) 108062306a36Sopenharmony_ci{ 108162306a36Sopenharmony_ci lockdep_annotate_inode_mutex_key(inode); 108262306a36Sopenharmony_ci spin_lock(&inode->i_lock); 108362306a36Sopenharmony_ci WARN_ON(!(inode->i_state & I_NEW)); 108462306a36Sopenharmony_ci inode->i_state &= ~I_NEW; 108562306a36Sopenharmony_ci smp_mb(); 108662306a36Sopenharmony_ci wake_up_bit(&inode->i_state, __I_NEW); 108762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 108862306a36Sopenharmony_ci iput(inode); 108962306a36Sopenharmony_ci} 109062306a36Sopenharmony_ciEXPORT_SYMBOL(discard_new_inode); 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci/** 109362306a36Sopenharmony_ci * lock_two_inodes - lock two inodes (may be regular files but also dirs) 109462306a36Sopenharmony_ci * 109562306a36Sopenharmony_ci * Lock any non-NULL argument. The caller must make sure that if he is passing 109662306a36Sopenharmony_ci * in two directories, one is not ancestor of the other. Zero, one or two 109762306a36Sopenharmony_ci * objects may be locked by this function. 109862306a36Sopenharmony_ci * 109962306a36Sopenharmony_ci * @inode1: first inode to lock 110062306a36Sopenharmony_ci * @inode2: second inode to lock 110162306a36Sopenharmony_ci * @subclass1: inode lock subclass for the first lock obtained 110262306a36Sopenharmony_ci * @subclass2: inode lock subclass for the second lock obtained 110362306a36Sopenharmony_ci */ 110462306a36Sopenharmony_civoid lock_two_inodes(struct inode *inode1, struct inode *inode2, 110562306a36Sopenharmony_ci unsigned subclass1, unsigned subclass2) 110662306a36Sopenharmony_ci{ 110762306a36Sopenharmony_ci if (!inode1 || !inode2) { 110862306a36Sopenharmony_ci /* 110962306a36Sopenharmony_ci * Make sure @subclass1 will be used for the acquired lock. 111062306a36Sopenharmony_ci * This is not strictly necessary (no current caller cares) but 111162306a36Sopenharmony_ci * let's keep things consistent. 111262306a36Sopenharmony_ci */ 111362306a36Sopenharmony_ci if (!inode1) 111462306a36Sopenharmony_ci swap(inode1, inode2); 111562306a36Sopenharmony_ci goto lock; 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci /* 111962306a36Sopenharmony_ci * If one object is directory and the other is not, we must make sure 112062306a36Sopenharmony_ci * to lock directory first as the other object may be its child. 112162306a36Sopenharmony_ci */ 112262306a36Sopenharmony_ci if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) { 112362306a36Sopenharmony_ci if (inode1 > inode2) 112462306a36Sopenharmony_ci swap(inode1, inode2); 112562306a36Sopenharmony_ci } else if (!S_ISDIR(inode1->i_mode)) 112662306a36Sopenharmony_ci swap(inode1, inode2); 112762306a36Sopenharmony_cilock: 112862306a36Sopenharmony_ci if (inode1) 112962306a36Sopenharmony_ci inode_lock_nested(inode1, subclass1); 113062306a36Sopenharmony_ci if (inode2 && inode2 != inode1) 113162306a36Sopenharmony_ci inode_lock_nested(inode2, subclass2); 113262306a36Sopenharmony_ci} 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci/** 113562306a36Sopenharmony_ci * lock_two_nondirectories - take two i_mutexes on non-directory objects 113662306a36Sopenharmony_ci * 113762306a36Sopenharmony_ci * Lock any non-NULL argument. Passed objects must not be directories. 113862306a36Sopenharmony_ci * Zero, one or two objects may be locked by this function. 113962306a36Sopenharmony_ci * 114062306a36Sopenharmony_ci * @inode1: first inode to lock 114162306a36Sopenharmony_ci * @inode2: second inode to lock 114262306a36Sopenharmony_ci */ 114362306a36Sopenharmony_civoid lock_two_nondirectories(struct inode *inode1, struct inode *inode2) 114462306a36Sopenharmony_ci{ 114562306a36Sopenharmony_ci if (inode1) 114662306a36Sopenharmony_ci WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); 114762306a36Sopenharmony_ci if (inode2) 114862306a36Sopenharmony_ci WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); 114962306a36Sopenharmony_ci lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); 115062306a36Sopenharmony_ci} 115162306a36Sopenharmony_ciEXPORT_SYMBOL(lock_two_nondirectories); 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_ci/** 115462306a36Sopenharmony_ci * unlock_two_nondirectories - release locks from lock_two_nondirectories() 115562306a36Sopenharmony_ci * @inode1: first inode to unlock 115662306a36Sopenharmony_ci * @inode2: second inode to unlock 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_civoid unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) 115962306a36Sopenharmony_ci{ 116062306a36Sopenharmony_ci if (inode1) { 116162306a36Sopenharmony_ci WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); 116262306a36Sopenharmony_ci inode_unlock(inode1); 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci if (inode2 && inode2 != inode1) { 116562306a36Sopenharmony_ci WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); 116662306a36Sopenharmony_ci inode_unlock(inode2); 116762306a36Sopenharmony_ci } 116862306a36Sopenharmony_ci} 116962306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_two_nondirectories); 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci/** 117262306a36Sopenharmony_ci * inode_insert5 - obtain an inode from a mounted file system 117362306a36Sopenharmony_ci * @inode: pre-allocated inode to use for insert to cache 117462306a36Sopenharmony_ci * @hashval: hash value (usually inode number) to get 117562306a36Sopenharmony_ci * @test: callback used for comparisons between inodes 117662306a36Sopenharmony_ci * @set: callback used to initialize a new struct inode 117762306a36Sopenharmony_ci * @data: opaque data pointer to pass to @test and @set 117862306a36Sopenharmony_ci * 117962306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache, 118062306a36Sopenharmony_ci * and if present it is return it with an increased reference count. This is 118162306a36Sopenharmony_ci * a variant of iget5_locked() for callers that don't want to fail on memory 118262306a36Sopenharmony_ci * allocation of inode. 118362306a36Sopenharmony_ci * 118462306a36Sopenharmony_ci * If the inode is not in cache, insert the pre-allocated inode to cache and 118562306a36Sopenharmony_ci * return it locked, hashed, and with the I_NEW flag set. The file system gets 118662306a36Sopenharmony_ci * to fill it in before unlocking it via unlock_new_inode(). 118762306a36Sopenharmony_ci * 118862306a36Sopenharmony_ci * Note both @test and @set are called with the inode_hash_lock held, so can't 118962306a36Sopenharmony_ci * sleep. 119062306a36Sopenharmony_ci */ 119162306a36Sopenharmony_cistruct inode *inode_insert5(struct inode *inode, unsigned long hashval, 119262306a36Sopenharmony_ci int (*test)(struct inode *, void *), 119362306a36Sopenharmony_ci int (*set)(struct inode *, void *), void *data) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); 119662306a36Sopenharmony_ci struct inode *old; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ciagain: 119962306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 120062306a36Sopenharmony_ci old = find_inode(inode->i_sb, head, test, data); 120162306a36Sopenharmony_ci if (unlikely(old)) { 120262306a36Sopenharmony_ci /* 120362306a36Sopenharmony_ci * Uhhuh, somebody else created the same inode under us. 120462306a36Sopenharmony_ci * Use the old inode instead of the preallocated one. 120562306a36Sopenharmony_ci */ 120662306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 120762306a36Sopenharmony_ci if (IS_ERR(old)) 120862306a36Sopenharmony_ci return NULL; 120962306a36Sopenharmony_ci wait_on_inode(old); 121062306a36Sopenharmony_ci if (unlikely(inode_unhashed(old))) { 121162306a36Sopenharmony_ci iput(old); 121262306a36Sopenharmony_ci goto again; 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci return old; 121562306a36Sopenharmony_ci } 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci if (set && unlikely(set(inode, data))) { 121862306a36Sopenharmony_ci inode = NULL; 121962306a36Sopenharmony_ci goto unlock; 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci /* 122362306a36Sopenharmony_ci * Return the locked inode with I_NEW set, the 122462306a36Sopenharmony_ci * caller is responsible for filling in the contents 122562306a36Sopenharmony_ci */ 122662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 122762306a36Sopenharmony_ci inode->i_state |= I_NEW; 122862306a36Sopenharmony_ci hlist_add_head_rcu(&inode->i_hash, head); 122962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci /* 123262306a36Sopenharmony_ci * Add inode to the sb list if it's not already. It has I_NEW at this 123362306a36Sopenharmony_ci * point, so it should be safe to test i_sb_list locklessly. 123462306a36Sopenharmony_ci */ 123562306a36Sopenharmony_ci if (list_empty(&inode->i_sb_list)) 123662306a36Sopenharmony_ci inode_sb_list_add(inode); 123762306a36Sopenharmony_ciunlock: 123862306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci return inode; 124162306a36Sopenharmony_ci} 124262306a36Sopenharmony_ciEXPORT_SYMBOL(inode_insert5); 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci/** 124562306a36Sopenharmony_ci * iget5_locked - obtain an inode from a mounted file system 124662306a36Sopenharmony_ci * @sb: super block of file system 124762306a36Sopenharmony_ci * @hashval: hash value (usually inode number) to get 124862306a36Sopenharmony_ci * @test: callback used for comparisons between inodes 124962306a36Sopenharmony_ci * @set: callback used to initialize a new struct inode 125062306a36Sopenharmony_ci * @data: opaque data pointer to pass to @test and @set 125162306a36Sopenharmony_ci * 125262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache, 125362306a36Sopenharmony_ci * and if present it is return it with an increased reference count. This is 125462306a36Sopenharmony_ci * a generalized version of iget_locked() for file systems where the inode 125562306a36Sopenharmony_ci * number is not sufficient for unique identification of an inode. 125662306a36Sopenharmony_ci * 125762306a36Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked, 125862306a36Sopenharmony_ci * hashed, and with the I_NEW flag set. The file system gets to fill it in 125962306a36Sopenharmony_ci * before unlocking it via unlock_new_inode(). 126062306a36Sopenharmony_ci * 126162306a36Sopenharmony_ci * Note both @test and @set are called with the inode_hash_lock held, so can't 126262306a36Sopenharmony_ci * sleep. 126362306a36Sopenharmony_ci */ 126462306a36Sopenharmony_cistruct inode *iget5_locked(struct super_block *sb, unsigned long hashval, 126562306a36Sopenharmony_ci int (*test)(struct inode *, void *), 126662306a36Sopenharmony_ci int (*set)(struct inode *, void *), void *data) 126762306a36Sopenharmony_ci{ 126862306a36Sopenharmony_ci struct inode *inode = ilookup5(sb, hashval, test, data); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci if (!inode) { 127162306a36Sopenharmony_ci struct inode *new = alloc_inode(sb); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci if (new) { 127462306a36Sopenharmony_ci new->i_state = 0; 127562306a36Sopenharmony_ci inode = inode_insert5(new, hashval, test, set, data); 127662306a36Sopenharmony_ci if (unlikely(inode != new)) 127762306a36Sopenharmony_ci destroy_inode(new); 127862306a36Sopenharmony_ci } 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci return inode; 128162306a36Sopenharmony_ci} 128262306a36Sopenharmony_ciEXPORT_SYMBOL(iget5_locked); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci/** 128562306a36Sopenharmony_ci * iget_locked - obtain an inode from a mounted file system 128662306a36Sopenharmony_ci * @sb: super block of file system 128762306a36Sopenharmony_ci * @ino: inode number to get 128862306a36Sopenharmony_ci * 128962306a36Sopenharmony_ci * Search for the inode specified by @ino in the inode cache and if present 129062306a36Sopenharmony_ci * return it with an increased reference count. This is for file systems 129162306a36Sopenharmony_ci * where the inode number is sufficient for unique identification of an inode. 129262306a36Sopenharmony_ci * 129362306a36Sopenharmony_ci * If the inode is not in cache, allocate a new inode and return it locked, 129462306a36Sopenharmony_ci * hashed, and with the I_NEW flag set. The file system gets to fill it in 129562306a36Sopenharmony_ci * before unlocking it via unlock_new_inode(). 129662306a36Sopenharmony_ci */ 129762306a36Sopenharmony_cistruct inode *iget_locked(struct super_block *sb, unsigned long ino) 129862306a36Sopenharmony_ci{ 129962306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, ino); 130062306a36Sopenharmony_ci struct inode *inode; 130162306a36Sopenharmony_ciagain: 130262306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 130362306a36Sopenharmony_ci inode = find_inode_fast(sb, head, ino); 130462306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 130562306a36Sopenharmony_ci if (inode) { 130662306a36Sopenharmony_ci if (IS_ERR(inode)) 130762306a36Sopenharmony_ci return NULL; 130862306a36Sopenharmony_ci wait_on_inode(inode); 130962306a36Sopenharmony_ci if (unlikely(inode_unhashed(inode))) { 131062306a36Sopenharmony_ci iput(inode); 131162306a36Sopenharmony_ci goto again; 131262306a36Sopenharmony_ci } 131362306a36Sopenharmony_ci return inode; 131462306a36Sopenharmony_ci } 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci inode = alloc_inode(sb); 131762306a36Sopenharmony_ci if (inode) { 131862306a36Sopenharmony_ci struct inode *old; 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 132162306a36Sopenharmony_ci /* We released the lock, so.. */ 132262306a36Sopenharmony_ci old = find_inode_fast(sb, head, ino); 132362306a36Sopenharmony_ci if (!old) { 132462306a36Sopenharmony_ci inode->i_ino = ino; 132562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 132662306a36Sopenharmony_ci inode->i_state = I_NEW; 132762306a36Sopenharmony_ci hlist_add_head_rcu(&inode->i_hash, head); 132862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 132962306a36Sopenharmony_ci inode_sb_list_add(inode); 133062306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci /* Return the locked inode with I_NEW set, the 133362306a36Sopenharmony_ci * caller is responsible for filling in the contents 133462306a36Sopenharmony_ci */ 133562306a36Sopenharmony_ci return inode; 133662306a36Sopenharmony_ci } 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci /* 133962306a36Sopenharmony_ci * Uhhuh, somebody else created the same inode under 134062306a36Sopenharmony_ci * us. Use the old inode instead of the one we just 134162306a36Sopenharmony_ci * allocated. 134262306a36Sopenharmony_ci */ 134362306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 134462306a36Sopenharmony_ci destroy_inode(inode); 134562306a36Sopenharmony_ci if (IS_ERR(old)) 134662306a36Sopenharmony_ci return NULL; 134762306a36Sopenharmony_ci inode = old; 134862306a36Sopenharmony_ci wait_on_inode(inode); 134962306a36Sopenharmony_ci if (unlikely(inode_unhashed(inode))) { 135062306a36Sopenharmony_ci iput(inode); 135162306a36Sopenharmony_ci goto again; 135262306a36Sopenharmony_ci } 135362306a36Sopenharmony_ci } 135462306a36Sopenharmony_ci return inode; 135562306a36Sopenharmony_ci} 135662306a36Sopenharmony_ciEXPORT_SYMBOL(iget_locked); 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci/* 135962306a36Sopenharmony_ci * search the inode cache for a matching inode number. 136062306a36Sopenharmony_ci * If we find one, then the inode number we are trying to 136162306a36Sopenharmony_ci * allocate is not unique and so we should not use it. 136262306a36Sopenharmony_ci * 136362306a36Sopenharmony_ci * Returns 1 if the inode number is unique, 0 if it is not. 136462306a36Sopenharmony_ci */ 136562306a36Sopenharmony_cistatic int test_inode_iunique(struct super_block *sb, unsigned long ino) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci struct hlist_head *b = inode_hashtable + hash(sb, ino); 136862306a36Sopenharmony_ci struct inode *inode; 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci hlist_for_each_entry_rcu(inode, b, i_hash) { 137162306a36Sopenharmony_ci if (inode->i_ino == ino && inode->i_sb == sb) 137262306a36Sopenharmony_ci return 0; 137362306a36Sopenharmony_ci } 137462306a36Sopenharmony_ci return 1; 137562306a36Sopenharmony_ci} 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci/** 137862306a36Sopenharmony_ci * iunique - get a unique inode number 137962306a36Sopenharmony_ci * @sb: superblock 138062306a36Sopenharmony_ci * @max_reserved: highest reserved inode number 138162306a36Sopenharmony_ci * 138262306a36Sopenharmony_ci * Obtain an inode number that is unique on the system for a given 138362306a36Sopenharmony_ci * superblock. This is used by file systems that have no natural 138462306a36Sopenharmony_ci * permanent inode numbering system. An inode number is returned that 138562306a36Sopenharmony_ci * is higher than the reserved limit but unique. 138662306a36Sopenharmony_ci * 138762306a36Sopenharmony_ci * BUGS: 138862306a36Sopenharmony_ci * With a large number of inodes live on the file system this function 138962306a36Sopenharmony_ci * currently becomes quite slow. 139062306a36Sopenharmony_ci */ 139162306a36Sopenharmony_ciino_t iunique(struct super_block *sb, ino_t max_reserved) 139262306a36Sopenharmony_ci{ 139362306a36Sopenharmony_ci /* 139462306a36Sopenharmony_ci * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW 139562306a36Sopenharmony_ci * error if st_ino won't fit in target struct field. Use 32bit counter 139662306a36Sopenharmony_ci * here to attempt to avoid that. 139762306a36Sopenharmony_ci */ 139862306a36Sopenharmony_ci static DEFINE_SPINLOCK(iunique_lock); 139962306a36Sopenharmony_ci static unsigned int counter; 140062306a36Sopenharmony_ci ino_t res; 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci rcu_read_lock(); 140362306a36Sopenharmony_ci spin_lock(&iunique_lock); 140462306a36Sopenharmony_ci do { 140562306a36Sopenharmony_ci if (counter <= max_reserved) 140662306a36Sopenharmony_ci counter = max_reserved + 1; 140762306a36Sopenharmony_ci res = counter++; 140862306a36Sopenharmony_ci } while (!test_inode_iunique(sb, res)); 140962306a36Sopenharmony_ci spin_unlock(&iunique_lock); 141062306a36Sopenharmony_ci rcu_read_unlock(); 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci return res; 141362306a36Sopenharmony_ci} 141462306a36Sopenharmony_ciEXPORT_SYMBOL(iunique); 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_cistruct inode *igrab(struct inode *inode) 141762306a36Sopenharmony_ci{ 141862306a36Sopenharmony_ci spin_lock(&inode->i_lock); 141962306a36Sopenharmony_ci if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 142062306a36Sopenharmony_ci __iget(inode); 142162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 142262306a36Sopenharmony_ci } else { 142362306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 142462306a36Sopenharmony_ci /* 142562306a36Sopenharmony_ci * Handle the case where s_op->clear_inode is not been 142662306a36Sopenharmony_ci * called yet, and somebody is calling igrab 142762306a36Sopenharmony_ci * while the inode is getting freed. 142862306a36Sopenharmony_ci */ 142962306a36Sopenharmony_ci inode = NULL; 143062306a36Sopenharmony_ci } 143162306a36Sopenharmony_ci return inode; 143262306a36Sopenharmony_ci} 143362306a36Sopenharmony_ciEXPORT_SYMBOL(igrab); 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_ci/** 143662306a36Sopenharmony_ci * ilookup5_nowait - search for an inode in the inode cache 143762306a36Sopenharmony_ci * @sb: super block of file system to search 143862306a36Sopenharmony_ci * @hashval: hash value (usually inode number) to search for 143962306a36Sopenharmony_ci * @test: callback used for comparisons between inodes 144062306a36Sopenharmony_ci * @data: opaque data pointer to pass to @test 144162306a36Sopenharmony_ci * 144262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache. 144362306a36Sopenharmony_ci * If the inode is in the cache, the inode is returned with an incremented 144462306a36Sopenharmony_ci * reference count. 144562306a36Sopenharmony_ci * 144662306a36Sopenharmony_ci * Note: I_NEW is not waited upon so you have to be very careful what you do 144762306a36Sopenharmony_ci * with the returned inode. You probably should be using ilookup5() instead. 144862306a36Sopenharmony_ci * 144962306a36Sopenharmony_ci * Note2: @test is called with the inode_hash_lock held, so can't sleep. 145062306a36Sopenharmony_ci */ 145162306a36Sopenharmony_cistruct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 145262306a36Sopenharmony_ci int (*test)(struct inode *, void *), void *data) 145362306a36Sopenharmony_ci{ 145462306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, hashval); 145562306a36Sopenharmony_ci struct inode *inode; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 145862306a36Sopenharmony_ci inode = find_inode(sb, head, test, data); 145962306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci return IS_ERR(inode) ? NULL : inode; 146262306a36Sopenharmony_ci} 146362306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup5_nowait); 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci/** 146662306a36Sopenharmony_ci * ilookup5 - search for an inode in the inode cache 146762306a36Sopenharmony_ci * @sb: super block of file system to search 146862306a36Sopenharmony_ci * @hashval: hash value (usually inode number) to search for 146962306a36Sopenharmony_ci * @test: callback used for comparisons between inodes 147062306a36Sopenharmony_ci * @data: opaque data pointer to pass to @test 147162306a36Sopenharmony_ci * 147262306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache, 147362306a36Sopenharmony_ci * and if the inode is in the cache, return the inode with an incremented 147462306a36Sopenharmony_ci * reference count. Waits on I_NEW before returning the inode. 147562306a36Sopenharmony_ci * returned with an incremented reference count. 147662306a36Sopenharmony_ci * 147762306a36Sopenharmony_ci * This is a generalized version of ilookup() for file systems where the 147862306a36Sopenharmony_ci * inode number is not sufficient for unique identification of an inode. 147962306a36Sopenharmony_ci * 148062306a36Sopenharmony_ci * Note: @test is called with the inode_hash_lock held, so can't sleep. 148162306a36Sopenharmony_ci */ 148262306a36Sopenharmony_cistruct inode *ilookup5(struct super_block *sb, unsigned long hashval, 148362306a36Sopenharmony_ci int (*test)(struct inode *, void *), void *data) 148462306a36Sopenharmony_ci{ 148562306a36Sopenharmony_ci struct inode *inode; 148662306a36Sopenharmony_ciagain: 148762306a36Sopenharmony_ci inode = ilookup5_nowait(sb, hashval, test, data); 148862306a36Sopenharmony_ci if (inode) { 148962306a36Sopenharmony_ci wait_on_inode(inode); 149062306a36Sopenharmony_ci if (unlikely(inode_unhashed(inode))) { 149162306a36Sopenharmony_ci iput(inode); 149262306a36Sopenharmony_ci goto again; 149362306a36Sopenharmony_ci } 149462306a36Sopenharmony_ci } 149562306a36Sopenharmony_ci return inode; 149662306a36Sopenharmony_ci} 149762306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup5); 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci/** 150062306a36Sopenharmony_ci * ilookup - search for an inode in the inode cache 150162306a36Sopenharmony_ci * @sb: super block of file system to search 150262306a36Sopenharmony_ci * @ino: inode number to search for 150362306a36Sopenharmony_ci * 150462306a36Sopenharmony_ci * Search for the inode @ino in the inode cache, and if the inode is in the 150562306a36Sopenharmony_ci * cache, the inode is returned with an incremented reference count. 150662306a36Sopenharmony_ci */ 150762306a36Sopenharmony_cistruct inode *ilookup(struct super_block *sb, unsigned long ino) 150862306a36Sopenharmony_ci{ 150962306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, ino); 151062306a36Sopenharmony_ci struct inode *inode; 151162306a36Sopenharmony_ciagain: 151262306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 151362306a36Sopenharmony_ci inode = find_inode_fast(sb, head, ino); 151462306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci if (inode) { 151762306a36Sopenharmony_ci if (IS_ERR(inode)) 151862306a36Sopenharmony_ci return NULL; 151962306a36Sopenharmony_ci wait_on_inode(inode); 152062306a36Sopenharmony_ci if (unlikely(inode_unhashed(inode))) { 152162306a36Sopenharmony_ci iput(inode); 152262306a36Sopenharmony_ci goto again; 152362306a36Sopenharmony_ci } 152462306a36Sopenharmony_ci } 152562306a36Sopenharmony_ci return inode; 152662306a36Sopenharmony_ci} 152762306a36Sopenharmony_ciEXPORT_SYMBOL(ilookup); 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci/** 153062306a36Sopenharmony_ci * find_inode_nowait - find an inode in the inode cache 153162306a36Sopenharmony_ci * @sb: super block of file system to search 153262306a36Sopenharmony_ci * @hashval: hash value (usually inode number) to search for 153362306a36Sopenharmony_ci * @match: callback used for comparisons between inodes 153462306a36Sopenharmony_ci * @data: opaque data pointer to pass to @match 153562306a36Sopenharmony_ci * 153662306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode 153762306a36Sopenharmony_ci * cache, where the helper function @match will return 0 if the inode 153862306a36Sopenharmony_ci * does not match, 1 if the inode does match, and -1 if the search 153962306a36Sopenharmony_ci * should be stopped. The @match function must be responsible for 154062306a36Sopenharmony_ci * taking the i_lock spin_lock and checking i_state for an inode being 154162306a36Sopenharmony_ci * freed or being initialized, and incrementing the reference count 154262306a36Sopenharmony_ci * before returning 1. It also must not sleep, since it is called with 154362306a36Sopenharmony_ci * the inode_hash_lock spinlock held. 154462306a36Sopenharmony_ci * 154562306a36Sopenharmony_ci * This is a even more generalized version of ilookup5() when the 154662306a36Sopenharmony_ci * function must never block --- find_inode() can block in 154762306a36Sopenharmony_ci * __wait_on_freeing_inode() --- or when the caller can not increment 154862306a36Sopenharmony_ci * the reference count because the resulting iput() might cause an 154962306a36Sopenharmony_ci * inode eviction. The tradeoff is that the @match funtion must be 155062306a36Sopenharmony_ci * very carefully implemented. 155162306a36Sopenharmony_ci */ 155262306a36Sopenharmony_cistruct inode *find_inode_nowait(struct super_block *sb, 155362306a36Sopenharmony_ci unsigned long hashval, 155462306a36Sopenharmony_ci int (*match)(struct inode *, unsigned long, 155562306a36Sopenharmony_ci void *), 155662306a36Sopenharmony_ci void *data) 155762306a36Sopenharmony_ci{ 155862306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, hashval); 155962306a36Sopenharmony_ci struct inode *inode, *ret_inode = NULL; 156062306a36Sopenharmony_ci int mval; 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 156362306a36Sopenharmony_ci hlist_for_each_entry(inode, head, i_hash) { 156462306a36Sopenharmony_ci if (inode->i_sb != sb) 156562306a36Sopenharmony_ci continue; 156662306a36Sopenharmony_ci mval = match(inode, hashval, data); 156762306a36Sopenharmony_ci if (mval == 0) 156862306a36Sopenharmony_ci continue; 156962306a36Sopenharmony_ci if (mval == 1) 157062306a36Sopenharmony_ci ret_inode = inode; 157162306a36Sopenharmony_ci goto out; 157262306a36Sopenharmony_ci } 157362306a36Sopenharmony_ciout: 157462306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 157562306a36Sopenharmony_ci return ret_inode; 157662306a36Sopenharmony_ci} 157762306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_nowait); 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_ci/** 158062306a36Sopenharmony_ci * find_inode_rcu - find an inode in the inode cache 158162306a36Sopenharmony_ci * @sb: Super block of file system to search 158262306a36Sopenharmony_ci * @hashval: Key to hash 158362306a36Sopenharmony_ci * @test: Function to test match on an inode 158462306a36Sopenharmony_ci * @data: Data for test function 158562306a36Sopenharmony_ci * 158662306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache, 158762306a36Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match 158862306a36Sopenharmony_ci * and 1 if it does. The @test function must be responsible for taking the 158962306a36Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being 159062306a36Sopenharmony_ci * initialized. 159162306a36Sopenharmony_ci * 159262306a36Sopenharmony_ci * If successful, this will return the inode for which the @test function 159362306a36Sopenharmony_ci * returned 1 and NULL otherwise. 159462306a36Sopenharmony_ci * 159562306a36Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented. 159662306a36Sopenharmony_ci * It is also not permitted to sleep. 159762306a36Sopenharmony_ci * 159862306a36Sopenharmony_ci * The caller must hold the RCU read lock. 159962306a36Sopenharmony_ci */ 160062306a36Sopenharmony_cistruct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, 160162306a36Sopenharmony_ci int (*test)(struct inode *, void *), void *data) 160262306a36Sopenharmony_ci{ 160362306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, hashval); 160462306a36Sopenharmony_ci struct inode *inode; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci RCU_LOCKDEP_WARN(!rcu_read_lock_held(), 160762306a36Sopenharmony_ci "suspicious find_inode_rcu() usage"); 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci hlist_for_each_entry_rcu(inode, head, i_hash) { 161062306a36Sopenharmony_ci if (inode->i_sb == sb && 161162306a36Sopenharmony_ci !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && 161262306a36Sopenharmony_ci test(inode, data)) 161362306a36Sopenharmony_ci return inode; 161462306a36Sopenharmony_ci } 161562306a36Sopenharmony_ci return NULL; 161662306a36Sopenharmony_ci} 161762306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_rcu); 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci/** 162062306a36Sopenharmony_ci * find_inode_by_ino_rcu - Find an inode in the inode cache 162162306a36Sopenharmony_ci * @sb: Super block of file system to search 162262306a36Sopenharmony_ci * @ino: The inode number to match 162362306a36Sopenharmony_ci * 162462306a36Sopenharmony_ci * Search for the inode specified by @hashval and @data in the inode cache, 162562306a36Sopenharmony_ci * where the helper function @test will return 0 if the inode does not match 162662306a36Sopenharmony_ci * and 1 if it does. The @test function must be responsible for taking the 162762306a36Sopenharmony_ci * i_lock spin_lock and checking i_state for an inode being freed or being 162862306a36Sopenharmony_ci * initialized. 162962306a36Sopenharmony_ci * 163062306a36Sopenharmony_ci * If successful, this will return the inode for which the @test function 163162306a36Sopenharmony_ci * returned 1 and NULL otherwise. 163262306a36Sopenharmony_ci * 163362306a36Sopenharmony_ci * The @test function is not permitted to take a ref on any inode presented. 163462306a36Sopenharmony_ci * It is also not permitted to sleep. 163562306a36Sopenharmony_ci * 163662306a36Sopenharmony_ci * The caller must hold the RCU read lock. 163762306a36Sopenharmony_ci */ 163862306a36Sopenharmony_cistruct inode *find_inode_by_ino_rcu(struct super_block *sb, 163962306a36Sopenharmony_ci unsigned long ino) 164062306a36Sopenharmony_ci{ 164162306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, ino); 164262306a36Sopenharmony_ci struct inode *inode; 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci RCU_LOCKDEP_WARN(!rcu_read_lock_held(), 164562306a36Sopenharmony_ci "suspicious find_inode_by_ino_rcu() usage"); 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_ci hlist_for_each_entry_rcu(inode, head, i_hash) { 164862306a36Sopenharmony_ci if (inode->i_ino == ino && 164962306a36Sopenharmony_ci inode->i_sb == sb && 165062306a36Sopenharmony_ci !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) 165162306a36Sopenharmony_ci return inode; 165262306a36Sopenharmony_ci } 165362306a36Sopenharmony_ci return NULL; 165462306a36Sopenharmony_ci} 165562306a36Sopenharmony_ciEXPORT_SYMBOL(find_inode_by_ino_rcu); 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ciint insert_inode_locked(struct inode *inode) 165862306a36Sopenharmony_ci{ 165962306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 166062306a36Sopenharmony_ci ino_t ino = inode->i_ino; 166162306a36Sopenharmony_ci struct hlist_head *head = inode_hashtable + hash(sb, ino); 166262306a36Sopenharmony_ci 166362306a36Sopenharmony_ci while (1) { 166462306a36Sopenharmony_ci struct inode *old = NULL; 166562306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 166662306a36Sopenharmony_ci hlist_for_each_entry(old, head, i_hash) { 166762306a36Sopenharmony_ci if (old->i_ino != ino) 166862306a36Sopenharmony_ci continue; 166962306a36Sopenharmony_ci if (old->i_sb != sb) 167062306a36Sopenharmony_ci continue; 167162306a36Sopenharmony_ci spin_lock(&old->i_lock); 167262306a36Sopenharmony_ci if (old->i_state & (I_FREEING|I_WILL_FREE)) { 167362306a36Sopenharmony_ci spin_unlock(&old->i_lock); 167462306a36Sopenharmony_ci continue; 167562306a36Sopenharmony_ci } 167662306a36Sopenharmony_ci break; 167762306a36Sopenharmony_ci } 167862306a36Sopenharmony_ci if (likely(!old)) { 167962306a36Sopenharmony_ci spin_lock(&inode->i_lock); 168062306a36Sopenharmony_ci inode->i_state |= I_NEW | I_CREATING; 168162306a36Sopenharmony_ci hlist_add_head_rcu(&inode->i_hash, head); 168262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 168362306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 168462306a36Sopenharmony_ci return 0; 168562306a36Sopenharmony_ci } 168662306a36Sopenharmony_ci if (unlikely(old->i_state & I_CREATING)) { 168762306a36Sopenharmony_ci spin_unlock(&old->i_lock); 168862306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 168962306a36Sopenharmony_ci return -EBUSY; 169062306a36Sopenharmony_ci } 169162306a36Sopenharmony_ci __iget(old); 169262306a36Sopenharmony_ci spin_unlock(&old->i_lock); 169362306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 169462306a36Sopenharmony_ci wait_on_inode(old); 169562306a36Sopenharmony_ci if (unlikely(!inode_unhashed(old))) { 169662306a36Sopenharmony_ci iput(old); 169762306a36Sopenharmony_ci return -EBUSY; 169862306a36Sopenharmony_ci } 169962306a36Sopenharmony_ci iput(old); 170062306a36Sopenharmony_ci } 170162306a36Sopenharmony_ci} 170262306a36Sopenharmony_ciEXPORT_SYMBOL(insert_inode_locked); 170362306a36Sopenharmony_ci 170462306a36Sopenharmony_ciint insert_inode_locked4(struct inode *inode, unsigned long hashval, 170562306a36Sopenharmony_ci int (*test)(struct inode *, void *), void *data) 170662306a36Sopenharmony_ci{ 170762306a36Sopenharmony_ci struct inode *old; 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci inode->i_state |= I_CREATING; 171062306a36Sopenharmony_ci old = inode_insert5(inode, hashval, test, NULL, data); 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci if (old != inode) { 171362306a36Sopenharmony_ci iput(old); 171462306a36Sopenharmony_ci return -EBUSY; 171562306a36Sopenharmony_ci } 171662306a36Sopenharmony_ci return 0; 171762306a36Sopenharmony_ci} 171862306a36Sopenharmony_ciEXPORT_SYMBOL(insert_inode_locked4); 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_ciint generic_delete_inode(struct inode *inode) 172262306a36Sopenharmony_ci{ 172362306a36Sopenharmony_ci return 1; 172462306a36Sopenharmony_ci} 172562306a36Sopenharmony_ciEXPORT_SYMBOL(generic_delete_inode); 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci/* 172862306a36Sopenharmony_ci * Called when we're dropping the last reference 172962306a36Sopenharmony_ci * to an inode. 173062306a36Sopenharmony_ci * 173162306a36Sopenharmony_ci * Call the FS "drop_inode()" function, defaulting to 173262306a36Sopenharmony_ci * the legacy UNIX filesystem behaviour. If it tells 173362306a36Sopenharmony_ci * us to evict inode, do so. Otherwise, retain inode 173462306a36Sopenharmony_ci * in cache if fs is alive, sync and evict if fs is 173562306a36Sopenharmony_ci * shutting down. 173662306a36Sopenharmony_ci */ 173762306a36Sopenharmony_cistatic void iput_final(struct inode *inode) 173862306a36Sopenharmony_ci{ 173962306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 174062306a36Sopenharmony_ci const struct super_operations *op = inode->i_sb->s_op; 174162306a36Sopenharmony_ci unsigned long state; 174262306a36Sopenharmony_ci int drop; 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci WARN_ON(inode->i_state & I_NEW); 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci if (op->drop_inode) 174762306a36Sopenharmony_ci drop = op->drop_inode(inode); 174862306a36Sopenharmony_ci else 174962306a36Sopenharmony_ci drop = generic_drop_inode(inode); 175062306a36Sopenharmony_ci 175162306a36Sopenharmony_ci if (!drop && 175262306a36Sopenharmony_ci !(inode->i_state & I_DONTCACHE) && 175362306a36Sopenharmony_ci (sb->s_flags & SB_ACTIVE)) { 175462306a36Sopenharmony_ci __inode_add_lru(inode, true); 175562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 175662306a36Sopenharmony_ci return; 175762306a36Sopenharmony_ci } 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci state = inode->i_state; 176062306a36Sopenharmony_ci if (!drop) { 176162306a36Sopenharmony_ci WRITE_ONCE(inode->i_state, state | I_WILL_FREE); 176262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci write_inode_now(inode, 1); 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 176762306a36Sopenharmony_ci state = inode->i_state; 176862306a36Sopenharmony_ci WARN_ON(state & I_NEW); 176962306a36Sopenharmony_ci state &= ~I_WILL_FREE; 177062306a36Sopenharmony_ci } 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci WRITE_ONCE(inode->i_state, state | I_FREEING); 177362306a36Sopenharmony_ci if (!list_empty(&inode->i_lru)) 177462306a36Sopenharmony_ci inode_lru_list_del(inode); 177562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci evict(inode); 177862306a36Sopenharmony_ci} 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci/** 178162306a36Sopenharmony_ci * iput - put an inode 178262306a36Sopenharmony_ci * @inode: inode to put 178362306a36Sopenharmony_ci * 178462306a36Sopenharmony_ci * Puts an inode, dropping its usage count. If the inode use count hits 178562306a36Sopenharmony_ci * zero, the inode is then freed and may also be destroyed. 178662306a36Sopenharmony_ci * 178762306a36Sopenharmony_ci * Consequently, iput() can sleep. 178862306a36Sopenharmony_ci */ 178962306a36Sopenharmony_civoid iput(struct inode *inode) 179062306a36Sopenharmony_ci{ 179162306a36Sopenharmony_ci if (!inode) 179262306a36Sopenharmony_ci return; 179362306a36Sopenharmony_ci BUG_ON(inode->i_state & I_CLEAR); 179462306a36Sopenharmony_ciretry: 179562306a36Sopenharmony_ci if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { 179662306a36Sopenharmony_ci if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { 179762306a36Sopenharmony_ci atomic_inc(&inode->i_count); 179862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 179962306a36Sopenharmony_ci trace_writeback_lazytime_iput(inode); 180062306a36Sopenharmony_ci mark_inode_dirty_sync(inode); 180162306a36Sopenharmony_ci goto retry; 180262306a36Sopenharmony_ci } 180362306a36Sopenharmony_ci iput_final(inode); 180462306a36Sopenharmony_ci } 180562306a36Sopenharmony_ci} 180662306a36Sopenharmony_ciEXPORT_SYMBOL(iput); 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 180962306a36Sopenharmony_ci/** 181062306a36Sopenharmony_ci * bmap - find a block number in a file 181162306a36Sopenharmony_ci * @inode: inode owning the block number being requested 181262306a36Sopenharmony_ci * @block: pointer containing the block to find 181362306a36Sopenharmony_ci * 181462306a36Sopenharmony_ci * Replaces the value in ``*block`` with the block number on the device holding 181562306a36Sopenharmony_ci * corresponding to the requested block number in the file. 181662306a36Sopenharmony_ci * That is, asked for block 4 of inode 1 the function will replace the 181762306a36Sopenharmony_ci * 4 in ``*block``, with disk block relative to the disk start that holds that 181862306a36Sopenharmony_ci * block of the file. 181962306a36Sopenharmony_ci * 182062306a36Sopenharmony_ci * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a 182162306a36Sopenharmony_ci * hole, returns 0 and ``*block`` is also set to 0. 182262306a36Sopenharmony_ci */ 182362306a36Sopenharmony_ciint bmap(struct inode *inode, sector_t *block) 182462306a36Sopenharmony_ci{ 182562306a36Sopenharmony_ci if (!inode->i_mapping->a_ops->bmap) 182662306a36Sopenharmony_ci return -EINVAL; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); 182962306a36Sopenharmony_ci return 0; 183062306a36Sopenharmony_ci} 183162306a36Sopenharmony_ciEXPORT_SYMBOL(bmap); 183262306a36Sopenharmony_ci#endif 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci/* 183562306a36Sopenharmony_ci * With relative atime, only update atime if the previous atime is 183662306a36Sopenharmony_ci * earlier than or equal to either the ctime or mtime, 183762306a36Sopenharmony_ci * or if at least a day has passed since the last atime update. 183862306a36Sopenharmony_ci */ 183962306a36Sopenharmony_cistatic int relatime_need_update(struct vfsmount *mnt, struct inode *inode, 184062306a36Sopenharmony_ci struct timespec64 now) 184162306a36Sopenharmony_ci{ 184262306a36Sopenharmony_ci struct timespec64 ctime; 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_ci if (!(mnt->mnt_flags & MNT_RELATIME)) 184562306a36Sopenharmony_ci return 1; 184662306a36Sopenharmony_ci /* 184762306a36Sopenharmony_ci * Is mtime younger than or equal to atime? If yes, update atime: 184862306a36Sopenharmony_ci */ 184962306a36Sopenharmony_ci if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0) 185062306a36Sopenharmony_ci return 1; 185162306a36Sopenharmony_ci /* 185262306a36Sopenharmony_ci * Is ctime younger than or equal to atime? If yes, update atime: 185362306a36Sopenharmony_ci */ 185462306a36Sopenharmony_ci ctime = inode_get_ctime(inode); 185562306a36Sopenharmony_ci if (timespec64_compare(&ctime, &inode->i_atime) >= 0) 185662306a36Sopenharmony_ci return 1; 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_ci /* 185962306a36Sopenharmony_ci * Is the previous atime value older than a day? If yes, 186062306a36Sopenharmony_ci * update atime: 186162306a36Sopenharmony_ci */ 186262306a36Sopenharmony_ci if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60) 186362306a36Sopenharmony_ci return 1; 186462306a36Sopenharmony_ci /* 186562306a36Sopenharmony_ci * Good, we can skip the atime update: 186662306a36Sopenharmony_ci */ 186762306a36Sopenharmony_ci return 0; 186862306a36Sopenharmony_ci} 186962306a36Sopenharmony_ci 187062306a36Sopenharmony_ci/** 187162306a36Sopenharmony_ci * inode_update_timestamps - update the timestamps on the inode 187262306a36Sopenharmony_ci * @inode: inode to be updated 187362306a36Sopenharmony_ci * @flags: S_* flags that needed to be updated 187462306a36Sopenharmony_ci * 187562306a36Sopenharmony_ci * The update_time function is called when an inode's timestamps need to be 187662306a36Sopenharmony_ci * updated for a read or write operation. This function handles updating the 187762306a36Sopenharmony_ci * actual timestamps. It's up to the caller to ensure that the inode is marked 187862306a36Sopenharmony_ci * dirty appropriately. 187962306a36Sopenharmony_ci * 188062306a36Sopenharmony_ci * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, 188162306a36Sopenharmony_ci * attempt to update all three of them. S_ATIME updates can be handled 188262306a36Sopenharmony_ci * independently of the rest. 188362306a36Sopenharmony_ci * 188462306a36Sopenharmony_ci * Returns a set of S_* flags indicating which values changed. 188562306a36Sopenharmony_ci */ 188662306a36Sopenharmony_ciint inode_update_timestamps(struct inode *inode, int flags) 188762306a36Sopenharmony_ci{ 188862306a36Sopenharmony_ci int updated = 0; 188962306a36Sopenharmony_ci struct timespec64 now; 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci if (flags & (S_MTIME|S_CTIME|S_VERSION)) { 189262306a36Sopenharmony_ci struct timespec64 ctime = inode_get_ctime(inode); 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci now = inode_set_ctime_current(inode); 189562306a36Sopenharmony_ci if (!timespec64_equal(&now, &ctime)) 189662306a36Sopenharmony_ci updated |= S_CTIME; 189762306a36Sopenharmony_ci if (!timespec64_equal(&now, &inode->i_mtime)) { 189862306a36Sopenharmony_ci inode->i_mtime = now; 189962306a36Sopenharmony_ci updated |= S_MTIME; 190062306a36Sopenharmony_ci } 190162306a36Sopenharmony_ci if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) 190262306a36Sopenharmony_ci updated |= S_VERSION; 190362306a36Sopenharmony_ci } else { 190462306a36Sopenharmony_ci now = current_time(inode); 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci if (flags & S_ATIME) { 190862306a36Sopenharmony_ci if (!timespec64_equal(&now, &inode->i_atime)) { 190962306a36Sopenharmony_ci inode->i_atime = now; 191062306a36Sopenharmony_ci updated |= S_ATIME; 191162306a36Sopenharmony_ci } 191262306a36Sopenharmony_ci } 191362306a36Sopenharmony_ci return updated; 191462306a36Sopenharmony_ci} 191562306a36Sopenharmony_ciEXPORT_SYMBOL(inode_update_timestamps); 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ci/** 191862306a36Sopenharmony_ci * generic_update_time - update the timestamps on the inode 191962306a36Sopenharmony_ci * @inode: inode to be updated 192062306a36Sopenharmony_ci * @flags: S_* flags that needed to be updated 192162306a36Sopenharmony_ci * 192262306a36Sopenharmony_ci * The update_time function is called when an inode's timestamps need to be 192362306a36Sopenharmony_ci * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, 192462306a36Sopenharmony_ci * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME 192562306a36Sopenharmony_ci * updates can be handled done independently of the rest. 192662306a36Sopenharmony_ci * 192762306a36Sopenharmony_ci * Returns a S_* mask indicating which fields were updated. 192862306a36Sopenharmony_ci */ 192962306a36Sopenharmony_ciint generic_update_time(struct inode *inode, int flags) 193062306a36Sopenharmony_ci{ 193162306a36Sopenharmony_ci int updated = inode_update_timestamps(inode, flags); 193262306a36Sopenharmony_ci int dirty_flags = 0; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci if (updated & (S_ATIME|S_MTIME|S_CTIME)) 193562306a36Sopenharmony_ci dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; 193662306a36Sopenharmony_ci if (updated & S_VERSION) 193762306a36Sopenharmony_ci dirty_flags |= I_DIRTY_SYNC; 193862306a36Sopenharmony_ci __mark_inode_dirty(inode, dirty_flags); 193962306a36Sopenharmony_ci return updated; 194062306a36Sopenharmony_ci} 194162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_update_time); 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ci/* 194462306a36Sopenharmony_ci * This does the actual work of updating an inodes time or version. Must have 194562306a36Sopenharmony_ci * had called mnt_want_write() before calling this. 194662306a36Sopenharmony_ci */ 194762306a36Sopenharmony_ciint inode_update_time(struct inode *inode, int flags) 194862306a36Sopenharmony_ci{ 194962306a36Sopenharmony_ci if (inode->i_op->update_time) 195062306a36Sopenharmony_ci return inode->i_op->update_time(inode, flags); 195162306a36Sopenharmony_ci generic_update_time(inode, flags); 195262306a36Sopenharmony_ci return 0; 195362306a36Sopenharmony_ci} 195462306a36Sopenharmony_ciEXPORT_SYMBOL(inode_update_time); 195562306a36Sopenharmony_ci 195662306a36Sopenharmony_ci/** 195762306a36Sopenharmony_ci * atime_needs_update - update the access time 195862306a36Sopenharmony_ci * @path: the &struct path to update 195962306a36Sopenharmony_ci * @inode: inode to update 196062306a36Sopenharmony_ci * 196162306a36Sopenharmony_ci * Update the accessed time on an inode and mark it for writeback. 196262306a36Sopenharmony_ci * This function automatically handles read only file systems and media, 196362306a36Sopenharmony_ci * as well as the "noatime" flag and inode specific "noatime" markers. 196462306a36Sopenharmony_ci */ 196562306a36Sopenharmony_cibool atime_needs_update(const struct path *path, struct inode *inode) 196662306a36Sopenharmony_ci{ 196762306a36Sopenharmony_ci struct vfsmount *mnt = path->mnt; 196862306a36Sopenharmony_ci struct timespec64 now; 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci if (inode->i_flags & S_NOATIME) 197162306a36Sopenharmony_ci return false; 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_ci /* Atime updates will likely cause i_uid and i_gid to be written 197462306a36Sopenharmony_ci * back improprely if their true value is unknown to the vfs. 197562306a36Sopenharmony_ci */ 197662306a36Sopenharmony_ci if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode)) 197762306a36Sopenharmony_ci return false; 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci if (IS_NOATIME(inode)) 198062306a36Sopenharmony_ci return false; 198162306a36Sopenharmony_ci if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) 198262306a36Sopenharmony_ci return false; 198362306a36Sopenharmony_ci 198462306a36Sopenharmony_ci if (mnt->mnt_flags & MNT_NOATIME) 198562306a36Sopenharmony_ci return false; 198662306a36Sopenharmony_ci if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 198762306a36Sopenharmony_ci return false; 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci now = current_time(inode); 199062306a36Sopenharmony_ci 199162306a36Sopenharmony_ci if (!relatime_need_update(mnt, inode, now)) 199262306a36Sopenharmony_ci return false; 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci if (timespec64_equal(&inode->i_atime, &now)) 199562306a36Sopenharmony_ci return false; 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci return true; 199862306a36Sopenharmony_ci} 199962306a36Sopenharmony_ci 200062306a36Sopenharmony_civoid touch_atime(const struct path *path) 200162306a36Sopenharmony_ci{ 200262306a36Sopenharmony_ci struct vfsmount *mnt = path->mnt; 200362306a36Sopenharmony_ci struct inode *inode = d_inode(path->dentry); 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci if (!atime_needs_update(path, inode)) 200662306a36Sopenharmony_ci return; 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci if (!sb_start_write_trylock(inode->i_sb)) 200962306a36Sopenharmony_ci return; 201062306a36Sopenharmony_ci 201162306a36Sopenharmony_ci if (__mnt_want_write(mnt) != 0) 201262306a36Sopenharmony_ci goto skip_update; 201362306a36Sopenharmony_ci /* 201462306a36Sopenharmony_ci * File systems can error out when updating inodes if they need to 201562306a36Sopenharmony_ci * allocate new space to modify an inode (such is the case for 201662306a36Sopenharmony_ci * Btrfs), but since we touch atime while walking down the path we 201762306a36Sopenharmony_ci * really don't care if we failed to update the atime of the file, 201862306a36Sopenharmony_ci * so just ignore the return value. 201962306a36Sopenharmony_ci * We may also fail on filesystems that have the ability to make parts 202062306a36Sopenharmony_ci * of the fs read only, e.g. subvolumes in Btrfs. 202162306a36Sopenharmony_ci */ 202262306a36Sopenharmony_ci inode_update_time(inode, S_ATIME); 202362306a36Sopenharmony_ci __mnt_drop_write(mnt); 202462306a36Sopenharmony_ciskip_update: 202562306a36Sopenharmony_ci sb_end_write(inode->i_sb); 202662306a36Sopenharmony_ci} 202762306a36Sopenharmony_ciEXPORT_SYMBOL(touch_atime); 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci/* 203062306a36Sopenharmony_ci * Return mask of changes for notify_change() that need to be done as a 203162306a36Sopenharmony_ci * response to write or truncate. Return 0 if nothing has to be changed. 203262306a36Sopenharmony_ci * Negative value on error (change should be denied). 203362306a36Sopenharmony_ci */ 203462306a36Sopenharmony_ciint dentry_needs_remove_privs(struct mnt_idmap *idmap, 203562306a36Sopenharmony_ci struct dentry *dentry) 203662306a36Sopenharmony_ci{ 203762306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 203862306a36Sopenharmony_ci int mask = 0; 203962306a36Sopenharmony_ci int ret; 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_ci if (IS_NOSEC(inode)) 204262306a36Sopenharmony_ci return 0; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci mask = setattr_should_drop_suidgid(idmap, inode); 204562306a36Sopenharmony_ci ret = security_inode_need_killpriv(dentry); 204662306a36Sopenharmony_ci if (ret < 0) 204762306a36Sopenharmony_ci return ret; 204862306a36Sopenharmony_ci if (ret) 204962306a36Sopenharmony_ci mask |= ATTR_KILL_PRIV; 205062306a36Sopenharmony_ci return mask; 205162306a36Sopenharmony_ci} 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_cistatic int __remove_privs(struct mnt_idmap *idmap, 205462306a36Sopenharmony_ci struct dentry *dentry, int kill) 205562306a36Sopenharmony_ci{ 205662306a36Sopenharmony_ci struct iattr newattrs; 205762306a36Sopenharmony_ci 205862306a36Sopenharmony_ci newattrs.ia_valid = ATTR_FORCE | kill; 205962306a36Sopenharmony_ci /* 206062306a36Sopenharmony_ci * Note we call this on write, so notify_change will not 206162306a36Sopenharmony_ci * encounter any conflicting delegations: 206262306a36Sopenharmony_ci */ 206362306a36Sopenharmony_ci return notify_change(idmap, dentry, &newattrs, NULL); 206462306a36Sopenharmony_ci} 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_cistatic int __file_remove_privs(struct file *file, unsigned int flags) 206762306a36Sopenharmony_ci{ 206862306a36Sopenharmony_ci struct dentry *dentry = file_dentry(file); 206962306a36Sopenharmony_ci struct inode *inode = file_inode(file); 207062306a36Sopenharmony_ci int error = 0; 207162306a36Sopenharmony_ci int kill; 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) 207462306a36Sopenharmony_ci return 0; 207562306a36Sopenharmony_ci 207662306a36Sopenharmony_ci kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry); 207762306a36Sopenharmony_ci if (kill < 0) 207862306a36Sopenharmony_ci return kill; 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_ci if (kill) { 208162306a36Sopenharmony_ci if (flags & IOCB_NOWAIT) 208262306a36Sopenharmony_ci return -EAGAIN; 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci error = __remove_privs(file_mnt_idmap(file), dentry, kill); 208562306a36Sopenharmony_ci } 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci if (!error) 208862306a36Sopenharmony_ci inode_has_no_xattr(inode); 208962306a36Sopenharmony_ci return error; 209062306a36Sopenharmony_ci} 209162306a36Sopenharmony_ci 209262306a36Sopenharmony_ci/** 209362306a36Sopenharmony_ci * file_remove_privs - remove special file privileges (suid, capabilities) 209462306a36Sopenharmony_ci * @file: file to remove privileges from 209562306a36Sopenharmony_ci * 209662306a36Sopenharmony_ci * When file is modified by a write or truncation ensure that special 209762306a36Sopenharmony_ci * file privileges are removed. 209862306a36Sopenharmony_ci * 209962306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure. 210062306a36Sopenharmony_ci */ 210162306a36Sopenharmony_ciint file_remove_privs(struct file *file) 210262306a36Sopenharmony_ci{ 210362306a36Sopenharmony_ci return __file_remove_privs(file, 0); 210462306a36Sopenharmony_ci} 210562306a36Sopenharmony_ciEXPORT_SYMBOL(file_remove_privs); 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_cistatic int inode_needs_update_time(struct inode *inode) 210862306a36Sopenharmony_ci{ 210962306a36Sopenharmony_ci int sync_it = 0; 211062306a36Sopenharmony_ci struct timespec64 now = current_time(inode); 211162306a36Sopenharmony_ci struct timespec64 ctime; 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_ci /* First try to exhaust all avenues to not sync */ 211462306a36Sopenharmony_ci if (IS_NOCMTIME(inode)) 211562306a36Sopenharmony_ci return 0; 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci if (!timespec64_equal(&inode->i_mtime, &now)) 211862306a36Sopenharmony_ci sync_it = S_MTIME; 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci ctime = inode_get_ctime(inode); 212162306a36Sopenharmony_ci if (!timespec64_equal(&ctime, &now)) 212262306a36Sopenharmony_ci sync_it |= S_CTIME; 212362306a36Sopenharmony_ci 212462306a36Sopenharmony_ci if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) 212562306a36Sopenharmony_ci sync_it |= S_VERSION; 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_ci return sync_it; 212862306a36Sopenharmony_ci} 212962306a36Sopenharmony_ci 213062306a36Sopenharmony_cistatic int __file_update_time(struct file *file, int sync_mode) 213162306a36Sopenharmony_ci{ 213262306a36Sopenharmony_ci int ret = 0; 213362306a36Sopenharmony_ci struct inode *inode = file_inode(file); 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ci /* try to update time settings */ 213662306a36Sopenharmony_ci if (!__mnt_want_write_file(file)) { 213762306a36Sopenharmony_ci ret = inode_update_time(inode, sync_mode); 213862306a36Sopenharmony_ci __mnt_drop_write_file(file); 213962306a36Sopenharmony_ci } 214062306a36Sopenharmony_ci 214162306a36Sopenharmony_ci return ret; 214262306a36Sopenharmony_ci} 214362306a36Sopenharmony_ci 214462306a36Sopenharmony_ci/** 214562306a36Sopenharmony_ci * file_update_time - update mtime and ctime time 214662306a36Sopenharmony_ci * @file: file accessed 214762306a36Sopenharmony_ci * 214862306a36Sopenharmony_ci * Update the mtime and ctime members of an inode and mark the inode for 214962306a36Sopenharmony_ci * writeback. Note that this function is meant exclusively for usage in 215062306a36Sopenharmony_ci * the file write path of filesystems, and filesystems may choose to 215162306a36Sopenharmony_ci * explicitly ignore updates via this function with the _NOCMTIME inode 215262306a36Sopenharmony_ci * flag, e.g. for network filesystem where these imestamps are handled 215362306a36Sopenharmony_ci * by the server. This can return an error for file systems who need to 215462306a36Sopenharmony_ci * allocate space in order to update an inode. 215562306a36Sopenharmony_ci * 215662306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure. 215762306a36Sopenharmony_ci */ 215862306a36Sopenharmony_ciint file_update_time(struct file *file) 215962306a36Sopenharmony_ci{ 216062306a36Sopenharmony_ci int ret; 216162306a36Sopenharmony_ci struct inode *inode = file_inode(file); 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_ci ret = inode_needs_update_time(inode); 216462306a36Sopenharmony_ci if (ret <= 0) 216562306a36Sopenharmony_ci return ret; 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci return __file_update_time(file, ret); 216862306a36Sopenharmony_ci} 216962306a36Sopenharmony_ciEXPORT_SYMBOL(file_update_time); 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci/** 217262306a36Sopenharmony_ci * file_modified_flags - handle mandated vfs changes when modifying a file 217362306a36Sopenharmony_ci * @file: file that was modified 217462306a36Sopenharmony_ci * @flags: kiocb flags 217562306a36Sopenharmony_ci * 217662306a36Sopenharmony_ci * When file has been modified ensure that special 217762306a36Sopenharmony_ci * file privileges are removed and time settings are updated. 217862306a36Sopenharmony_ci * 217962306a36Sopenharmony_ci * If IOCB_NOWAIT is set, special file privileges will not be removed and 218062306a36Sopenharmony_ci * time settings will not be updated. It will return -EAGAIN. 218162306a36Sopenharmony_ci * 218262306a36Sopenharmony_ci * Context: Caller must hold the file's inode lock. 218362306a36Sopenharmony_ci * 218462306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure. 218562306a36Sopenharmony_ci */ 218662306a36Sopenharmony_cistatic int file_modified_flags(struct file *file, int flags) 218762306a36Sopenharmony_ci{ 218862306a36Sopenharmony_ci int ret; 218962306a36Sopenharmony_ci struct inode *inode = file_inode(file); 219062306a36Sopenharmony_ci 219162306a36Sopenharmony_ci /* 219262306a36Sopenharmony_ci * Clear the security bits if the process is not being run by root. 219362306a36Sopenharmony_ci * This keeps people from modifying setuid and setgid binaries. 219462306a36Sopenharmony_ci */ 219562306a36Sopenharmony_ci ret = __file_remove_privs(file, flags); 219662306a36Sopenharmony_ci if (ret) 219762306a36Sopenharmony_ci return ret; 219862306a36Sopenharmony_ci 219962306a36Sopenharmony_ci if (unlikely(file->f_mode & FMODE_NOCMTIME)) 220062306a36Sopenharmony_ci return 0; 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci ret = inode_needs_update_time(inode); 220362306a36Sopenharmony_ci if (ret <= 0) 220462306a36Sopenharmony_ci return ret; 220562306a36Sopenharmony_ci if (flags & IOCB_NOWAIT) 220662306a36Sopenharmony_ci return -EAGAIN; 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci return __file_update_time(file, ret); 220962306a36Sopenharmony_ci} 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci/** 221262306a36Sopenharmony_ci * file_modified - handle mandated vfs changes when modifying a file 221362306a36Sopenharmony_ci * @file: file that was modified 221462306a36Sopenharmony_ci * 221562306a36Sopenharmony_ci * When file has been modified ensure that special 221662306a36Sopenharmony_ci * file privileges are removed and time settings are updated. 221762306a36Sopenharmony_ci * 221862306a36Sopenharmony_ci * Context: Caller must hold the file's inode lock. 221962306a36Sopenharmony_ci * 222062306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure. 222162306a36Sopenharmony_ci */ 222262306a36Sopenharmony_ciint file_modified(struct file *file) 222362306a36Sopenharmony_ci{ 222462306a36Sopenharmony_ci return file_modified_flags(file, 0); 222562306a36Sopenharmony_ci} 222662306a36Sopenharmony_ciEXPORT_SYMBOL(file_modified); 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_ci/** 222962306a36Sopenharmony_ci * kiocb_modified - handle mandated vfs changes when modifying a file 223062306a36Sopenharmony_ci * @iocb: iocb that was modified 223162306a36Sopenharmony_ci * 223262306a36Sopenharmony_ci * When file has been modified ensure that special 223362306a36Sopenharmony_ci * file privileges are removed and time settings are updated. 223462306a36Sopenharmony_ci * 223562306a36Sopenharmony_ci * Context: Caller must hold the file's inode lock. 223662306a36Sopenharmony_ci * 223762306a36Sopenharmony_ci * Return: 0 on success, negative errno on failure. 223862306a36Sopenharmony_ci */ 223962306a36Sopenharmony_ciint kiocb_modified(struct kiocb *iocb) 224062306a36Sopenharmony_ci{ 224162306a36Sopenharmony_ci return file_modified_flags(iocb->ki_filp, iocb->ki_flags); 224262306a36Sopenharmony_ci} 224362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kiocb_modified); 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ciint inode_needs_sync(struct inode *inode) 224662306a36Sopenharmony_ci{ 224762306a36Sopenharmony_ci if (IS_SYNC(inode)) 224862306a36Sopenharmony_ci return 1; 224962306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 225062306a36Sopenharmony_ci return 1; 225162306a36Sopenharmony_ci return 0; 225262306a36Sopenharmony_ci} 225362306a36Sopenharmony_ciEXPORT_SYMBOL(inode_needs_sync); 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci/* 225662306a36Sopenharmony_ci * If we try to find an inode in the inode hash while it is being 225762306a36Sopenharmony_ci * deleted, we have to wait until the filesystem completes its 225862306a36Sopenharmony_ci * deletion before reporting that it isn't found. This function waits 225962306a36Sopenharmony_ci * until the deletion _might_ have completed. Callers are responsible 226062306a36Sopenharmony_ci * to recheck inode state. 226162306a36Sopenharmony_ci * 226262306a36Sopenharmony_ci * It doesn't matter if I_NEW is not set initially, a call to 226362306a36Sopenharmony_ci * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list 226462306a36Sopenharmony_ci * will DTRT. 226562306a36Sopenharmony_ci */ 226662306a36Sopenharmony_cistatic void __wait_on_freeing_inode(struct inode *inode) 226762306a36Sopenharmony_ci{ 226862306a36Sopenharmony_ci wait_queue_head_t *wq; 226962306a36Sopenharmony_ci DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 227062306a36Sopenharmony_ci wq = bit_waitqueue(&inode->i_state, __I_NEW); 227162306a36Sopenharmony_ci prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 227262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 227362306a36Sopenharmony_ci spin_unlock(&inode_hash_lock); 227462306a36Sopenharmony_ci schedule(); 227562306a36Sopenharmony_ci finish_wait(wq, &wait.wq_entry); 227662306a36Sopenharmony_ci spin_lock(&inode_hash_lock); 227762306a36Sopenharmony_ci} 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_cistatic __initdata unsigned long ihash_entries; 228062306a36Sopenharmony_cistatic int __init set_ihash_entries(char *str) 228162306a36Sopenharmony_ci{ 228262306a36Sopenharmony_ci if (!str) 228362306a36Sopenharmony_ci return 0; 228462306a36Sopenharmony_ci ihash_entries = simple_strtoul(str, &str, 0); 228562306a36Sopenharmony_ci return 1; 228662306a36Sopenharmony_ci} 228762306a36Sopenharmony_ci__setup("ihash_entries=", set_ihash_entries); 228862306a36Sopenharmony_ci 228962306a36Sopenharmony_ci/* 229062306a36Sopenharmony_ci * Initialize the waitqueues and inode hash table. 229162306a36Sopenharmony_ci */ 229262306a36Sopenharmony_civoid __init inode_init_early(void) 229362306a36Sopenharmony_ci{ 229462306a36Sopenharmony_ci /* If hashes are distributed across NUMA nodes, defer 229562306a36Sopenharmony_ci * hash allocation until vmalloc space is available. 229662306a36Sopenharmony_ci */ 229762306a36Sopenharmony_ci if (hashdist) 229862306a36Sopenharmony_ci return; 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci inode_hashtable = 230162306a36Sopenharmony_ci alloc_large_system_hash("Inode-cache", 230262306a36Sopenharmony_ci sizeof(struct hlist_head), 230362306a36Sopenharmony_ci ihash_entries, 230462306a36Sopenharmony_ci 14, 230562306a36Sopenharmony_ci HASH_EARLY | HASH_ZERO, 230662306a36Sopenharmony_ci &i_hash_shift, 230762306a36Sopenharmony_ci &i_hash_mask, 230862306a36Sopenharmony_ci 0, 230962306a36Sopenharmony_ci 0); 231062306a36Sopenharmony_ci} 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_civoid __init inode_init(void) 231362306a36Sopenharmony_ci{ 231462306a36Sopenharmony_ci /* inode slab cache */ 231562306a36Sopenharmony_ci inode_cachep = kmem_cache_create("inode_cache", 231662306a36Sopenharmony_ci sizeof(struct inode), 231762306a36Sopenharmony_ci 0, 231862306a36Sopenharmony_ci (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 231962306a36Sopenharmony_ci SLAB_MEM_SPREAD|SLAB_ACCOUNT), 232062306a36Sopenharmony_ci init_once); 232162306a36Sopenharmony_ci 232262306a36Sopenharmony_ci /* Hash may have been set up in inode_init_early */ 232362306a36Sopenharmony_ci if (!hashdist) 232462306a36Sopenharmony_ci return; 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_ci inode_hashtable = 232762306a36Sopenharmony_ci alloc_large_system_hash("Inode-cache", 232862306a36Sopenharmony_ci sizeof(struct hlist_head), 232962306a36Sopenharmony_ci ihash_entries, 233062306a36Sopenharmony_ci 14, 233162306a36Sopenharmony_ci HASH_ZERO, 233262306a36Sopenharmony_ci &i_hash_shift, 233362306a36Sopenharmony_ci &i_hash_mask, 233462306a36Sopenharmony_ci 0, 233562306a36Sopenharmony_ci 0); 233662306a36Sopenharmony_ci} 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_civoid init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) 233962306a36Sopenharmony_ci{ 234062306a36Sopenharmony_ci inode->i_mode = mode; 234162306a36Sopenharmony_ci if (S_ISCHR(mode)) { 234262306a36Sopenharmony_ci inode->i_fop = &def_chr_fops; 234362306a36Sopenharmony_ci inode->i_rdev = rdev; 234462306a36Sopenharmony_ci } else if (S_ISBLK(mode)) { 234562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_BLOCK)) 234662306a36Sopenharmony_ci inode->i_fop = &def_blk_fops; 234762306a36Sopenharmony_ci inode->i_rdev = rdev; 234862306a36Sopenharmony_ci } else if (S_ISFIFO(mode)) 234962306a36Sopenharmony_ci inode->i_fop = &pipefifo_fops; 235062306a36Sopenharmony_ci else if (S_ISSOCK(mode)) 235162306a36Sopenharmony_ci ; /* leave it no_open_fops */ 235262306a36Sopenharmony_ci else 235362306a36Sopenharmony_ci printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" 235462306a36Sopenharmony_ci " inode %s:%lu\n", mode, inode->i_sb->s_id, 235562306a36Sopenharmony_ci inode->i_ino); 235662306a36Sopenharmony_ci} 235762306a36Sopenharmony_ciEXPORT_SYMBOL(init_special_inode); 235862306a36Sopenharmony_ci 235962306a36Sopenharmony_ci/** 236062306a36Sopenharmony_ci * inode_init_owner - Init uid,gid,mode for new inode according to posix standards 236162306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was created from 236262306a36Sopenharmony_ci * @inode: New inode 236362306a36Sopenharmony_ci * @dir: Directory inode 236462306a36Sopenharmony_ci * @mode: mode of the new inode 236562306a36Sopenharmony_ci * 236662306a36Sopenharmony_ci * If the inode has been created through an idmapped mount the idmap of 236762306a36Sopenharmony_ci * the vfsmount must be passed through @idmap. This function will then take 236862306a36Sopenharmony_ci * care to map the inode according to @idmap before checking permissions 236962306a36Sopenharmony_ci * and initializing i_uid and i_gid. On non-idmapped mounts or if permission 237062306a36Sopenharmony_ci * checking is to be performed on the raw inode simply pass @nop_mnt_idmap. 237162306a36Sopenharmony_ci */ 237262306a36Sopenharmony_civoid inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, 237362306a36Sopenharmony_ci const struct inode *dir, umode_t mode) 237462306a36Sopenharmony_ci{ 237562306a36Sopenharmony_ci inode_fsuid_set(inode, idmap); 237662306a36Sopenharmony_ci if (dir && dir->i_mode & S_ISGID) { 237762306a36Sopenharmony_ci inode->i_gid = dir->i_gid; 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_ci /* Directories are special, and always inherit S_ISGID */ 238062306a36Sopenharmony_ci if (S_ISDIR(mode)) 238162306a36Sopenharmony_ci mode |= S_ISGID; 238262306a36Sopenharmony_ci } else 238362306a36Sopenharmony_ci inode_fsgid_set(inode, idmap); 238462306a36Sopenharmony_ci inode->i_mode = mode; 238562306a36Sopenharmony_ci} 238662306a36Sopenharmony_ciEXPORT_SYMBOL(inode_init_owner); 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci/** 238962306a36Sopenharmony_ci * inode_owner_or_capable - check current task permissions to inode 239062306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was found from 239162306a36Sopenharmony_ci * @inode: inode being checked 239262306a36Sopenharmony_ci * 239362306a36Sopenharmony_ci * Return true if current either has CAP_FOWNER in a namespace with the 239462306a36Sopenharmony_ci * inode owner uid mapped, or owns the file. 239562306a36Sopenharmony_ci * 239662306a36Sopenharmony_ci * If the inode has been found through an idmapped mount the idmap of 239762306a36Sopenharmony_ci * the vfsmount must be passed through @idmap. This function will then take 239862306a36Sopenharmony_ci * care to map the inode according to @idmap before checking permissions. 239962306a36Sopenharmony_ci * On non-idmapped mounts or if permission checking is to be performed on the 240062306a36Sopenharmony_ci * raw inode simply passs @nop_mnt_idmap. 240162306a36Sopenharmony_ci */ 240262306a36Sopenharmony_cibool inode_owner_or_capable(struct mnt_idmap *idmap, 240362306a36Sopenharmony_ci const struct inode *inode) 240462306a36Sopenharmony_ci{ 240562306a36Sopenharmony_ci vfsuid_t vfsuid; 240662306a36Sopenharmony_ci struct user_namespace *ns; 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci vfsuid = i_uid_into_vfsuid(idmap, inode); 240962306a36Sopenharmony_ci if (vfsuid_eq_kuid(vfsuid, current_fsuid())) 241062306a36Sopenharmony_ci return true; 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_ci ns = current_user_ns(); 241362306a36Sopenharmony_ci if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER)) 241462306a36Sopenharmony_ci return true; 241562306a36Sopenharmony_ci return false; 241662306a36Sopenharmony_ci} 241762306a36Sopenharmony_ciEXPORT_SYMBOL(inode_owner_or_capable); 241862306a36Sopenharmony_ci 241962306a36Sopenharmony_ci/* 242062306a36Sopenharmony_ci * Direct i/o helper functions 242162306a36Sopenharmony_ci */ 242262306a36Sopenharmony_cistatic void __inode_dio_wait(struct inode *inode) 242362306a36Sopenharmony_ci{ 242462306a36Sopenharmony_ci wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); 242562306a36Sopenharmony_ci DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_ci do { 242862306a36Sopenharmony_ci prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); 242962306a36Sopenharmony_ci if (atomic_read(&inode->i_dio_count)) 243062306a36Sopenharmony_ci schedule(); 243162306a36Sopenharmony_ci } while (atomic_read(&inode->i_dio_count)); 243262306a36Sopenharmony_ci finish_wait(wq, &q.wq_entry); 243362306a36Sopenharmony_ci} 243462306a36Sopenharmony_ci 243562306a36Sopenharmony_ci/** 243662306a36Sopenharmony_ci * inode_dio_wait - wait for outstanding DIO requests to finish 243762306a36Sopenharmony_ci * @inode: inode to wait for 243862306a36Sopenharmony_ci * 243962306a36Sopenharmony_ci * Waits for all pending direct I/O requests to finish so that we can 244062306a36Sopenharmony_ci * proceed with a truncate or equivalent operation. 244162306a36Sopenharmony_ci * 244262306a36Sopenharmony_ci * Must be called under a lock that serializes taking new references 244362306a36Sopenharmony_ci * to i_dio_count, usually by inode->i_mutex. 244462306a36Sopenharmony_ci */ 244562306a36Sopenharmony_civoid inode_dio_wait(struct inode *inode) 244662306a36Sopenharmony_ci{ 244762306a36Sopenharmony_ci if (atomic_read(&inode->i_dio_count)) 244862306a36Sopenharmony_ci __inode_dio_wait(inode); 244962306a36Sopenharmony_ci} 245062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_dio_wait); 245162306a36Sopenharmony_ci 245262306a36Sopenharmony_ci/* 245362306a36Sopenharmony_ci * inode_set_flags - atomically set some inode flags 245462306a36Sopenharmony_ci * 245562306a36Sopenharmony_ci * Note: the caller should be holding i_mutex, or else be sure that 245662306a36Sopenharmony_ci * they have exclusive access to the inode structure (i.e., while the 245762306a36Sopenharmony_ci * inode is being instantiated). The reason for the cmpxchg() loop 245862306a36Sopenharmony_ci * --- which wouldn't be necessary if all code paths which modify 245962306a36Sopenharmony_ci * i_flags actually followed this rule, is that there is at least one 246062306a36Sopenharmony_ci * code path which doesn't today so we use cmpxchg() out of an abundance 246162306a36Sopenharmony_ci * of caution. 246262306a36Sopenharmony_ci * 246362306a36Sopenharmony_ci * In the long run, i_mutex is overkill, and we should probably look 246462306a36Sopenharmony_ci * at using the i_lock spinlock to protect i_flags, and then make sure 246562306a36Sopenharmony_ci * it is so documented in include/linux/fs.h and that all code follows 246662306a36Sopenharmony_ci * the locking convention!! 246762306a36Sopenharmony_ci */ 246862306a36Sopenharmony_civoid inode_set_flags(struct inode *inode, unsigned int flags, 246962306a36Sopenharmony_ci unsigned int mask) 247062306a36Sopenharmony_ci{ 247162306a36Sopenharmony_ci WARN_ON_ONCE(flags & ~mask); 247262306a36Sopenharmony_ci set_mask_bits(&inode->i_flags, mask, flags); 247362306a36Sopenharmony_ci} 247462306a36Sopenharmony_ciEXPORT_SYMBOL(inode_set_flags); 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_civoid inode_nohighmem(struct inode *inode) 247762306a36Sopenharmony_ci{ 247862306a36Sopenharmony_ci mapping_set_gfp_mask(inode->i_mapping, GFP_USER); 247962306a36Sopenharmony_ci} 248062306a36Sopenharmony_ciEXPORT_SYMBOL(inode_nohighmem); 248162306a36Sopenharmony_ci 248262306a36Sopenharmony_ci/** 248362306a36Sopenharmony_ci * timestamp_truncate - Truncate timespec to a granularity 248462306a36Sopenharmony_ci * @t: Timespec 248562306a36Sopenharmony_ci * @inode: inode being updated 248662306a36Sopenharmony_ci * 248762306a36Sopenharmony_ci * Truncate a timespec to the granularity supported by the fs 248862306a36Sopenharmony_ci * containing the inode. Always rounds down. gran must 248962306a36Sopenharmony_ci * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). 249062306a36Sopenharmony_ci */ 249162306a36Sopenharmony_cistruct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode) 249262306a36Sopenharmony_ci{ 249362306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 249462306a36Sopenharmony_ci unsigned int gran = sb->s_time_gran; 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max); 249762306a36Sopenharmony_ci if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min)) 249862306a36Sopenharmony_ci t.tv_nsec = 0; 249962306a36Sopenharmony_ci 250062306a36Sopenharmony_ci /* Avoid division in the common cases 1 ns and 1 s. */ 250162306a36Sopenharmony_ci if (gran == 1) 250262306a36Sopenharmony_ci ; /* nothing */ 250362306a36Sopenharmony_ci else if (gran == NSEC_PER_SEC) 250462306a36Sopenharmony_ci t.tv_nsec = 0; 250562306a36Sopenharmony_ci else if (gran > 1 && gran < NSEC_PER_SEC) 250662306a36Sopenharmony_ci t.tv_nsec -= t.tv_nsec % gran; 250762306a36Sopenharmony_ci else 250862306a36Sopenharmony_ci WARN(1, "invalid file time granularity: %u", gran); 250962306a36Sopenharmony_ci return t; 251062306a36Sopenharmony_ci} 251162306a36Sopenharmony_ciEXPORT_SYMBOL(timestamp_truncate); 251262306a36Sopenharmony_ci 251362306a36Sopenharmony_ci/** 251462306a36Sopenharmony_ci * current_time - Return FS time 251562306a36Sopenharmony_ci * @inode: inode. 251662306a36Sopenharmony_ci * 251762306a36Sopenharmony_ci * Return the current time truncated to the time granularity supported by 251862306a36Sopenharmony_ci * the fs. 251962306a36Sopenharmony_ci * 252062306a36Sopenharmony_ci * Note that inode and inode->sb cannot be NULL. 252162306a36Sopenharmony_ci * Otherwise, the function warns and returns time without truncation. 252262306a36Sopenharmony_ci */ 252362306a36Sopenharmony_cistruct timespec64 current_time(struct inode *inode) 252462306a36Sopenharmony_ci{ 252562306a36Sopenharmony_ci struct timespec64 now; 252662306a36Sopenharmony_ci 252762306a36Sopenharmony_ci ktime_get_coarse_real_ts64(&now); 252862306a36Sopenharmony_ci return timestamp_truncate(now, inode); 252962306a36Sopenharmony_ci} 253062306a36Sopenharmony_ciEXPORT_SYMBOL(current_time); 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci/** 253362306a36Sopenharmony_ci * inode_set_ctime_current - set the ctime to current_time 253462306a36Sopenharmony_ci * @inode: inode 253562306a36Sopenharmony_ci * 253662306a36Sopenharmony_ci * Set the inode->i_ctime to the current value for the inode. Returns 253762306a36Sopenharmony_ci * the current value that was assigned to i_ctime. 253862306a36Sopenharmony_ci */ 253962306a36Sopenharmony_cistruct timespec64 inode_set_ctime_current(struct inode *inode) 254062306a36Sopenharmony_ci{ 254162306a36Sopenharmony_ci struct timespec64 now = current_time(inode); 254262306a36Sopenharmony_ci 254362306a36Sopenharmony_ci inode_set_ctime(inode, now.tv_sec, now.tv_nsec); 254462306a36Sopenharmony_ci return now; 254562306a36Sopenharmony_ci} 254662306a36Sopenharmony_ciEXPORT_SYMBOL(inode_set_ctime_current); 254762306a36Sopenharmony_ci 254862306a36Sopenharmony_ci/** 254962306a36Sopenharmony_ci * in_group_or_capable - check whether caller is CAP_FSETID privileged 255062306a36Sopenharmony_ci * @idmap: idmap of the mount @inode was found from 255162306a36Sopenharmony_ci * @inode: inode to check 255262306a36Sopenharmony_ci * @vfsgid: the new/current vfsgid of @inode 255362306a36Sopenharmony_ci * 255462306a36Sopenharmony_ci * Check wether @vfsgid is in the caller's group list or if the caller is 255562306a36Sopenharmony_ci * privileged with CAP_FSETID over @inode. This can be used to determine 255662306a36Sopenharmony_ci * whether the setgid bit can be kept or must be dropped. 255762306a36Sopenharmony_ci * 255862306a36Sopenharmony_ci * Return: true if the caller is sufficiently privileged, false if not. 255962306a36Sopenharmony_ci */ 256062306a36Sopenharmony_cibool in_group_or_capable(struct mnt_idmap *idmap, 256162306a36Sopenharmony_ci const struct inode *inode, vfsgid_t vfsgid) 256262306a36Sopenharmony_ci{ 256362306a36Sopenharmony_ci if (vfsgid_in_group_p(vfsgid)) 256462306a36Sopenharmony_ci return true; 256562306a36Sopenharmony_ci if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) 256662306a36Sopenharmony_ci return true; 256762306a36Sopenharmony_ci return false; 256862306a36Sopenharmony_ci} 256962306a36Sopenharmony_ci 257062306a36Sopenharmony_ci/** 257162306a36Sopenharmony_ci * mode_strip_sgid - handle the sgid bit for non-directories 257262306a36Sopenharmony_ci * @idmap: idmap of the mount the inode was created from 257362306a36Sopenharmony_ci * @dir: parent directory inode 257462306a36Sopenharmony_ci * @mode: mode of the file to be created in @dir 257562306a36Sopenharmony_ci * 257662306a36Sopenharmony_ci * If the @mode of the new file has both the S_ISGID and S_IXGRP bit 257762306a36Sopenharmony_ci * raised and @dir has the S_ISGID bit raised ensure that the caller is 257862306a36Sopenharmony_ci * either in the group of the parent directory or they have CAP_FSETID 257962306a36Sopenharmony_ci * in their user namespace and are privileged over the parent directory. 258062306a36Sopenharmony_ci * In all other cases, strip the S_ISGID bit from @mode. 258162306a36Sopenharmony_ci * 258262306a36Sopenharmony_ci * Return: the new mode to use for the file 258362306a36Sopenharmony_ci */ 258462306a36Sopenharmony_ciumode_t mode_strip_sgid(struct mnt_idmap *idmap, 258562306a36Sopenharmony_ci const struct inode *dir, umode_t mode) 258662306a36Sopenharmony_ci{ 258762306a36Sopenharmony_ci if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) 258862306a36Sopenharmony_ci return mode; 258962306a36Sopenharmony_ci if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) 259062306a36Sopenharmony_ci return mode; 259162306a36Sopenharmony_ci if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir))) 259262306a36Sopenharmony_ci return mode; 259362306a36Sopenharmony_ci return mode & ~S_ISGID; 259462306a36Sopenharmony_ci} 259562306a36Sopenharmony_ciEXPORT_SYMBOL(mode_strip_sgid); 2596