// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_log_priv.h"

#include <linux/iversion.h>

/* Radix tree tags for incore inode tree. */

/* inode is to be reclaimed */
#define XFS_ICI_RECLAIM_TAG	0
/* Inode has speculative preallocations (posteof or cow) to clean. */
#define XFS_ICI_BLOCKGC_TAG	1

/*
 * The goal for walking incore inodes.  These can correspond with incore inode
 * radix tree tags when convenient.  Avoid existing XFS_IWALK namespace.
 */
enum xfs_icwalk_goal {
	/* Goals directly associated with tagged inodes. */
	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
};

/* Forward declarations for the inode cache walk machinery defined below. */
static int xfs_icwalk(struct xfs_mount *mp,
		enum xfs_icwalk_goal goal, struct xfs_icwalk *icw);
static int xfs_icwalk_ag(struct xfs_perag *pag,
		enum xfs_icwalk_goal goal, struct xfs_icwalk *icw);

/*
 * Private inode cache walk flags for struct xfs_icwalk.  Must not
 * coincide with XFS_ICWALK_FLAGS_VALID.
 */

/* Stop scanning after icw_scan_limit inodes. */
#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)

/* NOTE(review): name suggests this limits reclaim to health-tracked (sick)
 * inodes — confirm against the reclaim igrab filter before relying on it. */
#define XFS_ICWALK_FLAG_RECLAIM_SICK	(1U << 27)
#define XFS_ICWALK_FLAG_UNION		(1U << 26) /* union filter algorithm */

/* Mask of all flags private to this file. */
#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_SCAN_LIMIT | \
					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
					 XFS_ICWALK_FLAG_UNION)

/*
 * Allocate and initialise an xfs_inode.
 */
struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL
	 * and return NULL here on ENOMEM.
	 */
	ip = alloc_inode_sb(mp->m_super, xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL);

	/* VFS-level init can still fail (e.g. security blob allocation). */
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_cache_free(xfs_inode_cache, ip);
		return NULL;
	}

	/* VFS doesn't initialise i_mode or i_state! */
	VFS_I(ip)->i_mode = 0;
	VFS_I(ip)->i_state = 0;
	mapping_set_large_folios(VFS_I(ip)->i_mapping);

	XFS_STATS_INC(mp, vn_active);
	/* A freshly constructed (or fully torn down) inode must be unpinned
	 * and carry no inode number; the slab ctor/previous free guarantee
	 * this, so assert rather than reinitialise. */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(ip->i_ino == 0);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_cowfp = NULL;
	memset(&ip->i_af, 0, sizeof(ip->i_af));
	ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS;
	memset(&ip->i_df, 0, sizeof(ip->i_df));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	/* Inherit the filesystem-wide default v3 inode flags. */
	ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
	ip->i_nblocks = 0;
	ip->i_forkoff = 0;
	ip->i_sick = 0;
	ip->i_checked = 0;
	INIT_WORK(&ip->i_ioend_work, xfs_end_io);
	INIT_LIST_HEAD(&ip->i_ioend_list);
	spin_lock_init(&ip->i_ioend_lock);
	ip->i_next_unlinked = NULLAGINO;
	ip->i_prev_unlinked = 0;

	return ip;
}

/*
 * RCU callback that performs the final teardown of an inode once the grace
 * period has elapsed: free the data/attr/cow forks, destroy the log item and
 * return the structure to the slab cache.
 */
STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	/* Only these file types carry a data fork that owns extent state. */
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(&ip->i_df);
		break;
	}

	xfs_ifork_zap_attr(ip);

	if (ip->i_cowfp) {
		xfs_idestroy_fork(ip->i_cowfp);
		kmem_cache_free(xfs_ifork_cache, ip->i_cowfp);
	}
	if (ip->i_itemp) {
		/* The inode log item must not still be in the AIL here. */
		ASSERT(!test_bit(XFS_LI_IN_AIL,
				 &ip->i_itemp->ili_item.li_flags));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	kmem_cache_free(xfs_inode_cache, ip);
}

/*
 * Hand the inode to RCU for freeing.  Actual teardown happens in
 * xfs_inode_free_callback() after the grace period, so concurrent RCU-walk
 * lookups may still dereference the structure until then.
 */
static void
__xfs_inode_free(
	struct xfs_inode	*ip)
{
	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list));
	XFS_STATS_DEC(ip->i_mount, vn_active);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state.  The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	__xfs_inode_free(ip);
}

/*
 * Queue background inode reclaim work if there are reclaimable inodes and there
 * isn't reclaim work already scheduled or in progress.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount        *mp)
{

	/* RCU protects the perag tree while we test its tags. */
	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		/* xfs_syncd_centisecs is in centiseconds; convert to ms. */
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}

/*
 * Background scanning to trim preallocated space. This is queued based on the
 * 'speculative_prealloc_lifetime' tunable (5m by default).
 */
static inline void
xfs_blockgc_queue(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	/* Nothing to do if background blockgc is administratively off. */
	if (!xfs_is_blockgc_enabled(mp))
		return;

	rcu_read_lock();
	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
		queue_delayed_work(pag->pag_mount->m_blockgc_wq,
				   &pag->pag_blockgc_work,
				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
	rcu_read_unlock();
}

/* Set a tag on both the AG incore inode tree and the AG radix tree.
 */
static void
xfs_perag_set_inode_tag(
	struct xfs_perag	*pag,
	xfs_agino_t		agino,
	unsigned int		tag)
{
	struct xfs_mount	*mp = pag->pag_mount;
	bool			was_tagged;

	/* Caller must hold the per-AG inode cache lock. */
	lockdep_assert_held(&pag->pag_ici_lock);

	was_tagged = radix_tree_tagged(&pag->pag_ici_root, tag);
	radix_tree_tag_set(&pag->pag_ici_root, agino, tag);

	if (tag == XFS_ICI_RECLAIM_TAG)
		pag->pag_ici_reclaimable++;

	/* If the AG was already tagged, the perag tree is up to date. */
	if (was_tagged)
		return;

	/* propagate the tag up into the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag);
	spin_unlock(&mp->m_perag_lock);

	/* start background work */
	switch (tag) {
	case XFS_ICI_RECLAIM_TAG:
		xfs_reclaim_work_queue(mp);
		break;
	case XFS_ICI_BLOCKGC_TAG:
		xfs_blockgc_queue(pag);
		break;
	}

	trace_xfs_perag_set_inode_tag(pag, _RET_IP_);
}

/* Clear a tag on both the AG incore inode tree and the AG radix tree.
 */
static void
xfs_perag_clear_inode_tag(
	struct xfs_perag	*pag,
	xfs_agino_t		agino,
	unsigned int		tag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	/* Caller must hold the per-AG inode cache lock. */
	lockdep_assert_held(&pag->pag_ici_lock);

	/*
	 * Reclaim can signal (with a null agino) that it cleared its own tag
	 * by removing the inode from the radix tree.
	 */
	if (agino != NULLAGINO)
		radix_tree_tag_clear(&pag->pag_ici_root, agino, tag);
	else
		ASSERT(tag == XFS_ICI_RECLAIM_TAG);

	if (tag == XFS_ICI_RECLAIM_TAG)
		pag->pag_ici_reclaimable--;

	/* Other inodes in this AG still carry the tag; nothing to propagate. */
	if (radix_tree_tagged(&pag->pag_ici_root, tag))
		return;

	/* clear the tag from the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
	spin_unlock(&mp->m_perag_lock);

	trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}

/*
 * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
 * part of the structure.
This is made more complex by the fact we store 29862306a36Sopenharmony_ci * information about the on-disk values in the VFS inode and so we can't just 29962306a36Sopenharmony_ci * overwrite the values unconditionally. Hence we save the parameters we 30062306a36Sopenharmony_ci * need to retain across reinitialisation, and rewrite them into the VFS inode 30162306a36Sopenharmony_ci * after reinitialisation even if it fails. 30262306a36Sopenharmony_ci */ 30362306a36Sopenharmony_cistatic int 30462306a36Sopenharmony_cixfs_reinit_inode( 30562306a36Sopenharmony_ci struct xfs_mount *mp, 30662306a36Sopenharmony_ci struct inode *inode) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci int error; 30962306a36Sopenharmony_ci uint32_t nlink = inode->i_nlink; 31062306a36Sopenharmony_ci uint32_t generation = inode->i_generation; 31162306a36Sopenharmony_ci uint64_t version = inode_peek_iversion(inode); 31262306a36Sopenharmony_ci umode_t mode = inode->i_mode; 31362306a36Sopenharmony_ci dev_t dev = inode->i_rdev; 31462306a36Sopenharmony_ci kuid_t uid = inode->i_uid; 31562306a36Sopenharmony_ci kgid_t gid = inode->i_gid; 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci error = inode_init_always(mp->m_super, inode); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci set_nlink(inode, nlink); 32062306a36Sopenharmony_ci inode->i_generation = generation; 32162306a36Sopenharmony_ci inode_set_iversion_queried(inode, version); 32262306a36Sopenharmony_ci inode->i_mode = mode; 32362306a36Sopenharmony_ci inode->i_rdev = dev; 32462306a36Sopenharmony_ci inode->i_uid = uid; 32562306a36Sopenharmony_ci inode->i_gid = gid; 32662306a36Sopenharmony_ci mapping_set_large_folios(inode->i_mapping); 32762306a36Sopenharmony_ci return error; 32862306a36Sopenharmony_ci} 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci/* 33162306a36Sopenharmony_ci * Carefully nudge an inode whose VFS state has been torn down back into a 33262306a36Sopenharmony_ci * usable state. 
Drops the i_flags_lock and the rcu read lock.
 */
static int
xfs_iget_recycle(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			error;

	trace_xfs_iget_recycle(ip);

	/* Can't sleep here with i_flags_lock held; let the caller retry. */
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		return -EAGAIN;

	/*
	 * We need to make it look like the inode is being reclaimed to prevent
	 * the actual reclaim workers from stomping over us while we recycle
	 * the inode.  We can't clear the radix tree tag yet as it requires
	 * pag_ici_lock to be held exclusive.
	 */
	ip->i_flags |= XFS_IRECLAIM;

	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	error = xfs_reinit_inode(mp, inode);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error) {
		/*
		 * Re-initializing the inode failed, and we are in deep
		 * trouble.  Try to re-add it to the reclaim list.
		 */
		rcu_read_lock();
		spin_lock(&ip->i_flags_lock);
		ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
		ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		trace_xfs_iget_recycle_fail(ip);
		return error;
	}

	/* Retake both locks to clear the reclaim tag and flip state to INEW. */
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	/*
	 * Clear the per-lifetime state in the inode as we are now effectively
	 * a new inode and need to return to the initial state before reuse
	 * occurs.
	 */
	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
	ip->i_flags |= XFS_INEW;
	xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
			XFS_ICI_RECLAIM_TAG);
	inode->i_state = I_NEW;
	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);

	return 0;
}

/*
 * If we are allocating a new inode, then check what was returned is
 * actually a free, empty inode. If we are not allocating an inode,
 * then check we didn't find a free inode.
40162306a36Sopenharmony_ci * 40262306a36Sopenharmony_ci * Returns: 40362306a36Sopenharmony_ci * 0 if the inode free state matches the lookup context 40462306a36Sopenharmony_ci * -ENOENT if the inode is free and we are not allocating 40562306a36Sopenharmony_ci * -EFSCORRUPTED if there is any state mismatch at all 40662306a36Sopenharmony_ci */ 40762306a36Sopenharmony_cistatic int 40862306a36Sopenharmony_cixfs_iget_check_free_state( 40962306a36Sopenharmony_ci struct xfs_inode *ip, 41062306a36Sopenharmony_ci int flags) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci if (flags & XFS_IGET_CREATE) { 41362306a36Sopenharmony_ci /* should be a free inode */ 41462306a36Sopenharmony_ci if (VFS_I(ip)->i_mode != 0) { 41562306a36Sopenharmony_ci xfs_warn(ip->i_mount, 41662306a36Sopenharmony_ci"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", 41762306a36Sopenharmony_ci ip->i_ino, VFS_I(ip)->i_mode); 41862306a36Sopenharmony_ci return -EFSCORRUPTED; 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci if (ip->i_nblocks != 0) { 42262306a36Sopenharmony_ci xfs_warn(ip->i_mount, 42362306a36Sopenharmony_ci"Corruption detected! Free inode 0x%llx has blocks allocated!", 42462306a36Sopenharmony_ci ip->i_ino); 42562306a36Sopenharmony_ci return -EFSCORRUPTED; 42662306a36Sopenharmony_ci } 42762306a36Sopenharmony_ci return 0; 42862306a36Sopenharmony_ci } 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci /* should be an allocated inode */ 43162306a36Sopenharmony_ci if (VFS_I(ip)->i_mode == 0) 43262306a36Sopenharmony_ci return -ENOENT; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci return 0; 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci/* Make all pending inactivation work start immediately. 
 */
static bool
xfs_inodegc_queue_all(
	struct xfs_mount	*mp)
{
	struct xfs_inodegc	*gc;
	int			cpu;
	bool			ret = false;

	/* Kick every per-CPU worker that has queued inodes; report if any. */
	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		if (!llist_empty(&gc->list)) {
			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
			ret = true;
		}
	}

	return ret;
}

/* Wait for all queued work and collect errors */
static int
xfs_inodegc_wait_all(
	struct xfs_mount	*mp)
{
	int			cpu;
	int			error = 0;

	flush_workqueue(mp->m_inodegc_wq);
	/* Return the first per-CPU error seen and reset all of them. */
	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
		struct xfs_inodegc	*gc;

		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		if (gc->error && !error)
			error = gc->error;
		gc->error = 0;
	}

	return error;
}

/*
 * Check the validity of the inode we just found in the cache
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino)
		goto out_skip;

	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * If we're racing with the inactivation worker we also want to wait.
	 * If we're creating a new file, it's possible that the worker
	 * previously marked the inode as free on disk but hasn't finished
	 * updating the incore state yet.  The AGI buffer will be dirty and
	 * locked to the icreate transaction, so a synchronous push of the
	 * inodegc workers would result in deadlock.  For a regular iget, the
	 * worker is running already, so we might as well wait.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	wait_on_inode to wait for these flags to be cleared
	 *	instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
		goto out_skip;

	if (ip->i_flags & XFS_NEED_INACTIVE) {
		/* Unlinked inodes cannot be re-grabbed. */
		if (VFS_I(ip)->i_nlink == 0) {
			error = -ENOENT;
			goto out_error;
		}
		goto out_inodegc_flush;
	}

	/*
	 * Check the inode free state is valid. This also detects lookup
	 * racing with unlinks.
	 */
	error = xfs_iget_check_free_state(ip, flags);
	if (error)
		goto out_error;

	/* Skip inodes that have no vfs state. */
	if ((flags & XFS_IGET_INCORE) &&
	    (ip->i_flags & XFS_IRECLAIMABLE))
		goto out_skip;

	/* The inode fits the selection criteria; process it. */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		/* Drops i_flags_lock and RCU read lock. */
		error = xfs_iget_recycle(pag, ip);
		if (error == -EAGAIN)
			goto out_skip;
		if (error)
			return error;
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode))
			goto out_skip;

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	if (!(flags & XFS_IGET_INCORE))
		xfs_iflags_clear(ip, XFS_ISTALE);
	XFS_STATS_INC(mp, xs_ig_found);

	return 0;

out_skip:
	trace_xfs_iget_skip(ip);
	XFS_STATS_INC(mp, xs_ig_frecycle);
	error = -EAGAIN;
out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;

out_inodegc_flush:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	/*
	 * Do not wait for the workers, because the caller could hold an AGI
	 * buffer lock.  We're just going to sleep in a loop anyway.
	 */
	if (xfs_is_inodegc_enabled(mp))
		xfs_inodegc_queue_all(mp);
	return -EAGAIN;
}

static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return -ENOMEM;

	error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags);
	if (error)
		goto out_destroy;

	/*
	 * For version 5 superblocks, if we are initialising a new inode and we
	 * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can
	 * simply build the new inode core with a random generation number.
	 *
	 * For version 4 (and older) superblocks, log recovery is dependent on
	 * the i_flushiter field being initialised from the current on-disk
	 * value and hence we must also read the inode off disk even when
	 * initializing new inodes.
62862306a36Sopenharmony_ci */ 62962306a36Sopenharmony_ci if (xfs_has_v3inodes(mp) && 63062306a36Sopenharmony_ci (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) { 63162306a36Sopenharmony_ci VFS_I(ip)->i_generation = get_random_u32(); 63262306a36Sopenharmony_ci } else { 63362306a36Sopenharmony_ci struct xfs_buf *bp; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp); 63662306a36Sopenharmony_ci if (error) 63762306a36Sopenharmony_ci goto out_destroy; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci error = xfs_inode_from_disk(ip, 64062306a36Sopenharmony_ci xfs_buf_offset(bp, ip->i_imap.im_boffset)); 64162306a36Sopenharmony_ci if (!error) 64262306a36Sopenharmony_ci xfs_buf_set_ref(bp, XFS_INO_REF); 64362306a36Sopenharmony_ci xfs_trans_brelse(tp, bp); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci if (error) 64662306a36Sopenharmony_ci goto out_destroy; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci trace_xfs_iget_miss(ip); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci /* 65262306a36Sopenharmony_ci * Check the inode free state is valid. This also detects lookup 65362306a36Sopenharmony_ci * racing with unlinks. 65462306a36Sopenharmony_ci */ 65562306a36Sopenharmony_ci error = xfs_iget_check_free_state(ip, flags); 65662306a36Sopenharmony_ci if (error) 65762306a36Sopenharmony_ci goto out_destroy; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci /* 66062306a36Sopenharmony_ci * Preload the radix tree so we can insert safely under the 66162306a36Sopenharmony_ci * write spinlock. Note that we cannot sleep inside the preload 66262306a36Sopenharmony_ci * region. Since we can be called from transaction context, don't 66362306a36Sopenharmony_ci * recurse into the file system. 
66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci if (radix_tree_preload(GFP_NOFS)) { 66662306a36Sopenharmony_ci error = -EAGAIN; 66762306a36Sopenharmony_ci goto out_destroy; 66862306a36Sopenharmony_ci } 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci /* 67162306a36Sopenharmony_ci * Because the inode hasn't been added to the radix-tree yet it can't 67262306a36Sopenharmony_ci * be found by another thread, so we can do the non-sleeping lock here. 67362306a36Sopenharmony_ci */ 67462306a36Sopenharmony_ci if (lock_flags) { 67562306a36Sopenharmony_ci if (!xfs_ilock_nowait(ip, lock_flags)) 67662306a36Sopenharmony_ci BUG(); 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci /* 68062306a36Sopenharmony_ci * These values must be set before inserting the inode into the radix 68162306a36Sopenharmony_ci * tree as the moment it is inserted a concurrent lookup (allowed by the 68262306a36Sopenharmony_ci * RCU locking mechanism) can find it and that lookup must see that this 68362306a36Sopenharmony_ci * is an inode currently under construction (i.e. that XFS_INEW is set). 68462306a36Sopenharmony_ci * The ip->i_flags_lock that protects the XFS_INEW flag forms the 68562306a36Sopenharmony_ci * memory barrier that ensures this detection works correctly at lookup 68662306a36Sopenharmony_ci * time. 
68762306a36Sopenharmony_ci */ 68862306a36Sopenharmony_ci iflags = XFS_INEW; 68962306a36Sopenharmony_ci if (flags & XFS_IGET_DONTCACHE) 69062306a36Sopenharmony_ci d_mark_dontcache(VFS_I(ip)); 69162306a36Sopenharmony_ci ip->i_udquot = NULL; 69262306a36Sopenharmony_ci ip->i_gdquot = NULL; 69362306a36Sopenharmony_ci ip->i_pdquot = NULL; 69462306a36Sopenharmony_ci xfs_iflags_set(ip, iflags); 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci /* insert the new inode */ 69762306a36Sopenharmony_ci spin_lock(&pag->pag_ici_lock); 69862306a36Sopenharmony_ci error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 69962306a36Sopenharmony_ci if (unlikely(error)) { 70062306a36Sopenharmony_ci WARN_ON(error != -EEXIST); 70162306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_ig_dup); 70262306a36Sopenharmony_ci error = -EAGAIN; 70362306a36Sopenharmony_ci goto out_preload_end; 70462306a36Sopenharmony_ci } 70562306a36Sopenharmony_ci spin_unlock(&pag->pag_ici_lock); 70662306a36Sopenharmony_ci radix_tree_preload_end(); 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci *ipp = ip; 70962306a36Sopenharmony_ci return 0; 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ciout_preload_end: 71262306a36Sopenharmony_ci spin_unlock(&pag->pag_ici_lock); 71362306a36Sopenharmony_ci radix_tree_preload_end(); 71462306a36Sopenharmony_ci if (lock_flags) 71562306a36Sopenharmony_ci xfs_iunlock(ip, lock_flags); 71662306a36Sopenharmony_ciout_destroy: 71762306a36Sopenharmony_ci __destroy_inode(VFS_I(ip)); 71862306a36Sopenharmony_ci xfs_inode_free(ip); 71962306a36Sopenharmony_ci return error; 72062306a36Sopenharmony_ci} 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci/* 72362306a36Sopenharmony_ci * Look up an inode by number in the given file system. The inode is looked up 72462306a36Sopenharmony_ci * in the cache held in each AG. If the inode is found in the cache, initialise 72562306a36Sopenharmony_ci * the vfs inode if necessary. 
 *
 * If it is not in core, read it in from the file system's device, add it to the
 * cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * Inode lookup is only done during metadata operations and not as part of the
 * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup.
 */
int
xfs_iget(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	uint			flags,
	uint			lock_flags,
	struct xfs_inode	**ipp)
{
	struct xfs_inode	*ip;
	struct xfs_perag	*pag;
	xfs_agino_t		agino;
	int			error;

	/* IOLOCK flags are explicitly disallowed here; see comment above. */
	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return -EINVAL;

	XFS_STATS_INC(mp, xs_ig_attempts);

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		/* Cache hit path drops the RCU read lock for us. */
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		if (flags & XFS_IGET_INCORE) {
			/* Caller only wants in-core inodes; don't read disk. */
			error = -ENODATA;
			goto out_error_or_again;
		}
		XFS_STATS_INC(mp, xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can setup the inode
	 * now.	 If it's a new inode being created, xfs_init_new_inode will
	 * handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
		xfs_setup_existing_inode(ip);
	return 0;

out_error_or_again:
	/*
	 * -EAGAIN means we raced with reclaim/recycle or inodegc; back off
	 * briefly and retry unless the caller opted out of retries.
	 */
	if (!(flags & (XFS_IGET_INCORE | XFS_IGET_NORETRY)) &&
	    error == -EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}

/*
 * Grab the inode for reclaim exclusively.
 *
 * We have found this inode via a lookup under RCU, so the inode may have
 * already been freed, or it may be in the process of being recycled by
 * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode
 * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE
 * will not be set. Hence we need to check for both these flag conditions to
 * avoid inodes that are no longer reclaim candidates.
 *
 * Note: checking for other state flags here, under the i_flags_lock or not, is
 * racy and should be avoided. Those races should be resolved only after we have
 * ensured that we are able to reclaim this inode and the world can see that we
 * are going to reclaim it.
 *
 * Return true if we grabbed it, false otherwise.
 */
static bool
xfs_reclaim_igrab(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw)
{
	ASSERT(rcu_read_lock_held());

	spin_lock(&ip->i_flags_lock);
	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* not a reclaim candidate. */
		spin_unlock(&ip->i_flags_lock);
		return false;
	}

	/* Don't reclaim a sick inode unless the caller asked for it. */
	if (ip->i_sick &&
	    (!icw || !(icw->icw_flags & XFS_ICWALK_FLAG_RECLAIM_SICK))) {
		spin_unlock(&ip->i_flags_lock);
		return false;
	}

	/* Claim it: other walkers now see XFS_IRECLAIM and skip it. */
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	return true;
}

/*
 * Inode reclaim is non-blocking, so the default action if progress cannot be
 * made is to "requeue" the inode for reclaim by unlocking it and clearing the
 * XFS_IRECLAIM flag.  If we are in a shutdown state, we don't care about
 * blocking anymore and hence we can wait for the inode to be able to reclaim
 * it.
 *
 * We do no IO here - if callers require inodes to be cleaned they must push the
 * AIL first to trigger writeback of dirty inodes.  This enables writeback to be
 * done in the background in a non-blocking manner, and enables memory reclaim
 * to make progress without blocking.
 */
static void
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag)
{
	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		goto out;
	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
		goto out_iunlock;

	/*
	 * Check for log shutdown because aborting the inode can move the log
	 * tail and corrupt in memory state. This is fine if the log is shut
	 * down, but if the log is still active and only the mount is shut down
	 * then the in-memory log tail movement caused by the abort can be
	 * incorrectly propagated to disk.
	 */
	if (xlog_is_shutdown(ip->i_mount->m_log)) {
		xfs_iunpin_wait(ip);
		xfs_iflush_shutdown_abort(ip);
		goto reclaim;
	}
	/* Pinned or dirty inodes cannot be reclaimed without blocking. */
	if (xfs_ipincount(ip))
		goto out_clear_flush;
	if (!xfs_inode_clean(ip))
		goto out_clear_flush;

	xfs_iflags_clear(ip, XFS_IFLUSHING);
reclaim:
	trace_xfs_inode_reclaiming(ip);

	/*
	 * Because we use RCU freeing we need to ensure the inode always appears
	 * to be reclaimed with an invalid inode number when in the free state.
	 * We do this as early as possible under the ILOCK so that
	 * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
	 * detect races with us here. By doing this, we guarantee that once
	 * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
	 * it will see either a valid inode that will serialise correctly, or it
	 * will see an invalid inode that it can skip.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	ip->i_sick = 0;
	ip->i_checked = 0;
	spin_unlock(&ip->i_flags_lock);

	ASSERT(!ip->i_itemp || ip->i_itemp->ili_item.li_buf == NULL);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree assert that it's been there before to catch
	 * problems with the inode life time early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ino)))
		ASSERT(0);
	xfs_perag_clear_inode_tag(pag, NULLAGINO, XFS_ICI_RECLAIM_TAG);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups.  This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	ASSERT(!ip->i_udquot && !ip->i_gdquot && !ip->i_pdquot);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	ASSERT(xfs_inode_clean(ip));

	__xfs_inode_free(ip);
	return;

out_clear_flush:
	xfs_iflags_clear(ip, XFS_IFLUSHING);
out_iunlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	/* Requeue: drop our claim so a later pass can try again. */
	xfs_iflags_clear(ip, XFS_IRECLAIM);
}

/* Reclaim sick inodes if we're unmounting or the fs went down. */
static inline bool
xfs_want_reclaim_sick(
	struct xfs_mount	*mp)
{
	return xfs_is_unmounting(mp) || xfs_has_norecovery(mp) ||
	       xfs_is_shutdown(mp);
}

/*
 * Synchronously reclaim all reclaimable inodes.  Keeps pushing the AIL and
 * walking the reclaim-tagged inodes until no per-AG tree carries the
 * XFS_ICI_RECLAIM_TAG any more.
 */
void
xfs_reclaim_inodes(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= 0,
	};

	if (xfs_want_reclaim_sick(mp))
		icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;

	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		xfs_ail_push_all_sync(mp->m_ail);
		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
	}
}

/*
 * The shrinker infrastructure determines how many inodes we should scan for
 * reclaim. We want as many clean inodes ready to reclaim as possible, so we
 * push the AIL here. We also want to proactively free up memory if we can to
 * minimise the amount of work memory reclaim has to do so we kick the
 * background reclaim if it isn't already scheduled.
 */
long
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	unsigned long		nr_to_scan)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
		/* Clamp so the limit fits the icw_scan_limit field. */
		.icw_scan_limit	= min_t(unsigned long, LONG_MAX, nr_to_scan),
	};

	if (xfs_want_reclaim_sick(mp))
		icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;

	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
	/* Always report 0; the reclaim count is not fed back to the caller. */
	return 0;
}

/*
 * Return the number of reclaimable inodes in the filesystem for
 * the shrinker to determine how much to reclaim.
 */
long
xfs_reclaim_inodes_count(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;
	long			reclaimable = 0;

	/* Sum the per-AG reclaimable counts over all reclaim-tagged AGs. */
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		ag = pag->pag_agno + 1;
		reclaimable += pag->pag_ici_reclaimable;
		xfs_perag_put(pag);
	}
	return reclaimable;
}

/*
 * Intersection-based id filter: the inode matches only if every id criterion
 * set in @icw->icw_flags (uid, gid, project id) matches the inode.
 */
STATIC bool
xfs_icwalk_match_id(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw)
{
	if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) &&
	    !uid_eq(VFS_I(ip)->i_uid, icw->icw_uid))
		return false;

	if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) &&
	    !gid_eq(VFS_I(ip)->i_gid, icw->icw_gid))
		return false;

	if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) &&
	    ip->i_projid != icw->icw_prid)
		return false;

	return true;
}

/*
 * A union-based inode filtering algorithm. Process the inode if any of the
 * criteria match. This is for global/internal scans only.
 */
STATIC bool
xfs_icwalk_match_id_union(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw)
{
	if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) &&
	    uid_eq(VFS_I(ip)->i_uid, icw->icw_uid))
		return true;

	if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) &&
	    gid_eq(VFS_I(ip)->i_gid, icw->icw_gid))
		return true;

	if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) &&
	    ip->i_projid == icw->icw_prid)
		return true;

	return false;
}

/*
 * Is this inode @ip eligible for eof/cow block reclamation, given some
 * filtering parameters @icw?  The inode is eligible if @icw is null or
 * if the predicate functions match.
107562306a36Sopenharmony_ci */ 107662306a36Sopenharmony_cistatic bool 107762306a36Sopenharmony_cixfs_icwalk_match( 107862306a36Sopenharmony_ci struct xfs_inode *ip, 107962306a36Sopenharmony_ci struct xfs_icwalk *icw) 108062306a36Sopenharmony_ci{ 108162306a36Sopenharmony_ci bool match; 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci if (!icw) 108462306a36Sopenharmony_ci return true; 108562306a36Sopenharmony_ci 108662306a36Sopenharmony_ci if (icw->icw_flags & XFS_ICWALK_FLAG_UNION) 108762306a36Sopenharmony_ci match = xfs_icwalk_match_id_union(ip, icw); 108862306a36Sopenharmony_ci else 108962306a36Sopenharmony_ci match = xfs_icwalk_match_id(ip, icw); 109062306a36Sopenharmony_ci if (!match) 109162306a36Sopenharmony_ci return false; 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci /* skip the inode if the file size is too small */ 109462306a36Sopenharmony_ci if ((icw->icw_flags & XFS_ICWALK_FLAG_MINFILESIZE) && 109562306a36Sopenharmony_ci XFS_ISIZE(ip) < icw->icw_min_file_size) 109662306a36Sopenharmony_ci return false; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci return true; 109962306a36Sopenharmony_ci} 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci/* 110262306a36Sopenharmony_ci * This is a fast pass over the inode cache to try to get reclaim moving on as 110362306a36Sopenharmony_ci * many inodes as possible in a short period of time. It kicks itself every few 110462306a36Sopenharmony_ci * seconds, as well as being kicked by the inode cache shrinker when memory 110562306a36Sopenharmony_ci * goes low. 
 */
void
xfs_reclaim_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);

	/* One non-blocking reclaim pass, then reschedule ourselves. */
	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
	xfs_reclaim_work_queue(mp);
}

/*
 * Try to free speculative post-EOF preallocations on @ip, subject to the
 * filters in @icw.  On success *lockflags records that XFS_IOLOCK_EXCL is
 * held so the caller can drop it.  Returns -EAGAIN in sync mode when the
 * iolock could not be taken without blocking; otherwise 0 or the error from
 * xfs_free_eofblocks().
 */
STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw,
	unsigned int		*lockflags)
{
	bool			wait;

	wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);

	if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS))
		return 0;

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (!xfs_icwalk_match(ip, icw))
		return 0;

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_IOLOCK_EXCL;

	if (xfs_can_free_eofblocks(ip, false))
		return xfs_free_eofblocks(ip);

	/* inode could be preallocated or append-only */
	trace_xfs_inode_free_eofblocks_invalid(ip);
	xfs_inode_clear_eofblocks_tag(ip);
	return 0;
}

/*
 * Mark @ip as having speculative preallocations of kind @iflag (post-EOF or
 * CoW) and tag it in the per-AG tree so background blockgc can find it.
 */
static void
xfs_blockgc_set_iflag(
	struct xfs_inode	*ip,
	unsigned long		iflag)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;

	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);

	/*
	 * Don't bother locking the AG and looking up in the radix trees
	 * if we already know that we have the tag set.
	 */
	if (ip->i_flags & iflag)
		return;
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= iflag;
	spin_unlock(&ip->i_flags_lock);

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
			XFS_ICI_BLOCKGC_TAG);

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_set_eofblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_set_eofblocks_tag(ip);
	return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS);
}

/*
 * Clear preallocation flag @iflag on @ip; drop the per-AG blockgc tag only
 * when neither EOFBLOCKS nor COWBLOCKS remains set.
 */
static void
xfs_blockgc_clear_iflag(
	struct xfs_inode	*ip,
	unsigned long		iflag)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;
	bool			clear_tag;

	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);

	spin_lock(&ip->i_flags_lock);
	ip->i_flags &= ~iflag;
	clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0;
	spin_unlock(&ip->i_flags_lock);

	if (!clear_tag)
		return;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
			XFS_ICI_BLOCKGC_TAG);

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_clear_eofblocks_tag(ip);
	return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS);
}

/*
 * Set ourselves up to free CoW blocks from this file.  If it's already clean
 * then we can bail out quickly, but otherwise we must back off if the file
 * is undergoing some kind of write.
 */
static bool
xfs_prep_free_cowblocks(
	struct xfs_inode	*ip)
{
	/*
	 * Just clear the tag if we have an empty cow fork or none at all. It's
	 * possible the inode was fully unshared since it was originally tagged.
	 */
	if (!xfs_inode_has_cow_data(ip)) {
		trace_xfs_inode_free_cowblocks_invalid(ip);
		xfs_inode_clear_cowblocks_tag(ip);
		return false;
	}

	/*
	 * If the mapping is dirty or under writeback we cannot touch the
	 * CoW fork.  Leave it alone if we're in the midst of a directio.
	 */
	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
	    atomic_read(&VFS_I(ip)->i_dio_count))
		return false;

	return true;
}

/*
 * Automatic CoW Reservation Freeing
 *
 * These functions automatically garbage collect leftover CoW reservations
 * that were made on behalf of a cowextsize hint when we start to run out
 * of quota or when the reservations sit around for too long. If the file
 * has dirty pages or is undergoing writeback, its CoW reservations will
 * be retained.
 *
 * The actual garbage collection piggybacks off the same code that runs
 * the speculative EOF preallocation garbage collector.
 */

/*
 * Try to cancel this inode's leftover CoW reservations.  Trylocks the IOLOCK
 * and MMAPLOCK, recording what it took in @lockflags so the caller can drop
 * them later.  Returns 0 if there was nothing to do (or a trylock failed on
 * a non-sync scan), -EAGAIN if a synchronous caller should revisit the inode,
 * or a negative errno from xfs_reflink_cancel_cow_range().
 */
STATIC int
xfs_inode_free_cowblocks(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw,
	unsigned int		*lockflags)
{
	bool			wait;
	int			ret = 0;

	wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);

	if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
		return 0;

	/* Unlocked pre-check; repeated under the locks below. */
	if (!xfs_prep_free_cowblocks(ip))
		return 0;

	if (!xfs_icwalk_match(ip, icw))
		return 0;

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!(*lockflags & XFS_IOLOCK_EXCL) &&
	    !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_IOLOCK_EXCL;

	if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_MMAPLOCK_EXCL;

	/*
	 * Check again, nobody else should be able to dirty blocks or change
	 * the reflink iflag now that we have the first two locks held.
	 */
	if (xfs_prep_free_cowblocks(ip))
		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
	return ret;
}

/* Tag this inode as having CoW fork reservations to garbage collect. */
void
xfs_inode_set_cowblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_set_cowblocks_tag(ip);
	return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS);
}

/* Untag an inode whose CoW fork reservations have been cleaned out. */
void
xfs_inode_clear_cowblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_clear_cowblocks_tag(ip);
	return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS);
}

/* Disable post-EOF and CoW block auto-reclamation. */
void
xfs_blockgc_stop(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	/* Already disabled?  Nothing to cancel. */
	if (!xfs_clear_blockgc_enabled(mp))
		return;

	for_each_perag(mp, agno, pag)
		cancel_delayed_work_sync(&pag->pag_blockgc_work);
	trace_xfs_blockgc_stop(mp, __return_address);
}

/* Enable post-EOF and CoW block auto-reclamation. */
void
xfs_blockgc_start(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	/* Already enabled?  Workers are scheduled elsewhere. */
	if (xfs_set_blockgc_enabled(mp))
		return;

	trace_xfs_blockgc_start(mp, __return_address);
	/* Requeue the worker for every AG that still has tagged inodes. */
	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
		xfs_blockgc_queue(pag);
}

/* Don't try to run block gc on an inode that's in any of these states.
 */
#define XFS_BLOCKGC_NOGRAB_IFLAGS	(XFS_INEW | \
					 XFS_NEED_INACTIVE | \
					 XFS_INACTIVATING | \
					 XFS_IRECLAIMABLE | \
					 XFS_IRECLAIM)
/*
 * Decide if the given @ip is eligible for garbage collection of speculative
 * preallocations, and grab it if so.  Returns true if it's ready to go or
 * false if we should just ignore it.
 *
 * Called under rcu_read_lock(); on success the caller owns an igrab()
 * reference that xfs_blockgc_scan_inode() later drops via xfs_irele().
 */
static bool
xfs_blockgc_igrab(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(rcu_read_lock_held());

	/* Check for stale RCU freed inode */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock_noent;

	if (ip->i_flags & XFS_BLOCKGC_NOGRAB_IFLAGS)
		goto out_unlock_noent;
	spin_unlock(&ip->i_flags_lock);

	/* nothing to sync during shutdown */
	if (xfs_is_shutdown(ip->i_mount))
		return false;

	/* If we can't grab the inode, it must be on its way to reclaim. */
	if (!igrab(inode))
		return false;

	/* inode is valid */
	return true;

out_unlock_noent:
	spin_unlock(&ip->i_flags_lock);
	return false;
}

/*
 * Scan one incore inode for block preallocations that we can remove.
 * Consumes the igrab() reference taken by xfs_blockgc_igrab() and drops
 * whatever locks the free_eofblocks/free_cowblocks helpers accumulated.
 */
static int
xfs_blockgc_scan_inode(
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw)
{
	unsigned int		lockflags = 0;
	int			error;

	error = xfs_inode_free_eofblocks(ip, icw, &lockflags);
	if (error)
		goto unlock;

	error = xfs_inode_free_cowblocks(ip, icw, &lockflags);
unlock:
	if (lockflags)
		xfs_iunlock(ip, lockflags);
	xfs_irele(ip);
	return error;
}

/* Background worker that trims preallocated space. */
void
xfs_blockgc_worker(
	struct work_struct	*work)
{
	struct xfs_perag	*pag = container_of(to_delayed_work(work),
					struct xfs_perag, pag_blockgc_work);
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	trace_xfs_blockgc_worker(mp, __return_address);

	error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL);
	if (error)
		xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
				pag->pag_agno, error);
	/* Rearm the delayed work for the next scan interval. */
	xfs_blockgc_queue(pag);
}

/*
 * Try to free space in the filesystem by purging inactive inodes, eofblocks
 * and cowblocks.
 */
int
xfs_blockgc_free_space(
	struct xfs_mount	*mp,
	struct xfs_icwalk	*icw)
{
	int			error;

	trace_xfs_blockgc_free_space(mp, icw, _RET_IP_);

	/* Walk all BLOCKGC-tagged inodes, applying the filters in @icw. */
	error = xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw);
	if (error)
		return error;

	/* Push any deferred inactivations through as well. */
	return xfs_inodegc_flush(mp);
}

/*
 * Reclaim all the free space that we can by scheduling the background blockgc
 * and inodegc workers immediately and waiting for them all to clear.
 */
int
xfs_blockgc_flush_all(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	trace_xfs_blockgc_flush_all(mp, __return_address);

	/*
	 * For each blockgc worker, move its queue time up to now.  If it
	 * wasn't queued, it will not be requeued.  Then flush whatever's
	 * left.
	 */
	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
		mod_delayed_work(pag->pag_mount->m_blockgc_wq,
				&pag->pag_blockgc_work, 0);

	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
		flush_delayed_work(&pag->pag_blockgc_work);

	return xfs_inodegc_flush(mp);
}

/*
 * Run cow/eofblocks scans on the supplied dquots.  We don't know exactly which
 * quota caused an allocation failure, so we make a best effort by including
 * each quota under low free space conditions (less than 1% free space) in the
 * scan.
 *
 * Callers must not hold any inode's ILOCK.  If requesting a synchronous scan
 * (XFS_ICWALK_FLAG_SYNC), the caller also must not hold any inode's IOLOCK or
 * MMAPLOCK.
151762306a36Sopenharmony_ci */ 151862306a36Sopenharmony_ciint 151962306a36Sopenharmony_cixfs_blockgc_free_dquots( 152062306a36Sopenharmony_ci struct xfs_mount *mp, 152162306a36Sopenharmony_ci struct xfs_dquot *udqp, 152262306a36Sopenharmony_ci struct xfs_dquot *gdqp, 152362306a36Sopenharmony_ci struct xfs_dquot *pdqp, 152462306a36Sopenharmony_ci unsigned int iwalk_flags) 152562306a36Sopenharmony_ci{ 152662306a36Sopenharmony_ci struct xfs_icwalk icw = {0}; 152762306a36Sopenharmony_ci bool do_work = false; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci if (!udqp && !gdqp && !pdqp) 153062306a36Sopenharmony_ci return 0; 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci /* 153362306a36Sopenharmony_ci * Run a scan to free blocks using the union filter to cover all 153462306a36Sopenharmony_ci * applicable quotas in a single scan. 153562306a36Sopenharmony_ci */ 153662306a36Sopenharmony_ci icw.icw_flags = XFS_ICWALK_FLAG_UNION | iwalk_flags; 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) { 153962306a36Sopenharmony_ci icw.icw_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id); 154062306a36Sopenharmony_ci icw.icw_flags |= XFS_ICWALK_FLAG_UID; 154162306a36Sopenharmony_ci do_work = true; 154262306a36Sopenharmony_ci } 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci if (XFS_IS_UQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) { 154562306a36Sopenharmony_ci icw.icw_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id); 154662306a36Sopenharmony_ci icw.icw_flags |= XFS_ICWALK_FLAG_GID; 154762306a36Sopenharmony_ci do_work = true; 154862306a36Sopenharmony_ci } 154962306a36Sopenharmony_ci 155062306a36Sopenharmony_ci if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) { 155162306a36Sopenharmony_ci icw.icw_prid = pdqp->q_id; 155262306a36Sopenharmony_ci icw.icw_flags |= XFS_ICWALK_FLAG_PRID; 155362306a36Sopenharmony_ci do_work = true; 155462306a36Sopenharmony_ci } 
155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci if (!do_work) 155762306a36Sopenharmony_ci return 0; 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci return xfs_blockgc_free_space(mp, &icw); 156062306a36Sopenharmony_ci} 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci/* Run cow/eofblocks scans on the quotas attached to the inode. */ 156362306a36Sopenharmony_ciint 156462306a36Sopenharmony_cixfs_blockgc_free_quota( 156562306a36Sopenharmony_ci struct xfs_inode *ip, 156662306a36Sopenharmony_ci unsigned int iwalk_flags) 156762306a36Sopenharmony_ci{ 156862306a36Sopenharmony_ci return xfs_blockgc_free_dquots(ip->i_mount, 156962306a36Sopenharmony_ci xfs_inode_dquot(ip, XFS_DQTYPE_USER), 157062306a36Sopenharmony_ci xfs_inode_dquot(ip, XFS_DQTYPE_GROUP), 157162306a36Sopenharmony_ci xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), iwalk_flags); 157262306a36Sopenharmony_ci} 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci/* XFS Inode Cache Walking Code */ 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci/* 157762306a36Sopenharmony_ci * The inode lookup is done in batches to keep the amount of lock traffic and 157862306a36Sopenharmony_ci * radix tree lookups to a minimum. The batch size is a trade off between 157962306a36Sopenharmony_ci * lookup reduction and stack usage. This is in the reclaim path, so we can't 158062306a36Sopenharmony_ci * be too greedy. 158162306a36Sopenharmony_ci */ 158262306a36Sopenharmony_ci#define XFS_LOOKUP_BATCH 32 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci/* 158662306a36Sopenharmony_ci * Decide if we want to grab this inode in anticipation of doing work towards 158762306a36Sopenharmony_ci * the goal. 
 */
static inline bool
xfs_icwalk_igrab(
	enum xfs_icwalk_goal	goal,
	struct xfs_inode	*ip,
	struct xfs_icwalk	*icw)
{
	switch (goal) {
	case XFS_ICWALK_BLOCKGC:
		return xfs_blockgc_igrab(ip);
	case XFS_ICWALK_RECLAIM:
		return xfs_reclaim_igrab(ip, icw);
	default:
		return false;
	}
}

/*
 * Process an inode.  Each processing function must handle any state changes
 * made by the icwalk igrab function.  Return -EAGAIN to skip an inode.
 */
static inline int
xfs_icwalk_process_inode(
	enum xfs_icwalk_goal	goal,
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	struct xfs_icwalk	*icw)
{
	int			error = 0;

	switch (goal) {
	case XFS_ICWALK_BLOCKGC:
		error = xfs_blockgc_scan_inode(ip, icw);
		break;
	case XFS_ICWALK_RECLAIM:
		/* Reclaim has no error to propagate. */
		xfs_reclaim_inode(ip, pag);
		break;
	}
	return error;
}

/*
 * For a given per-AG structure @pag and a goal, grab qualifying inodes and
 * process them in some manner.
 */
static int
xfs_icwalk_ag(
	struct xfs_perag	*pag,
	enum xfs_icwalk_goal	goal,
	struct xfs_icwalk	*icw)
{
	struct xfs_mount	*mp = pag->pag_mount;
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	bool			done;
	int			nr_found;

restart:
	done = false;
	skipped = 0;
	/* Reclaim walks resume from a persistent per-AG cursor. */
	if (goal == XFS_ICWALK_RECLAIM)
		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
	else
		first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		rcu_read_lock();

		/* The walk goal doubles as the radix tree tag to look up. */
		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
				(void **) batch, first_index,
				XFS_LOOKUP_BATCH, goal);
		if (!nr_found) {
			done = true;
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. if we found
		 * nothing, nr == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || !xfs_icwalk_igrab(goal, ip, icw))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = true;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = xfs_icwalk_process_inode(goal, batch[i], pag,
					icw);
			if (error == -EAGAIN) {
				/* Skipped inodes force a rescan (below). */
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted.  */
		if (error == -EFSCORRUPTED)
			break;

		cond_resched();

		if (icw && (icw->icw_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
			icw->icw_scan_limit -= XFS_LOOKUP_BATCH;
			if (icw->icw_scan_limit <= 0)
				break;
		}
	} while (nr_found && !done);

	if (goal == XFS_ICWALK_RECLAIM) {
		if (done)
			first_index = 0;
		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
	}

	if (skipped) {
		/* Back off briefly, then retry the inodes we skipped. */
		delay(1);
		goto restart;
	}
	return last_error;
}

/* Walk all incore inodes to achieve a given goal.
 */
static int
xfs_icwalk(
	struct xfs_mount	*mp,
	enum xfs_icwalk_goal	goal,
	struct xfs_icwalk	*icw)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		agno;

	for_each_perag_tag(mp, agno, pag, goal) {
		error = xfs_icwalk_ag(pag, goal, icw);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED) {
				/* Drop the loop's perag reference on break. */
				xfs_perag_rele(pag);
				break;
			}
		}
	}
	return last_error;
	/*
	 * Compile-time check only; deliberately placed after the return so it
	 * emits no object code.  Verifies the private walk flags don't
	 * overlap the externally visible XFS_ICWALK_FLAGS_VALID namespace.
	 */
	BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_ICWALK_FLAGS_VALID);
}

#ifdef DEBUG
/* Warn about any delalloc extents remaining in the given fork of @ip. */
static void
xfs_check_delalloc(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;

	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
		return;
	do {
		if (isnullstartblock(got.br_startblock)) {
			xfs_warn(ip->i_mount,
	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
				ip->i_ino,
				whichfork == XFS_DATA_FORK ? "data" : "cow",
				got.br_startoff, got.br_blockcount);
		}
	} while (xfs_iext_next_extent(ifp, &icur, &got));
}
#else
#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
#endif

/* Schedule the inode for reclaim. */
static void
xfs_inodegc_set_reclaimable(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;

	/*
	 * Delalloc blocks should have been flushed by now; finding any on a
	 * non-shutdown filesystem is a bug, so complain loudly (DEBUG only).
	 */
	if (!xfs_is_shutdown(mp) && ip->i_delayed_blks) {
		xfs_check_delalloc(ip, XFS_DATA_FORK);
		xfs_check_delalloc(ip, XFS_COW_FORK);
		ASSERT(0);
	}

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	trace_xfs_inode_set_reclaimable(ip);
	/* Inactivation is finished; hand the inode over to reclaim. */
	ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING);
	ip->i_flags |= XFS_IRECLAIMABLE;
	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
			XFS_ICI_RECLAIM_TAG);

	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

/*
 * Free all speculative preallocations and possibly even the inode itself.
 * This is the last chance to make changes to an otherwise unreferenced file
 * before incore reclamation happens.
 */
static int
xfs_inodegc_inactivate(
	struct xfs_inode	*ip)
{
	int			error;

	trace_xfs_inode_inactivating(ip);
	error = xfs_inactive(ip);
	/* Regardless of the outcome, pass the inode on to reclaim. */
	xfs_inodegc_set_reclaimable(ip);
	return error;

}

/* Per-CPU worker that inactivates the inodes queued on its llist. */
void
xfs_inodegc_worker(
	struct work_struct	*work)
{
	struct xfs_inodegc	*gc = container_of(to_delayed_work(work),
						struct xfs_inodegc, work);
	struct llist_node	*node = llist_del_all(&gc->list);
	struct xfs_inode	*ip, *n;
	struct xfs_mount	*mp = gc->mp;
	unsigned int		nofs_flag;

	/*
	 * Clear the cpu mask bit and ensure that we have seen the latest
	 * update of the gc structure associated with this CPU. This matches
	 * with the release semantics used when setting the cpumask bit in
	 * xfs_inodegc_queue.
	 */
	cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask);
	smp_mb__after_atomic();

	WRITE_ONCE(gc->items, 0);

	if (!node)
		return;

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim.  To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	ip = llist_entry(node, struct xfs_inode, i_gclist);
	trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits));

	WRITE_ONCE(gc->shrinker_hits, 0);
	llist_for_each_entry_safe(ip, n, node, i_gclist) {
		int	error;

		xfs_iflags_set(ip, XFS_INACTIVATING);
		error = xfs_inodegc_inactivate(ip);
		/* Record only the first error for the whole batch. */
		if (error && !gc->error)
			gc->error = error;
	}

	memalloc_nofs_restore(nofs_flag);
}

/*
 * Expedite all pending inodegc work to run immediately. This does not wait for
 * completion of the work.
189162306a36Sopenharmony_ci */ 189262306a36Sopenharmony_civoid 189362306a36Sopenharmony_cixfs_inodegc_push( 189462306a36Sopenharmony_ci struct xfs_mount *mp) 189562306a36Sopenharmony_ci{ 189662306a36Sopenharmony_ci if (!xfs_is_inodegc_enabled(mp)) 189762306a36Sopenharmony_ci return; 189862306a36Sopenharmony_ci trace_xfs_inodegc_push(mp, __return_address); 189962306a36Sopenharmony_ci xfs_inodegc_queue_all(mp); 190062306a36Sopenharmony_ci} 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci/* 190362306a36Sopenharmony_ci * Force all currently queued inode inactivation work to run immediately and 190462306a36Sopenharmony_ci * wait for the work to finish. 190562306a36Sopenharmony_ci */ 190662306a36Sopenharmony_ciint 190762306a36Sopenharmony_cixfs_inodegc_flush( 190862306a36Sopenharmony_ci struct xfs_mount *mp) 190962306a36Sopenharmony_ci{ 191062306a36Sopenharmony_ci xfs_inodegc_push(mp); 191162306a36Sopenharmony_ci trace_xfs_inodegc_flush(mp, __return_address); 191262306a36Sopenharmony_ci return xfs_inodegc_wait_all(mp); 191362306a36Sopenharmony_ci} 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_ci/* 191662306a36Sopenharmony_ci * Flush all the pending work and then disable the inode inactivation background 191762306a36Sopenharmony_ci * workers and wait for them to stop. Caller must hold sb->s_umount to 191862306a36Sopenharmony_ci * coordinate changes in the inodegc_enabled state. 
 */
void
xfs_inodegc_stop(
	struct xfs_mount	*mp)
{
	bool			rerun;

	/* Already disabled by somebody else; nothing for us to do. */
	if (!xfs_clear_inodegc_enabled(mp))
		return;

	/*
	 * Drain all pending inodegc work, including inodes that could be
	 * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
	 * threads that sample the inodegc state just prior to us clearing it.
	 * The inodegc flag state prevents new threads from queuing more
	 * inodes, so we queue pending work items and flush the workqueue until
	 * all inodegc lists are empty. IOWs, we cannot use drain_workqueue
	 * here because it does not allow other unserialized mechanisms to
	 * reschedule inodegc work while this draining is in progress.
	 */
	xfs_inodegc_queue_all(mp);
	do {
		flush_workqueue(mp->m_inodegc_wq);
		/* queue_all returns true if any worker had to be rescheduled */
		rerun = xfs_inodegc_queue_all(mp);
	} while (rerun);

	trace_xfs_inodegc_stop(mp, __return_address);
}

/*
 * Enable the inode inactivation background workers and schedule deferred inode
 * inactivation work if there is any. Caller must hold sb->s_umount to
 * coordinate changes in the inodegc_enabled state.
195262306a36Sopenharmony_ci */ 195362306a36Sopenharmony_civoid 195462306a36Sopenharmony_cixfs_inodegc_start( 195562306a36Sopenharmony_ci struct xfs_mount *mp) 195662306a36Sopenharmony_ci{ 195762306a36Sopenharmony_ci if (xfs_set_inodegc_enabled(mp)) 195862306a36Sopenharmony_ci return; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci trace_xfs_inodegc_start(mp, __return_address); 196162306a36Sopenharmony_ci xfs_inodegc_queue_all(mp); 196262306a36Sopenharmony_ci} 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci#ifdef CONFIG_XFS_RT 196562306a36Sopenharmony_cistatic inline bool 196662306a36Sopenharmony_cixfs_inodegc_want_queue_rt_file( 196762306a36Sopenharmony_ci struct xfs_inode *ip) 196862306a36Sopenharmony_ci{ 196962306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci if (!XFS_IS_REALTIME_INODE(ip)) 197262306a36Sopenharmony_ci return false; 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci if (__percpu_counter_compare(&mp->m_frextents, 197562306a36Sopenharmony_ci mp->m_low_rtexts[XFS_LOWSP_5_PCNT], 197662306a36Sopenharmony_ci XFS_FDBLOCKS_BATCH) < 0) 197762306a36Sopenharmony_ci return true; 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci return false; 198062306a36Sopenharmony_ci} 198162306a36Sopenharmony_ci#else 198262306a36Sopenharmony_ci# define xfs_inodegc_want_queue_rt_file(ip) (false) 198362306a36Sopenharmony_ci#endif /* CONFIG_XFS_RT */ 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci/* 198662306a36Sopenharmony_ci * Schedule the inactivation worker when: 198762306a36Sopenharmony_ci * 198862306a36Sopenharmony_ci * - We've accumulated more than one inode cluster buffer's worth of inodes. 198962306a36Sopenharmony_ci * - There is less than 5% free space left. 199062306a36Sopenharmony_ci * - Any of the quotas for this inode are near an enforcement limit. 
199162306a36Sopenharmony_ci */ 199262306a36Sopenharmony_cistatic inline bool 199362306a36Sopenharmony_cixfs_inodegc_want_queue_work( 199462306a36Sopenharmony_ci struct xfs_inode *ip, 199562306a36Sopenharmony_ci unsigned int items) 199662306a36Sopenharmony_ci{ 199762306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci if (items > mp->m_ino_geo.inodes_per_cluster) 200062306a36Sopenharmony_ci return true; 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci if (__percpu_counter_compare(&mp->m_fdblocks, 200362306a36Sopenharmony_ci mp->m_low_space[XFS_LOWSP_5_PCNT], 200462306a36Sopenharmony_ci XFS_FDBLOCKS_BATCH) < 0) 200562306a36Sopenharmony_ci return true; 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci if (xfs_inodegc_want_queue_rt_file(ip)) 200862306a36Sopenharmony_ci return true; 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER)) 201162306a36Sopenharmony_ci return true; 201262306a36Sopenharmony_ci 201362306a36Sopenharmony_ci if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP)) 201462306a36Sopenharmony_ci return true; 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ)) 201762306a36Sopenharmony_ci return true; 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci return false; 202062306a36Sopenharmony_ci} 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci/* 202362306a36Sopenharmony_ci * Upper bound on the number of inodes in each AG that can be queued for 202462306a36Sopenharmony_ci * inactivation at any given time, to avoid monopolizing the workqueue. 
202562306a36Sopenharmony_ci */ 202662306a36Sopenharmony_ci#define XFS_INODEGC_MAX_BACKLOG (4 * XFS_INODES_PER_CHUNK) 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci/* 202962306a36Sopenharmony_ci * Make the frontend wait for inactivations when: 203062306a36Sopenharmony_ci * 203162306a36Sopenharmony_ci * - Memory shrinkers queued the inactivation worker and it hasn't finished. 203262306a36Sopenharmony_ci * - The queue depth exceeds the maximum allowable percpu backlog. 203362306a36Sopenharmony_ci * 203462306a36Sopenharmony_ci * Note: If the current thread is running a transaction, we don't ever want to 203562306a36Sopenharmony_ci * wait for other transactions because that could introduce a deadlock. 203662306a36Sopenharmony_ci */ 203762306a36Sopenharmony_cistatic inline bool 203862306a36Sopenharmony_cixfs_inodegc_want_flush_work( 203962306a36Sopenharmony_ci struct xfs_inode *ip, 204062306a36Sopenharmony_ci unsigned int items, 204162306a36Sopenharmony_ci unsigned int shrinker_hits) 204262306a36Sopenharmony_ci{ 204362306a36Sopenharmony_ci if (current->journal_info) 204462306a36Sopenharmony_ci return false; 204562306a36Sopenharmony_ci 204662306a36Sopenharmony_ci if (shrinker_hits > 0) 204762306a36Sopenharmony_ci return true; 204862306a36Sopenharmony_ci 204962306a36Sopenharmony_ci if (items > XFS_INODEGC_MAX_BACKLOG) 205062306a36Sopenharmony_ci return true; 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_ci return false; 205362306a36Sopenharmony_ci} 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci/* 205662306a36Sopenharmony_ci * Queue a background inactivation worker if there are inodes that need to be 205762306a36Sopenharmony_ci * inactivated and higher level xfs code hasn't disabled the background 205862306a36Sopenharmony_ci * workers. 
 */
static void
xfs_inodegc_queue(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_inodegc	*gc;
	int			items;
	unsigned int		shrinker_hits;
	unsigned int		cpu_nr;
	unsigned long		queue_delay = 1;	/* delay in jiffies for batching */

	/* Mark the inode as needing inactivation before it hits the list. */
	trace_xfs_inode_set_need_inactive(ip);
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= XFS_NEED_INACTIVE;
	spin_unlock(&ip->i_flags_lock);

	/* Pin ourselves to this CPU while touching its percpu gc state. */
	cpu_nr = get_cpu();
	gc = this_cpu_ptr(mp->m_inodegc);
	llist_add(&ip->i_gclist, &gc->list);
	items = READ_ONCE(gc->items);
	WRITE_ONCE(gc->items, items + 1);
	shrinker_hits = READ_ONCE(gc->shrinker_hits);

	/*
	 * Ensure the list add is always seen by anyone who finds the cpumask
	 * bit set. This effectively gives the cpumask bit set operation
	 * release ordering semantics.
	 */
	smp_mb__before_atomic();
	if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask))
		cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask);

	/*
	 * We queue the work while holding the current CPU so that the work
	 * is scheduled to run on this CPU.
	 */
	if (!xfs_is_inodegc_enabled(mp)) {
		put_cpu();
		return;
	}

	/* Run immediately instead of batching if resources are tight. */
	if (xfs_inodegc_want_queue_work(ip, items))
		queue_delay = 0;

	trace_xfs_inodegc_queue(mp, __return_address);
	mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
			queue_delay);
	put_cpu();

	/* Throttle the frontend by waiting for the worker when backlogged. */
	if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
		trace_xfs_inodegc_throttle(mp, __return_address);
		flush_delayed_work(&gc->work);
	}
}

/*
 * We set the inode flag atomically with the radix tree tag. Once we get tag
 * lookups on the radix tree, this inode flag can go away.
 *
 * We always use background reclaim here because even if the inode is clean, it
 * still may be under IO and hence we have wait for IO completion to occur
 * before we can reclaim the inode. The background reclaim path handles this
 * more efficiently than we can here, so simply let background reclaim tear down
 * all inodes.
212462306a36Sopenharmony_ci */ 212562306a36Sopenharmony_civoid 212662306a36Sopenharmony_cixfs_inode_mark_reclaimable( 212762306a36Sopenharmony_ci struct xfs_inode *ip) 212862306a36Sopenharmony_ci{ 212962306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 213062306a36Sopenharmony_ci bool need_inactive; 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci XFS_STATS_INC(mp, vn_reclaim); 213362306a36Sopenharmony_ci 213462306a36Sopenharmony_ci /* 213562306a36Sopenharmony_ci * We should never get here with any of the reclaim flags already set. 213662306a36Sopenharmony_ci */ 213762306a36Sopenharmony_ci ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_ALL_IRECLAIM_FLAGS)); 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci need_inactive = xfs_inode_needs_inactive(ip); 214062306a36Sopenharmony_ci if (need_inactive) { 214162306a36Sopenharmony_ci xfs_inodegc_queue(ip); 214262306a36Sopenharmony_ci return; 214362306a36Sopenharmony_ci } 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci /* Going straight to reclaim, so drop the dquots. */ 214662306a36Sopenharmony_ci xfs_qm_dqdetach(ip); 214762306a36Sopenharmony_ci xfs_inodegc_set_reclaimable(ip); 214862306a36Sopenharmony_ci} 214962306a36Sopenharmony_ci 215062306a36Sopenharmony_ci/* 215162306a36Sopenharmony_ci * Register a phony shrinker so that we can run background inodegc sooner when 215262306a36Sopenharmony_ci * there's memory pressure. Inactivation does not itself free any memory but 215362306a36Sopenharmony_ci * it does make inodes reclaimable, which eventually frees memory. 215462306a36Sopenharmony_ci * 215562306a36Sopenharmony_ci * The count function, seek value, and batch value are crafted to trigger the 215662306a36Sopenharmony_ci * scan function during the second round of scanning. Hopefully this means 215762306a36Sopenharmony_ci * that we reclaimed enough memory that initiating metadata transactions won't 215862306a36Sopenharmony_ci * make things worse. 
 */
#define XFS_INODEGC_SHRINKER_COUNT	(1UL << DEF_PRIORITY)
#define XFS_INODEGC_SHRINKER_BATCH	((XFS_INODEGC_SHRINKER_COUNT / 2) + 1)

/*
 * Report a fixed non-zero count if any CPU has queued inodegc work, so the
 * shrinker core will eventually call our scan function under memory pressure.
 */
static unsigned long
xfs_inodegc_shrinker_count(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
						   m_inodegc_shrinker);
	struct xfs_inodegc	*gc;
	int			cpu;

	if (!xfs_is_inodegc_enabled(mp))
		return 0;

	/* Any non-empty percpu list is enough to claim the full count. */
	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		if (!llist_empty(&gc->list))
			return XFS_INODEGC_SHRINKER_COUNT;
	}

	return 0;
}

/*
 * Under memory pressure, bump the shrinker-hit counter and expedite the
 * worker on every CPU that has queued inodes.  No memory is freed directly
 * here, so the shrinker core is told to stop rather than keep scanning.
 */
static unsigned long
xfs_inodegc_shrinker_scan(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
						   m_inodegc_shrinker);
	struct xfs_inodegc	*gc;
	int			cpu;
	bool			no_items = true;

	if (!xfs_is_inodegc_enabled(mp))
		return SHRINK_STOP;

	trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);

	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		if (!llist_empty(&gc->list)) {
			unsigned int	h = READ_ONCE(gc->shrinker_hits);

			/* Record the hit so the frontend throttles itself. */
			WRITE_ONCE(gc->shrinker_hits, h + 1);
			/* Run the worker immediately on that CPU. */
			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
			no_items = false;
		}
	}

	/*
	 * If there are no inodes to inactivate, we don't want the shrinker
	 * to think there's deferred work to call us back about.
	 */
	if (no_items)
		return LONG_MAX;

	return SHRINK_STOP;
}

/* Register a shrinker so we can accelerate inodegc and throttle queuing.
*/ 222362306a36Sopenharmony_ciint 222462306a36Sopenharmony_cixfs_inodegc_register_shrinker( 222562306a36Sopenharmony_ci struct xfs_mount *mp) 222662306a36Sopenharmony_ci{ 222762306a36Sopenharmony_ci struct shrinker *shrink = &mp->m_inodegc_shrinker; 222862306a36Sopenharmony_ci 222962306a36Sopenharmony_ci shrink->count_objects = xfs_inodegc_shrinker_count; 223062306a36Sopenharmony_ci shrink->scan_objects = xfs_inodegc_shrinker_scan; 223162306a36Sopenharmony_ci shrink->seeks = 0; 223262306a36Sopenharmony_ci shrink->flags = SHRINKER_NONSLAB; 223362306a36Sopenharmony_ci shrink->batch = XFS_INODEGC_SHRINKER_BATCH; 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id); 223662306a36Sopenharmony_ci} 2237