162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc.
462306a36Sopenharmony_ci * All Rights Reserved.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_mount.h"
1362306a36Sopenharmony_ci#include "xfs_inode.h"
1462306a36Sopenharmony_ci#include "xfs_trans.h"
1562306a36Sopenharmony_ci#include "xfs_trans_priv.h"
1662306a36Sopenharmony_ci#include "xfs_inode_item.h"
1762306a36Sopenharmony_ci#include "xfs_quota.h"
1862306a36Sopenharmony_ci#include "xfs_trace.h"
1962306a36Sopenharmony_ci#include "xfs_icache.h"
2062306a36Sopenharmony_ci#include "xfs_bmap_util.h"
2162306a36Sopenharmony_ci#include "xfs_dquot_item.h"
2262306a36Sopenharmony_ci#include "xfs_dquot.h"
2362306a36Sopenharmony_ci#include "xfs_reflink.h"
2462306a36Sopenharmony_ci#include "xfs_ialloc.h"
2562306a36Sopenharmony_ci#include "xfs_ag.h"
2662306a36Sopenharmony_ci#include "xfs_log_priv.h"
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci#include <linux/iversion.h>
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci/* Radix tree tags for incore inode tree. */
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci/* inode is to be reclaimed */
3362306a36Sopenharmony_ci#define XFS_ICI_RECLAIM_TAG	0
3462306a36Sopenharmony_ci/* Inode has speculative preallocations (posteof or cow) to clean. */
3562306a36Sopenharmony_ci#define XFS_ICI_BLOCKGC_TAG	1
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/*
3862306a36Sopenharmony_ci * The goal for walking incore inodes.  These can correspond with incore inode
3962306a36Sopenharmony_ci * radix tree tags when convenient.  Avoid existing XFS_IWALK namespace.
4062306a36Sopenharmony_ci */
4162306a36Sopenharmony_cienum xfs_icwalk_goal {
4262306a36Sopenharmony_ci	/* Goals directly associated with tagged inodes. */
4362306a36Sopenharmony_ci	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
4462306a36Sopenharmony_ci	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
4562306a36Sopenharmony_ci};
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic int xfs_icwalk(struct xfs_mount *mp,
4862306a36Sopenharmony_ci		enum xfs_icwalk_goal goal, struct xfs_icwalk *icw);
4962306a36Sopenharmony_cistatic int xfs_icwalk_ag(struct xfs_perag *pag,
5062306a36Sopenharmony_ci		enum xfs_icwalk_goal goal, struct xfs_icwalk *icw);
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci/*
5362306a36Sopenharmony_ci * Private inode cache walk flags for struct xfs_icwalk.  Must not
5462306a36Sopenharmony_ci * coincide with XFS_ICWALK_FLAGS_VALID.
5562306a36Sopenharmony_ci */
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci/* Stop scanning after icw_scan_limit inodes. */
5862306a36Sopenharmony_ci#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci#define XFS_ICWALK_FLAG_RECLAIM_SICK	(1U << 27)
6162306a36Sopenharmony_ci#define XFS_ICWALK_FLAG_UNION		(1U << 26) /* union filter algorithm */
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_SCAN_LIMIT | \
6462306a36Sopenharmony_ci					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
6562306a36Sopenharmony_ci					 XFS_ICWALK_FLAG_UNION)
6662306a36Sopenharmony_ci
/*
 * Allocate and initialise an xfs_inode.
 *
 * Returns the new incore inode with i_ino set and all XFS fields reset to
 * their "empty" state, or NULL if the VFS half of the inode could not be
 * initialised.  The slab allocation itself cannot fail (__GFP_NOFAIL).
 */
struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL
	 * and return NULL here on ENOMEM.
	 */
	ip = alloc_inode_sb(mp->m_super, xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL);

	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_cache_free(xfs_inode_cache, ip);
		return NULL;
	}

	/* VFS doesn't initialise i_mode or i_state! */
	VFS_I(ip)->i_mode = 0;
	VFS_I(ip)->i_state = 0;
	mapping_set_large_folios(VFS_I(ip)->i_mapping);

	XFS_STATS_INC(mp, vn_active);
	/*
	 * A recycled slab object must have been returned with i_ino zeroed
	 * (see xfs_inode_free()) and no pin count outstanding.
	 */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(ip->i_ino == 0);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_cowfp = NULL;
	memset(&ip->i_af, 0, sizeof(ip->i_af));
	ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS;
	memset(&ip->i_df, 0, sizeof(ip->i_df));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	/* New inodes inherit the mount's default on-disk flags2 bits. */
	ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
	ip->i_nblocks = 0;
	ip->i_forkoff = 0;
	ip->i_sick = 0;
	ip->i_checked = 0;
	INIT_WORK(&ip->i_ioend_work, xfs_end_io);
	INIT_LIST_HEAD(&ip->i_ioend_list);
	spin_lock_init(&ip->i_ioend_lock);
	ip->i_next_unlinked = NULLAGINO;
	ip->i_prev_unlinked = 0;

	return ip;
}
12062306a36Sopenharmony_ci
/*
 * RCU callback that tears down and frees the XFS-specific parts of an inode
 * after the grace period started by __xfs_inode_free() has elapsed.
 */
STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * Data fork teardown only happens for the file types handled below;
	 * other types (devices, fifos, sockets) presumably never populate
	 * the data fork.
	 */
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(&ip->i_df);
		break;
	}

	xfs_ifork_zap_attr(ip);

	/* The CoW fork is allocated on demand, so it may not exist at all. */
	if (ip->i_cowfp) {
		xfs_idestroy_fork(ip->i_cowfp);
		kmem_cache_free(xfs_ifork_cache, ip->i_cowfp);
	}
	if (ip->i_itemp) {
		/* The log item must be off the AIL before we free it. */
		ASSERT(!test_bit(XFS_LI_IN_AIL,
				 &ip->i_itemp->ili_item.li_flags));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	kmem_cache_free(xfs_inode_cache, ip);
}
15162306a36Sopenharmony_ci
/*
 * Hand the inode over to RCU for freeing.  The real teardown happens in
 * xfs_inode_free_callback() after a grace period, so concurrent RCU-protected
 * lookups that still hold a pointer to this inode remain safe.
 */
static void
__xfs_inode_free(
	struct xfs_inode	*ip)
{
	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list));
	XFS_STATS_DEC(ip->i_mount, vn_active);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
16362306a36Sopenharmony_ci
/*
 * Free an incore inode that is not undergoing a flush.  Marks the inode as
 * being reclaimed and invalidates its inode number before deferring the
 * actual free to RCU via __xfs_inode_free().
 */
void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	__xfs_inode_free(ip);
}
18362306a36Sopenharmony_ci
/*
 * Queue background inode reclaim work if there are reclaimable inodes and there
 * isn't reclaim work already scheduled or in progress.
 *
 * The RCU read lock stabilises the perag radix tree while we test the
 * reclaim tag; queue_delayed_work() is a no-op if the work is already
 * pending, so this is safe to call optimistically.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount        *mp)
{

	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		/* Delay is xfs_syncd_centisecs / 6, converted to jiffies. */
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}
20062306a36Sopenharmony_ci
/*
 * Background scanning to trim preallocated space. This is queued based on the
 * 'speculative_prealloc_lifetime' tunable (5m by default).
 *
 * Does nothing if block garbage collection is disabled for this mount, or if
 * no inode in this AG carries the BLOCKGC tag.
 */
static inline void
xfs_blockgc_queue(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	if (!xfs_is_blockgc_enabled(mp))
		return;

	rcu_read_lock();
	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
		queue_delayed_work(pag->pag_mount->m_blockgc_wq,
				   &pag->pag_blockgc_work,
				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
	rcu_read_unlock();
}
22162306a36Sopenharmony_ci
/*
 * Set a tag on both the AG incore inode tree and the AG radix tree.
 *
 * @pag:	per-AG structure; pag_ici_lock must be held.
 * @agino:	AG-relative inode number to tag.
 * @tag:	XFS_ICI_RECLAIM_TAG or XFS_ICI_BLOCKGC_TAG.
 *
 * The first inode to receive a given tag in an AG also propagates that tag
 * up into the per-mount perag radix tree and kicks the matching background
 * worker.
 */
static void
xfs_perag_set_inode_tag(
	struct xfs_perag	*pag,
	xfs_agino_t		agino,
	unsigned int		tag)
{
	struct xfs_mount	*mp = pag->pag_mount;
	bool			was_tagged;

	lockdep_assert_held(&pag->pag_ici_lock);

	was_tagged = radix_tree_tagged(&pag->pag_ici_root, tag);
	radix_tree_tag_set(&pag->pag_ici_root, agino, tag);

	if (tag == XFS_ICI_RECLAIM_TAG)
		pag->pag_ici_reclaimable++;

	/* If the AG was already tagged, the perag tree is up to date. */
	if (was_tagged)
		return;

	/* propagate the tag up into the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag);
	spin_unlock(&mp->m_perag_lock);

	/* start background work */
	switch (tag) {
	case XFS_ICI_RECLAIM_TAG:
		xfs_reclaim_work_queue(mp);
		break;
	case XFS_ICI_BLOCKGC_TAG:
		xfs_blockgc_queue(pag);
		break;
	}

	trace_xfs_perag_set_inode_tag(pag, _RET_IP_);
}
26062306a36Sopenharmony_ci
/*
 * Clear a tag on both the AG incore inode tree and the AG radix tree.
 *
 * @pag:	per-AG structure; pag_ici_lock must be held.
 * @agino:	AG-relative inode number, or NULLAGINO (see below).
 * @tag:	XFS_ICI_RECLAIM_TAG or XFS_ICI_BLOCKGC_TAG.
 *
 * When the last tagged inode in the AG loses the tag, the AG's bit in the
 * per-mount perag radix tree is cleared as well.
 */
static void
xfs_perag_clear_inode_tag(
	struct xfs_perag	*pag,
	xfs_agino_t		agino,
	unsigned int		tag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	lockdep_assert_held(&pag->pag_ici_lock);

	/*
	 * Reclaim can signal (with a null agino) that it cleared its own tag
	 * by removing the inode from the radix tree.
	 */
	if (agino != NULLAGINO)
		radix_tree_tag_clear(&pag->pag_ici_root, agino, tag);
	else
		ASSERT(tag == XFS_ICI_RECLAIM_TAG);

	if (tag == XFS_ICI_RECLAIM_TAG)
		pag->pag_ici_reclaimable--;

	/* Other inodes in this AG still carry the tag; nothing more to do. */
	if (radix_tree_tagged(&pag->pag_ici_root, tag))
		return;

	/* clear the tag from the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
	spin_unlock(&mp->m_perag_lock);

	trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci/*
29662306a36Sopenharmony_ci * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
29762306a36Sopenharmony_ci * part of the structure. This is made more complex by the fact we store
29862306a36Sopenharmony_ci * information about the on-disk values in the VFS inode and so we can't just
29962306a36Sopenharmony_ci * overwrite the values unconditionally. Hence we save the parameters we
30062306a36Sopenharmony_ci * need to retain across reinitialisation, and rewrite them into the VFS inode
30162306a36Sopenharmony_ci * after reinitialisation even if it fails.
30262306a36Sopenharmony_ci */
30362306a36Sopenharmony_cistatic int
30462306a36Sopenharmony_cixfs_reinit_inode(
30562306a36Sopenharmony_ci	struct xfs_mount	*mp,
30662306a36Sopenharmony_ci	struct inode		*inode)
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci	int			error;
30962306a36Sopenharmony_ci	uint32_t		nlink = inode->i_nlink;
31062306a36Sopenharmony_ci	uint32_t		generation = inode->i_generation;
31162306a36Sopenharmony_ci	uint64_t		version = inode_peek_iversion(inode);
31262306a36Sopenharmony_ci	umode_t			mode = inode->i_mode;
31362306a36Sopenharmony_ci	dev_t			dev = inode->i_rdev;
31462306a36Sopenharmony_ci	kuid_t			uid = inode->i_uid;
31562306a36Sopenharmony_ci	kgid_t			gid = inode->i_gid;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	error = inode_init_always(mp->m_super, inode);
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	set_nlink(inode, nlink);
32062306a36Sopenharmony_ci	inode->i_generation = generation;
32162306a36Sopenharmony_ci	inode_set_iversion_queried(inode, version);
32262306a36Sopenharmony_ci	inode->i_mode = mode;
32362306a36Sopenharmony_ci	inode->i_rdev = dev;
32462306a36Sopenharmony_ci	inode->i_uid = uid;
32562306a36Sopenharmony_ci	inode->i_gid = gid;
32662306a36Sopenharmony_ci	mapping_set_large_folios(inode->i_mapping);
32762306a36Sopenharmony_ci	return error;
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
/*
 * Carefully nudge an inode whose VFS state has been torn down back into a
 * usable state.  Drops the i_flags_lock and the rcu read lock.
 *
 * Called with the RCU read lock and ip->i_flags_lock held; both are released
 * on all return paths.  Returns 0 on success, -EAGAIN if the ILOCK could not
 * be taken without blocking, or the error from xfs_reinit_inode().
 */
static int
xfs_iget_recycle(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			error;

	trace_xfs_iget_recycle(ip);

	/* Can't sleep here because the i_flags_lock spinlock is held. */
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		return -EAGAIN;

	/*
	 * We need to make it look like the inode is being reclaimed to prevent
	 * the actual reclaim workers from stomping over us while we recycle
	 * the inode.  We can't clear the radix tree tag yet as it requires
	 * pag_ici_lock to be held exclusive.
	 */
	ip->i_flags |= XFS_IRECLAIM;

	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	error = xfs_reinit_inode(mp, inode);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error) {
		/*
		 * Re-initializing the inode failed, and we are in deep
		 * trouble.  Try to re-add it to the reclaim list.
		 */
		rcu_read_lock();
		spin_lock(&ip->i_flags_lock);
		ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
		ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		trace_xfs_iget_recycle_fail(ip);
		return error;
	}

	/* pag_ici_lock before i_flags_lock: required to clear the tag. */
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	/*
	 * Clear the per-lifetime state in the inode as we are now effectively
	 * a new inode and need to return to the initial state before reuse
	 * occurs.
	 */
	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
	ip->i_flags |= XFS_INEW;
	xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
			XFS_ICI_RECLAIM_TAG);
	inode->i_state = I_NEW;
	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);

	return 0;
}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci/*
39862306a36Sopenharmony_ci * If we are allocating a new inode, then check what was returned is
39962306a36Sopenharmony_ci * actually a free, empty inode. If we are not allocating an inode,
40062306a36Sopenharmony_ci * then check we didn't find a free inode.
40162306a36Sopenharmony_ci *
40262306a36Sopenharmony_ci * Returns:
40362306a36Sopenharmony_ci *	0		if the inode free state matches the lookup context
40462306a36Sopenharmony_ci *	-ENOENT		if the inode is free and we are not allocating
40562306a36Sopenharmony_ci *	-EFSCORRUPTED	if there is any state mismatch at all
40662306a36Sopenharmony_ci */
40762306a36Sopenharmony_cistatic int
40862306a36Sopenharmony_cixfs_iget_check_free_state(
40962306a36Sopenharmony_ci	struct xfs_inode	*ip,
41062306a36Sopenharmony_ci	int			flags)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	if (flags & XFS_IGET_CREATE) {
41362306a36Sopenharmony_ci		/* should be a free inode */
41462306a36Sopenharmony_ci		if (VFS_I(ip)->i_mode != 0) {
41562306a36Sopenharmony_ci			xfs_warn(ip->i_mount,
41662306a36Sopenharmony_ci"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
41762306a36Sopenharmony_ci				ip->i_ino, VFS_I(ip)->i_mode);
41862306a36Sopenharmony_ci			return -EFSCORRUPTED;
41962306a36Sopenharmony_ci		}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci		if (ip->i_nblocks != 0) {
42262306a36Sopenharmony_ci			xfs_warn(ip->i_mount,
42362306a36Sopenharmony_ci"Corruption detected! Free inode 0x%llx has blocks allocated!",
42462306a36Sopenharmony_ci				ip->i_ino);
42562306a36Sopenharmony_ci			return -EFSCORRUPTED;
42662306a36Sopenharmony_ci		}
42762306a36Sopenharmony_ci		return 0;
42862306a36Sopenharmony_ci	}
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	/* should be an allocated inode */
43162306a36Sopenharmony_ci	if (VFS_I(ip)->i_mode == 0)
43262306a36Sopenharmony_ci		return -ENOENT;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	return 0;
43562306a36Sopenharmony_ci}
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci/* Make all pending inactivation work start immediately. */
43862306a36Sopenharmony_cistatic bool
43962306a36Sopenharmony_cixfs_inodegc_queue_all(
44062306a36Sopenharmony_ci	struct xfs_mount	*mp)
44162306a36Sopenharmony_ci{
44262306a36Sopenharmony_ci	struct xfs_inodegc	*gc;
44362306a36Sopenharmony_ci	int			cpu;
44462306a36Sopenharmony_ci	bool			ret = false;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
44762306a36Sopenharmony_ci		gc = per_cpu_ptr(mp->m_inodegc, cpu);
44862306a36Sopenharmony_ci		if (!llist_empty(&gc->list)) {
44962306a36Sopenharmony_ci			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
45062306a36Sopenharmony_ci			ret = true;
45162306a36Sopenharmony_ci		}
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	return ret;
45562306a36Sopenharmony_ci}
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci/* Wait for all queued work and collect errors */
45862306a36Sopenharmony_cistatic int
45962306a36Sopenharmony_cixfs_inodegc_wait_all(
46062306a36Sopenharmony_ci	struct xfs_mount	*mp)
46162306a36Sopenharmony_ci{
46262306a36Sopenharmony_ci	int			cpu;
46362306a36Sopenharmony_ci	int			error = 0;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	flush_workqueue(mp->m_inodegc_wq);
46662306a36Sopenharmony_ci	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
46762306a36Sopenharmony_ci		struct xfs_inodegc	*gc;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci		gc = per_cpu_ptr(mp->m_inodegc, cpu);
47062306a36Sopenharmony_ci		if (gc->error && !error)
47162306a36Sopenharmony_ci			error = gc->error;
47262306a36Sopenharmony_ci		gc->error = 0;
47362306a36Sopenharmony_ci	}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	return error;
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
/*
 * Check the validity of the inode we just found in the cache.
 *
 * Called with the RCU read lock held; releases it (and i_flags_lock) on all
 * paths.  Returns 0 with the inode referenced and optionally locked,
 * -EAGAIN if the caller should retry the lookup, -ENOENT for unlinked
 * inodes, or another error from the free-state check or recycle.
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino)
		goto out_skip;

	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * If we're racing with the inactivation worker we also want to wait.
	 * If we're creating a new file, it's possible that the worker
	 * previously marked the inode as free on disk but hasn't finished
	 * updating the incore state yet.  The AGI buffer will be dirty and
	 * locked to the icreate transaction, so a synchronous push of the
	 * inodegc workers would result in deadlock.  For a regular iget, the
	 * worker is running already, so we might as well wait.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	     wait_on_inode to wait for these flags to be cleared
	 *	     instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
		goto out_skip;

	if (ip->i_flags & XFS_NEED_INACTIVE) {
		/* Unlinked inodes cannot be re-grabbed. */
		if (VFS_I(ip)->i_nlink == 0) {
			error = -ENOENT;
			goto out_error;
		}
		goto out_inodegc_flush;
	}

	/*
	 * Check the inode free state is valid. This also detects lookup
	 * racing with unlinks.
	 */
	error = xfs_iget_check_free_state(ip, flags);
	if (error)
		goto out_error;

	/* Skip inodes that have no vfs state. */
	if ((flags & XFS_IGET_INCORE) &&
	    (ip->i_flags & XFS_IRECLAIMABLE))
		goto out_skip;

	/* The inode fits the selection criteria; process it. */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		/* Drops i_flags_lock and RCU read lock. */
		error = xfs_iget_recycle(pag, ip);
		if (error == -EAGAIN)
			goto out_skip;
		if (error)
			return error;
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode))
			goto out_skip;

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	if (!(flags & XFS_IGET_INCORE))
		xfs_iflags_clear(ip, XFS_ISTALE);
	XFS_STATS_INC(mp, xs_ig_found);

	return 0;

out_skip:
	/* Transient race: caller retries the lookup with -EAGAIN. */
	trace_xfs_iget_skip(ip);
	XFS_STATS_INC(mp, xs_ig_frecycle);
	error = -EAGAIN;
out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;

out_inodegc_flush:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	/*
	 * Do not wait for the workers, because the caller could hold an AGI
	 * buffer lock.  We're just going to sleep in a loop anyway.
	 */
	if (xfs_is_inodegc_enabled(mp))
		xfs_inodegc_queue_all(mp);
	return -EAGAIN;
}
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_cistatic int
59762306a36Sopenharmony_cixfs_iget_cache_miss(
59862306a36Sopenharmony_ci	struct xfs_mount	*mp,
59962306a36Sopenharmony_ci	struct xfs_perag	*pag,
60062306a36Sopenharmony_ci	xfs_trans_t		*tp,
60162306a36Sopenharmony_ci	xfs_ino_t		ino,
60262306a36Sopenharmony_ci	struct xfs_inode	**ipp,
60362306a36Sopenharmony_ci	int			flags,
60462306a36Sopenharmony_ci	int			lock_flags)
60562306a36Sopenharmony_ci{
60662306a36Sopenharmony_ci	struct xfs_inode	*ip;
60762306a36Sopenharmony_ci	int			error;
60862306a36Sopenharmony_ci	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
60962306a36Sopenharmony_ci	int			iflags;
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	ip = xfs_inode_alloc(mp, ino);
61262306a36Sopenharmony_ci	if (!ip)
61362306a36Sopenharmony_ci		return -ENOMEM;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags);
61662306a36Sopenharmony_ci	if (error)
61762306a36Sopenharmony_ci		goto out_destroy;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	/*
62062306a36Sopenharmony_ci	 * For version 5 superblocks, if we are initialising a new inode and we
62162306a36Sopenharmony_ci	 * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can
62262306a36Sopenharmony_ci	 * simply build the new inode core with a random generation number.
62362306a36Sopenharmony_ci	 *
62462306a36Sopenharmony_ci	 * For version 4 (and older) superblocks, log recovery is dependent on
62562306a36Sopenharmony_ci	 * the i_flushiter field being initialised from the current on-disk
62662306a36Sopenharmony_ci	 * value and hence we must also read the inode off disk even when
62762306a36Sopenharmony_ci	 * initializing new inodes.
62862306a36Sopenharmony_ci	 */
62962306a36Sopenharmony_ci	if (xfs_has_v3inodes(mp) &&
63062306a36Sopenharmony_ci	    (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
63162306a36Sopenharmony_ci		VFS_I(ip)->i_generation = get_random_u32();
63262306a36Sopenharmony_ci	} else {
63362306a36Sopenharmony_ci		struct xfs_buf		*bp;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp);
63662306a36Sopenharmony_ci		if (error)
63762306a36Sopenharmony_ci			goto out_destroy;
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci		error = xfs_inode_from_disk(ip,
64062306a36Sopenharmony_ci				xfs_buf_offset(bp, ip->i_imap.im_boffset));
64162306a36Sopenharmony_ci		if (!error)
64262306a36Sopenharmony_ci			xfs_buf_set_ref(bp, XFS_INO_REF);
64362306a36Sopenharmony_ci		xfs_trans_brelse(tp, bp);
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci		if (error)
64662306a36Sopenharmony_ci			goto out_destroy;
64762306a36Sopenharmony_ci	}
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	trace_xfs_iget_miss(ip);
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	/*
65262306a36Sopenharmony_ci	 * Check the inode free state is valid. This also detects lookup
65362306a36Sopenharmony_ci	 * racing with unlinks.
65462306a36Sopenharmony_ci	 */
65562306a36Sopenharmony_ci	error = xfs_iget_check_free_state(ip, flags);
65662306a36Sopenharmony_ci	if (error)
65762306a36Sopenharmony_ci		goto out_destroy;
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	/*
66062306a36Sopenharmony_ci	 * Preload the radix tree so we can insert safely under the
66162306a36Sopenharmony_ci	 * write spinlock. Note that we cannot sleep inside the preload
66262306a36Sopenharmony_ci	 * region. Since we can be called from transaction context, don't
66362306a36Sopenharmony_ci	 * recurse into the file system.
66462306a36Sopenharmony_ci	 */
66562306a36Sopenharmony_ci	if (radix_tree_preload(GFP_NOFS)) {
66662306a36Sopenharmony_ci		error = -EAGAIN;
66762306a36Sopenharmony_ci		goto out_destroy;
66862306a36Sopenharmony_ci	}
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	/*
67162306a36Sopenharmony_ci	 * Because the inode hasn't been added to the radix-tree yet it can't
67262306a36Sopenharmony_ci	 * be found by another thread, so we can do the non-sleeping lock here.
67362306a36Sopenharmony_ci	 */
67462306a36Sopenharmony_ci	if (lock_flags) {
67562306a36Sopenharmony_ci		if (!xfs_ilock_nowait(ip, lock_flags))
67662306a36Sopenharmony_ci			BUG();
67762306a36Sopenharmony_ci	}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci	/*
68062306a36Sopenharmony_ci	 * These values must be set before inserting the inode into the radix
68162306a36Sopenharmony_ci	 * tree as the moment it is inserted a concurrent lookup (allowed by the
68262306a36Sopenharmony_ci	 * RCU locking mechanism) can find it and that lookup must see that this
68362306a36Sopenharmony_ci	 * is an inode currently under construction (i.e. that XFS_INEW is set).
68462306a36Sopenharmony_ci	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
68562306a36Sopenharmony_ci	 * memory barrier that ensures this detection works correctly at lookup
68662306a36Sopenharmony_ci	 * time.
68762306a36Sopenharmony_ci	 */
68862306a36Sopenharmony_ci	iflags = XFS_INEW;
68962306a36Sopenharmony_ci	if (flags & XFS_IGET_DONTCACHE)
69062306a36Sopenharmony_ci		d_mark_dontcache(VFS_I(ip));
69162306a36Sopenharmony_ci	ip->i_udquot = NULL;
69262306a36Sopenharmony_ci	ip->i_gdquot = NULL;
69362306a36Sopenharmony_ci	ip->i_pdquot = NULL;
69462306a36Sopenharmony_ci	xfs_iflags_set(ip, iflags);
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	/* insert the new inode */
69762306a36Sopenharmony_ci	spin_lock(&pag->pag_ici_lock);
69862306a36Sopenharmony_ci	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
69962306a36Sopenharmony_ci	if (unlikely(error)) {
70062306a36Sopenharmony_ci		WARN_ON(error != -EEXIST);
70162306a36Sopenharmony_ci		XFS_STATS_INC(mp, xs_ig_dup);
70262306a36Sopenharmony_ci		error = -EAGAIN;
70362306a36Sopenharmony_ci		goto out_preload_end;
70462306a36Sopenharmony_ci	}
70562306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
70662306a36Sopenharmony_ci	radix_tree_preload_end();
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	*ipp = ip;
70962306a36Sopenharmony_ci	return 0;
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ciout_preload_end:
71262306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
71362306a36Sopenharmony_ci	radix_tree_preload_end();
71462306a36Sopenharmony_ci	if (lock_flags)
71562306a36Sopenharmony_ci		xfs_iunlock(ip, lock_flags);
71662306a36Sopenharmony_ciout_destroy:
71762306a36Sopenharmony_ci	__destroy_inode(VFS_I(ip));
71862306a36Sopenharmony_ci	xfs_inode_free(ip);
71962306a36Sopenharmony_ci	return error;
72062306a36Sopenharmony_ci}
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci/*
72362306a36Sopenharmony_ci * Look up an inode by number in the given file system.  The inode is looked up
72462306a36Sopenharmony_ci * in the cache held in each AG.  If the inode is found in the cache, initialise
72562306a36Sopenharmony_ci * the vfs inode if necessary.
72662306a36Sopenharmony_ci *
72762306a36Sopenharmony_ci * If it is not in core, read it in from the file system's device, add it to the
72862306a36Sopenharmony_ci * cache and initialise the vfs inode.
72962306a36Sopenharmony_ci *
73062306a36Sopenharmony_ci * The inode is locked according to the value of the lock_flags parameter.
73162306a36Sopenharmony_ci * Inode lookup is only done during metadata operations and not as part of the
73262306a36Sopenharmony_ci * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup.
73362306a36Sopenharmony_ci */
73462306a36Sopenharmony_ciint
73562306a36Sopenharmony_cixfs_iget(
73662306a36Sopenharmony_ci	struct xfs_mount	*mp,
73762306a36Sopenharmony_ci	struct xfs_trans	*tp,
73862306a36Sopenharmony_ci	xfs_ino_t		ino,
73962306a36Sopenharmony_ci	uint			flags,
74062306a36Sopenharmony_ci	uint			lock_flags,
74162306a36Sopenharmony_ci	struct xfs_inode	**ipp)
74262306a36Sopenharmony_ci{
74362306a36Sopenharmony_ci	struct xfs_inode	*ip;
74462306a36Sopenharmony_ci	struct xfs_perag	*pag;
74562306a36Sopenharmony_ci	xfs_agino_t		agino;
74662306a36Sopenharmony_ci	int			error;
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	/* reject inode numbers outside existing AGs */
75162306a36Sopenharmony_ci	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
75262306a36Sopenharmony_ci		return -EINVAL;
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	XFS_STATS_INC(mp, xs_ig_attempts);
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci	/* get the perag structure and ensure that it's inode capable */
75762306a36Sopenharmony_ci	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
75862306a36Sopenharmony_ci	agino = XFS_INO_TO_AGINO(mp, ino);
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ciagain:
76162306a36Sopenharmony_ci	error = 0;
76262306a36Sopenharmony_ci	rcu_read_lock();
76362306a36Sopenharmony_ci	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	if (ip) {
76662306a36Sopenharmony_ci		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
76762306a36Sopenharmony_ci		if (error)
76862306a36Sopenharmony_ci			goto out_error_or_again;
76962306a36Sopenharmony_ci	} else {
77062306a36Sopenharmony_ci		rcu_read_unlock();
77162306a36Sopenharmony_ci		if (flags & XFS_IGET_INCORE) {
77262306a36Sopenharmony_ci			error = -ENODATA;
77362306a36Sopenharmony_ci			goto out_error_or_again;
77462306a36Sopenharmony_ci		}
77562306a36Sopenharmony_ci		XFS_STATS_INC(mp, xs_ig_missed);
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
77862306a36Sopenharmony_ci							flags, lock_flags);
77962306a36Sopenharmony_ci		if (error)
78062306a36Sopenharmony_ci			goto out_error_or_again;
78162306a36Sopenharmony_ci	}
78262306a36Sopenharmony_ci	xfs_perag_put(pag);
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	*ipp = ip;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci	/*
78762306a36Sopenharmony_ci	 * If we have a real type for an on-disk inode, we can setup the inode
78862306a36Sopenharmony_ci	 * now.	 If it's a new inode being created, xfs_init_new_inode will
78962306a36Sopenharmony_ci	 * handle it.
79062306a36Sopenharmony_ci	 */
79162306a36Sopenharmony_ci	if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
79262306a36Sopenharmony_ci		xfs_setup_existing_inode(ip);
79362306a36Sopenharmony_ci	return 0;
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ciout_error_or_again:
79662306a36Sopenharmony_ci	if (!(flags & (XFS_IGET_INCORE | XFS_IGET_NORETRY)) &&
79762306a36Sopenharmony_ci	    error == -EAGAIN) {
79862306a36Sopenharmony_ci		delay(1);
79962306a36Sopenharmony_ci		goto again;
80062306a36Sopenharmony_ci	}
80162306a36Sopenharmony_ci	xfs_perag_put(pag);
80262306a36Sopenharmony_ci	return error;
80362306a36Sopenharmony_ci}
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci/*
80662306a36Sopenharmony_ci * Grab the inode for reclaim exclusively.
80762306a36Sopenharmony_ci *
80862306a36Sopenharmony_ci * We have found this inode via a lookup under RCU, so the inode may have
80962306a36Sopenharmony_ci * already been freed, or it may be in the process of being recycled by
81062306a36Sopenharmony_ci * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode
81162306a36Sopenharmony_ci * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE
81262306a36Sopenharmony_ci * will not be set. Hence we need to check for both these flag conditions to
81362306a36Sopenharmony_ci * avoid inodes that are no longer reclaim candidates.
81462306a36Sopenharmony_ci *
81562306a36Sopenharmony_ci * Note: checking for other state flags here, under the i_flags_lock or not, is
81662306a36Sopenharmony_ci * racy and should be avoided. Those races should be resolved only after we have
81762306a36Sopenharmony_ci * ensured that we are able to reclaim this inode and the world can see that we
81862306a36Sopenharmony_ci * are going to reclaim it.
81962306a36Sopenharmony_ci *
82062306a36Sopenharmony_ci * Return true if we grabbed it, false otherwise.
82162306a36Sopenharmony_ci */
82262306a36Sopenharmony_cistatic bool
82362306a36Sopenharmony_cixfs_reclaim_igrab(
82462306a36Sopenharmony_ci	struct xfs_inode	*ip,
82562306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
82662306a36Sopenharmony_ci{
82762306a36Sopenharmony_ci	ASSERT(rcu_read_lock_held());
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
83062306a36Sopenharmony_ci	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
83162306a36Sopenharmony_ci	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
83262306a36Sopenharmony_ci		/* not a reclaim candidate. */
83362306a36Sopenharmony_ci		spin_unlock(&ip->i_flags_lock);
83462306a36Sopenharmony_ci		return false;
83562306a36Sopenharmony_ci	}
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci	/* Don't reclaim a sick inode unless the caller asked for it. */
83862306a36Sopenharmony_ci	if (ip->i_sick &&
83962306a36Sopenharmony_ci	    (!icw || !(icw->icw_flags & XFS_ICWALK_FLAG_RECLAIM_SICK))) {
84062306a36Sopenharmony_ci		spin_unlock(&ip->i_flags_lock);
84162306a36Sopenharmony_ci		return false;
84262306a36Sopenharmony_ci	}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	__xfs_iflags_set(ip, XFS_IRECLAIM);
84562306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
84662306a36Sopenharmony_ci	return true;
84762306a36Sopenharmony_ci}
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci/*
85062306a36Sopenharmony_ci * Inode reclaim is non-blocking, so the default action if progress cannot be
85162306a36Sopenharmony_ci * made is to "requeue" the inode for reclaim by unlocking it and clearing the
85262306a36Sopenharmony_ci * XFS_IRECLAIM flag.  If we are in a shutdown state, we don't care about
85362306a36Sopenharmony_ci * blocking anymore and hence we can wait for the inode to be able to reclaim
85462306a36Sopenharmony_ci * it.
85562306a36Sopenharmony_ci *
85662306a36Sopenharmony_ci * We do no IO here - if callers require inodes to be cleaned they must push the
85762306a36Sopenharmony_ci * AIL first to trigger writeback of dirty inodes.  This enables writeback to be
85862306a36Sopenharmony_ci * done in the background in a non-blocking manner, and enables memory reclaim
85962306a36Sopenharmony_ci * to make progress without blocking.
86062306a36Sopenharmony_ci */
86162306a36Sopenharmony_cistatic void
86262306a36Sopenharmony_cixfs_reclaim_inode(
86362306a36Sopenharmony_ci	struct xfs_inode	*ip,
86462306a36Sopenharmony_ci	struct xfs_perag	*pag)
86562306a36Sopenharmony_ci{
86662306a36Sopenharmony_ci	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
86962306a36Sopenharmony_ci		goto out;
87062306a36Sopenharmony_ci	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
87162306a36Sopenharmony_ci		goto out_iunlock;
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	/*
87462306a36Sopenharmony_ci	 * Check for log shutdown because aborting the inode can move the log
87562306a36Sopenharmony_ci	 * tail and corrupt in memory state. This is fine if the log is shut
87662306a36Sopenharmony_ci	 * down, but if the log is still active and only the mount is shut down
87762306a36Sopenharmony_ci	 * then the in-memory log tail movement caused by the abort can be
87862306a36Sopenharmony_ci	 * incorrectly propagated to disk.
87962306a36Sopenharmony_ci	 */
88062306a36Sopenharmony_ci	if (xlog_is_shutdown(ip->i_mount->m_log)) {
88162306a36Sopenharmony_ci		xfs_iunpin_wait(ip);
88262306a36Sopenharmony_ci		xfs_iflush_shutdown_abort(ip);
88362306a36Sopenharmony_ci		goto reclaim;
88462306a36Sopenharmony_ci	}
88562306a36Sopenharmony_ci	if (xfs_ipincount(ip))
88662306a36Sopenharmony_ci		goto out_clear_flush;
88762306a36Sopenharmony_ci	if (!xfs_inode_clean(ip))
88862306a36Sopenharmony_ci		goto out_clear_flush;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	xfs_iflags_clear(ip, XFS_IFLUSHING);
89162306a36Sopenharmony_cireclaim:
89262306a36Sopenharmony_ci	trace_xfs_inode_reclaiming(ip);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	/*
89562306a36Sopenharmony_ci	 * Because we use RCU freeing we need to ensure the inode always appears
89662306a36Sopenharmony_ci	 * to be reclaimed with an invalid inode number when in the free state.
89762306a36Sopenharmony_ci	 * We do this as early as possible under the ILOCK so that
89862306a36Sopenharmony_ci	 * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
89962306a36Sopenharmony_ci	 * detect races with us here. By doing this, we guarantee that once
90062306a36Sopenharmony_ci	 * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
90162306a36Sopenharmony_ci	 * it will see either a valid inode that will serialise correctly, or it
90262306a36Sopenharmony_ci	 * will see an invalid inode that it can skip.
90362306a36Sopenharmony_ci	 */
90462306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
90562306a36Sopenharmony_ci	ip->i_flags = XFS_IRECLAIM;
90662306a36Sopenharmony_ci	ip->i_ino = 0;
90762306a36Sopenharmony_ci	ip->i_sick = 0;
90862306a36Sopenharmony_ci	ip->i_checked = 0;
90962306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	ASSERT(!ip->i_itemp || ip->i_itemp->ili_item.li_buf == NULL);
91262306a36Sopenharmony_ci	xfs_iunlock(ip, XFS_ILOCK_EXCL);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
91562306a36Sopenharmony_ci	/*
91662306a36Sopenharmony_ci	 * Remove the inode from the per-AG radix tree.
91762306a36Sopenharmony_ci	 *
91862306a36Sopenharmony_ci	 * Because radix_tree_delete won't complain even if the item was never
91962306a36Sopenharmony_ci	 * added to the tree assert that it's been there before to catch
92062306a36Sopenharmony_ci	 * problems with the inode life time early on.
92162306a36Sopenharmony_ci	 */
92262306a36Sopenharmony_ci	spin_lock(&pag->pag_ici_lock);
92362306a36Sopenharmony_ci	if (!radix_tree_delete(&pag->pag_ici_root,
92462306a36Sopenharmony_ci				XFS_INO_TO_AGINO(ip->i_mount, ino)))
92562306a36Sopenharmony_ci		ASSERT(0);
92662306a36Sopenharmony_ci	xfs_perag_clear_inode_tag(pag, NULLAGINO, XFS_ICI_RECLAIM_TAG);
92762306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	/*
93062306a36Sopenharmony_ci	 * Here we do an (almost) spurious inode lock in order to coordinate
93162306a36Sopenharmony_ci	 * with inode cache radix tree lookups.  This is because the lookup
93262306a36Sopenharmony_ci	 * can reference the inodes in the cache without taking references.
93362306a36Sopenharmony_ci	 *
93462306a36Sopenharmony_ci	 * We make that OK here by ensuring that we wait until the inode is
93562306a36Sopenharmony_ci	 * unlocked after the lookup before we go ahead and free it.
93662306a36Sopenharmony_ci	 */
93762306a36Sopenharmony_ci	xfs_ilock(ip, XFS_ILOCK_EXCL);
93862306a36Sopenharmony_ci	ASSERT(!ip->i_udquot && !ip->i_gdquot && !ip->i_pdquot);
93962306a36Sopenharmony_ci	xfs_iunlock(ip, XFS_ILOCK_EXCL);
94062306a36Sopenharmony_ci	ASSERT(xfs_inode_clean(ip));
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	__xfs_inode_free(ip);
94362306a36Sopenharmony_ci	return;
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ciout_clear_flush:
94662306a36Sopenharmony_ci	xfs_iflags_clear(ip, XFS_IFLUSHING);
94762306a36Sopenharmony_ciout_iunlock:
94862306a36Sopenharmony_ci	xfs_iunlock(ip, XFS_ILOCK_EXCL);
94962306a36Sopenharmony_ciout:
95062306a36Sopenharmony_ci	xfs_iflags_clear(ip, XFS_IRECLAIM);
95162306a36Sopenharmony_ci}
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_ci/* Reclaim sick inodes if we're unmounting or the fs went down. */
95462306a36Sopenharmony_cistatic inline bool
95562306a36Sopenharmony_cixfs_want_reclaim_sick(
95662306a36Sopenharmony_ci	struct xfs_mount	*mp)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	return xfs_is_unmounting(mp) || xfs_has_norecovery(mp) ||
95962306a36Sopenharmony_ci	       xfs_is_shutdown(mp);
96062306a36Sopenharmony_ci}
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_civoid
96362306a36Sopenharmony_cixfs_reclaim_inodes(
96462306a36Sopenharmony_ci	struct xfs_mount	*mp)
96562306a36Sopenharmony_ci{
96662306a36Sopenharmony_ci	struct xfs_icwalk	icw = {
96762306a36Sopenharmony_ci		.icw_flags	= 0,
96862306a36Sopenharmony_ci	};
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	if (xfs_want_reclaim_sick(mp))
97162306a36Sopenharmony_ci		icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
97462306a36Sopenharmony_ci		xfs_ail_push_all_sync(mp->m_ail);
97562306a36Sopenharmony_ci		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
97662306a36Sopenharmony_ci	}
97762306a36Sopenharmony_ci}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci/*
98062306a36Sopenharmony_ci * The shrinker infrastructure determines how many inodes we should scan for
98162306a36Sopenharmony_ci * reclaim. We want as many clean inodes ready to reclaim as possible, so we
98262306a36Sopenharmony_ci * push the AIL here. We also want to proactively free up memory if we can to
98362306a36Sopenharmony_ci * minimise the amount of work memory reclaim has to do so we kick the
98462306a36Sopenharmony_ci * background reclaim if it isn't already scheduled.
98562306a36Sopenharmony_ci */
98662306a36Sopenharmony_cilong
98762306a36Sopenharmony_cixfs_reclaim_inodes_nr(
98862306a36Sopenharmony_ci	struct xfs_mount	*mp,
98962306a36Sopenharmony_ci	unsigned long		nr_to_scan)
99062306a36Sopenharmony_ci{
99162306a36Sopenharmony_ci	struct xfs_icwalk	icw = {
99262306a36Sopenharmony_ci		.icw_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
99362306a36Sopenharmony_ci		.icw_scan_limit	= min_t(unsigned long, LONG_MAX, nr_to_scan),
99462306a36Sopenharmony_ci	};
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	if (xfs_want_reclaim_sick(mp))
99762306a36Sopenharmony_ci		icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	/* kick background reclaimer and push the AIL */
100062306a36Sopenharmony_ci	xfs_reclaim_work_queue(mp);
100162306a36Sopenharmony_ci	xfs_ail_push_all(mp->m_ail);
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
100462306a36Sopenharmony_ci	return 0;
100562306a36Sopenharmony_ci}
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci/*
100862306a36Sopenharmony_ci * Return the number of reclaimable inodes in the filesystem for
100962306a36Sopenharmony_ci * the shrinker to determine how much to reclaim.
101062306a36Sopenharmony_ci */
101162306a36Sopenharmony_cilong
101262306a36Sopenharmony_cixfs_reclaim_inodes_count(
101362306a36Sopenharmony_ci	struct xfs_mount	*mp)
101462306a36Sopenharmony_ci{
101562306a36Sopenharmony_ci	struct xfs_perag	*pag;
101662306a36Sopenharmony_ci	xfs_agnumber_t		ag = 0;
101762306a36Sopenharmony_ci	long			reclaimable = 0;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
102062306a36Sopenharmony_ci		ag = pag->pag_agno + 1;
102162306a36Sopenharmony_ci		reclaimable += pag->pag_ici_reclaimable;
102262306a36Sopenharmony_ci		xfs_perag_put(pag);
102362306a36Sopenharmony_ci	}
102462306a36Sopenharmony_ci	return reclaimable;
102562306a36Sopenharmony_ci}
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ciSTATIC bool
102862306a36Sopenharmony_cixfs_icwalk_match_id(
102962306a36Sopenharmony_ci	struct xfs_inode	*ip,
103062306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
103162306a36Sopenharmony_ci{
103262306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) &&
103362306a36Sopenharmony_ci	    !uid_eq(VFS_I(ip)->i_uid, icw->icw_uid))
103462306a36Sopenharmony_ci		return false;
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) &&
103762306a36Sopenharmony_ci	    !gid_eq(VFS_I(ip)->i_gid, icw->icw_gid))
103862306a36Sopenharmony_ci		return false;
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) &&
104162306a36Sopenharmony_ci	    ip->i_projid != icw->icw_prid)
104262306a36Sopenharmony_ci		return false;
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci	return true;
104562306a36Sopenharmony_ci}
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci/*
104862306a36Sopenharmony_ci * A union-based inode filtering algorithm. Process the inode if any of the
104962306a36Sopenharmony_ci * criteria match. This is for global/internal scans only.
105062306a36Sopenharmony_ci */
105162306a36Sopenharmony_ciSTATIC bool
105262306a36Sopenharmony_cixfs_icwalk_match_id_union(
105362306a36Sopenharmony_ci	struct xfs_inode	*ip,
105462306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
105562306a36Sopenharmony_ci{
105662306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) &&
105762306a36Sopenharmony_ci	    uid_eq(VFS_I(ip)->i_uid, icw->icw_uid))
105862306a36Sopenharmony_ci		return true;
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) &&
106162306a36Sopenharmony_ci	    gid_eq(VFS_I(ip)->i_gid, icw->icw_gid))
106262306a36Sopenharmony_ci		return true;
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) &&
106562306a36Sopenharmony_ci	    ip->i_projid == icw->icw_prid)
106662306a36Sopenharmony_ci		return true;
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci	return false;
106962306a36Sopenharmony_ci}
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci/*
107262306a36Sopenharmony_ci * Is this inode @ip eligible for eof/cow block reclamation, given some
107362306a36Sopenharmony_ci * filtering parameters @icw?  The inode is eligible if @icw is null or
107462306a36Sopenharmony_ci * if the predicate functions match.
107562306a36Sopenharmony_ci */
107662306a36Sopenharmony_cistatic bool
107762306a36Sopenharmony_cixfs_icwalk_match(
107862306a36Sopenharmony_ci	struct xfs_inode	*ip,
107962306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
108062306a36Sopenharmony_ci{
108162306a36Sopenharmony_ci	bool			match;
108262306a36Sopenharmony_ci
108362306a36Sopenharmony_ci	if (!icw)
108462306a36Sopenharmony_ci		return true;
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	if (icw->icw_flags & XFS_ICWALK_FLAG_UNION)
108762306a36Sopenharmony_ci		match = xfs_icwalk_match_id_union(ip, icw);
108862306a36Sopenharmony_ci	else
108962306a36Sopenharmony_ci		match = xfs_icwalk_match_id(ip, icw);
109062306a36Sopenharmony_ci	if (!match)
109162306a36Sopenharmony_ci		return false;
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	/* skip the inode if the file size is too small */
109462306a36Sopenharmony_ci	if ((icw->icw_flags & XFS_ICWALK_FLAG_MINFILESIZE) &&
109562306a36Sopenharmony_ci	    XFS_ISIZE(ip) < icw->icw_min_file_size)
109662306a36Sopenharmony_ci		return false;
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	return true;
109962306a36Sopenharmony_ci}
110062306a36Sopenharmony_ci
110162306a36Sopenharmony_ci/*
110262306a36Sopenharmony_ci * This is a fast pass over the inode cache to try to get reclaim moving on as
110362306a36Sopenharmony_ci * many inodes as possible in a short period of time. It kicks itself every few
110462306a36Sopenharmony_ci * seconds, as well as being kicked by the inode cache shrinker when memory
110562306a36Sopenharmony_ci * goes low.
110662306a36Sopenharmony_ci */
110762306a36Sopenharmony_civoid
110862306a36Sopenharmony_cixfs_reclaim_worker(
110962306a36Sopenharmony_ci	struct work_struct *work)
111062306a36Sopenharmony_ci{
111162306a36Sopenharmony_ci	struct xfs_mount *mp = container_of(to_delayed_work(work),
111262306a36Sopenharmony_ci					struct xfs_mount, m_reclaim_work);
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
111562306a36Sopenharmony_ci	xfs_reclaim_work_queue(mp);
111662306a36Sopenharmony_ci}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ciSTATIC int
111962306a36Sopenharmony_cixfs_inode_free_eofblocks(
112062306a36Sopenharmony_ci	struct xfs_inode	*ip,
112162306a36Sopenharmony_ci	struct xfs_icwalk	*icw,
112262306a36Sopenharmony_ci	unsigned int		*lockflags)
112362306a36Sopenharmony_ci{
112462306a36Sopenharmony_ci	bool			wait;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS))
112962306a36Sopenharmony_ci		return 0;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	/*
113262306a36Sopenharmony_ci	 * If the mapping is dirty the operation can block and wait for some
113362306a36Sopenharmony_ci	 * time. Unless we are waiting, skip it.
113462306a36Sopenharmony_ci	 */
113562306a36Sopenharmony_ci	if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
113662306a36Sopenharmony_ci		return 0;
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	if (!xfs_icwalk_match(ip, icw))
113962306a36Sopenharmony_ci		return 0;
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci	/*
114262306a36Sopenharmony_ci	 * If the caller is waiting, return -EAGAIN to keep the background
114362306a36Sopenharmony_ci	 * scanner moving and revisit the inode in a subsequent pass.
114462306a36Sopenharmony_ci	 */
114562306a36Sopenharmony_ci	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
114662306a36Sopenharmony_ci		if (wait)
114762306a36Sopenharmony_ci			return -EAGAIN;
114862306a36Sopenharmony_ci		return 0;
114962306a36Sopenharmony_ci	}
115062306a36Sopenharmony_ci	*lockflags |= XFS_IOLOCK_EXCL;
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	if (xfs_can_free_eofblocks(ip, false))
115362306a36Sopenharmony_ci		return xfs_free_eofblocks(ip);
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	/* inode could be preallocated or append-only */
115662306a36Sopenharmony_ci	trace_xfs_inode_free_eofblocks_invalid(ip);
115762306a36Sopenharmony_ci	xfs_inode_clear_eofblocks_tag(ip);
115862306a36Sopenharmony_ci	return 0;
115962306a36Sopenharmony_ci}
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_cistatic void
116262306a36Sopenharmony_cixfs_blockgc_set_iflag(
116362306a36Sopenharmony_ci	struct xfs_inode	*ip,
116462306a36Sopenharmony_ci	unsigned long		iflag)
116562306a36Sopenharmony_ci{
116662306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
116762306a36Sopenharmony_ci	struct xfs_perag	*pag;
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	/*
117262306a36Sopenharmony_ci	 * Don't bother locking the AG and looking up in the radix trees
117362306a36Sopenharmony_ci	 * if we already know that we have the tag set.
117462306a36Sopenharmony_ci	 */
117562306a36Sopenharmony_ci	if (ip->i_flags & iflag)
117662306a36Sopenharmony_ci		return;
117762306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
117862306a36Sopenharmony_ci	ip->i_flags |= iflag;
117962306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
118262306a36Sopenharmony_ci	spin_lock(&pag->pag_ici_lock);
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
118562306a36Sopenharmony_ci			XFS_ICI_BLOCKGC_TAG);
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
118862306a36Sopenharmony_ci	xfs_perag_put(pag);
118962306a36Sopenharmony_ci}
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_civoid
119262306a36Sopenharmony_cixfs_inode_set_eofblocks_tag(
119362306a36Sopenharmony_ci	xfs_inode_t	*ip)
119462306a36Sopenharmony_ci{
119562306a36Sopenharmony_ci	trace_xfs_inode_set_eofblocks_tag(ip);
119662306a36Sopenharmony_ci	return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS);
119762306a36Sopenharmony_ci}
119862306a36Sopenharmony_ci
119962306a36Sopenharmony_cistatic void
120062306a36Sopenharmony_cixfs_blockgc_clear_iflag(
120162306a36Sopenharmony_ci	struct xfs_inode	*ip,
120262306a36Sopenharmony_ci	unsigned long		iflag)
120362306a36Sopenharmony_ci{
120462306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
120562306a36Sopenharmony_ci	struct xfs_perag	*pag;
120662306a36Sopenharmony_ci	bool			clear_tag;
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
121162306a36Sopenharmony_ci	ip->i_flags &= ~iflag;
121262306a36Sopenharmony_ci	clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0;
121362306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	if (!clear_tag)
121662306a36Sopenharmony_ci		return;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
121962306a36Sopenharmony_ci	spin_lock(&pag->pag_ici_lock);
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci	xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
122262306a36Sopenharmony_ci			XFS_ICI_BLOCKGC_TAG);
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
122562306a36Sopenharmony_ci	xfs_perag_put(pag);
122662306a36Sopenharmony_ci}
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_civoid
122962306a36Sopenharmony_cixfs_inode_clear_eofblocks_tag(
123062306a36Sopenharmony_ci	xfs_inode_t	*ip)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	trace_xfs_inode_clear_eofblocks_tag(ip);
123362306a36Sopenharmony_ci	return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS);
123462306a36Sopenharmony_ci}
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci/*
123762306a36Sopenharmony_ci * Set ourselves up to free CoW blocks from this file.  If it's already clean
123862306a36Sopenharmony_ci * then we can bail out quickly, but otherwise we must back off if the file
123962306a36Sopenharmony_ci * is undergoing some kind of write.
124062306a36Sopenharmony_ci */
124162306a36Sopenharmony_cistatic bool
124262306a36Sopenharmony_cixfs_prep_free_cowblocks(
124362306a36Sopenharmony_ci	struct xfs_inode	*ip)
124462306a36Sopenharmony_ci{
124562306a36Sopenharmony_ci	/*
124662306a36Sopenharmony_ci	 * Just clear the tag if we have an empty cow fork or none at all. It's
124762306a36Sopenharmony_ci	 * possible the inode was fully unshared since it was originally tagged.
124862306a36Sopenharmony_ci	 */
124962306a36Sopenharmony_ci	if (!xfs_inode_has_cow_data(ip)) {
125062306a36Sopenharmony_ci		trace_xfs_inode_free_cowblocks_invalid(ip);
125162306a36Sopenharmony_ci		xfs_inode_clear_cowblocks_tag(ip);
125262306a36Sopenharmony_ci		return false;
125362306a36Sopenharmony_ci	}
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_ci	/*
125662306a36Sopenharmony_ci	 * If the mapping is dirty or under writeback we cannot touch the
125762306a36Sopenharmony_ci	 * CoW fork.  Leave it alone if we're in the midst of a directio.
125862306a36Sopenharmony_ci	 */
125962306a36Sopenharmony_ci	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
126062306a36Sopenharmony_ci	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
126162306a36Sopenharmony_ci	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
126262306a36Sopenharmony_ci	    atomic_read(&VFS_I(ip)->i_dio_count))
126362306a36Sopenharmony_ci		return false;
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	return true;
126662306a36Sopenharmony_ci}
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci/*
126962306a36Sopenharmony_ci * Automatic CoW Reservation Freeing
127062306a36Sopenharmony_ci *
127162306a36Sopenharmony_ci * These functions automatically garbage collect leftover CoW reservations
127262306a36Sopenharmony_ci * that were made on behalf of a cowextsize hint when we start to run out
127362306a36Sopenharmony_ci * of quota or when the reservations sit around for too long.  If the file
127462306a36Sopenharmony_ci * has dirty pages or is undergoing writeback, its CoW reservations will
127562306a36Sopenharmony_ci * be retained.
127662306a36Sopenharmony_ci *
127762306a36Sopenharmony_ci * The actual garbage collection piggybacks off the same code that runs
127862306a36Sopenharmony_ci * the speculative EOF preallocation garbage collector.
127962306a36Sopenharmony_ci */
128062306a36Sopenharmony_ciSTATIC int
128162306a36Sopenharmony_cixfs_inode_free_cowblocks(
128262306a36Sopenharmony_ci	struct xfs_inode	*ip,
128362306a36Sopenharmony_ci	struct xfs_icwalk	*icw,
128462306a36Sopenharmony_ci	unsigned int		*lockflags)
128562306a36Sopenharmony_ci{
128662306a36Sopenharmony_ci	bool			wait;
128762306a36Sopenharmony_ci	int			ret = 0;
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci	if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
129262306a36Sopenharmony_ci		return 0;
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	if (!xfs_prep_free_cowblocks(ip))
129562306a36Sopenharmony_ci		return 0;
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	if (!xfs_icwalk_match(ip, icw))
129862306a36Sopenharmony_ci		return 0;
129962306a36Sopenharmony_ci
130062306a36Sopenharmony_ci	/*
130162306a36Sopenharmony_ci	 * If the caller is waiting, return -EAGAIN to keep the background
130262306a36Sopenharmony_ci	 * scanner moving and revisit the inode in a subsequent pass.
130362306a36Sopenharmony_ci	 */
130462306a36Sopenharmony_ci	if (!(*lockflags & XFS_IOLOCK_EXCL) &&
130562306a36Sopenharmony_ci	    !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
130662306a36Sopenharmony_ci		if (wait)
130762306a36Sopenharmony_ci			return -EAGAIN;
130862306a36Sopenharmony_ci		return 0;
130962306a36Sopenharmony_ci	}
131062306a36Sopenharmony_ci	*lockflags |= XFS_IOLOCK_EXCL;
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci	if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) {
131362306a36Sopenharmony_ci		if (wait)
131462306a36Sopenharmony_ci			return -EAGAIN;
131562306a36Sopenharmony_ci		return 0;
131662306a36Sopenharmony_ci	}
131762306a36Sopenharmony_ci	*lockflags |= XFS_MMAPLOCK_EXCL;
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	/*
132062306a36Sopenharmony_ci	 * Check again, nobody else should be able to dirty blocks or change
132162306a36Sopenharmony_ci	 * the reflink iflag now that we have the first two locks held.
132262306a36Sopenharmony_ci	 */
132362306a36Sopenharmony_ci	if (xfs_prep_free_cowblocks(ip))
132462306a36Sopenharmony_ci		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
132562306a36Sopenharmony_ci	return ret;
132662306a36Sopenharmony_ci}
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_civoid
132962306a36Sopenharmony_cixfs_inode_set_cowblocks_tag(
133062306a36Sopenharmony_ci	xfs_inode_t	*ip)
133162306a36Sopenharmony_ci{
133262306a36Sopenharmony_ci	trace_xfs_inode_set_cowblocks_tag(ip);
133362306a36Sopenharmony_ci	return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS);
133462306a36Sopenharmony_ci}
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_civoid
133762306a36Sopenharmony_cixfs_inode_clear_cowblocks_tag(
133862306a36Sopenharmony_ci	xfs_inode_t	*ip)
133962306a36Sopenharmony_ci{
134062306a36Sopenharmony_ci	trace_xfs_inode_clear_cowblocks_tag(ip);
134162306a36Sopenharmony_ci	return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS);
134262306a36Sopenharmony_ci}
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci/* Disable post-EOF and CoW block auto-reclamation. */
134562306a36Sopenharmony_civoid
134662306a36Sopenharmony_cixfs_blockgc_stop(
134762306a36Sopenharmony_ci	struct xfs_mount	*mp)
134862306a36Sopenharmony_ci{
134962306a36Sopenharmony_ci	struct xfs_perag	*pag;
135062306a36Sopenharmony_ci	xfs_agnumber_t		agno;
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci	if (!xfs_clear_blockgc_enabled(mp))
135362306a36Sopenharmony_ci		return;
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ci	for_each_perag(mp, agno, pag)
135662306a36Sopenharmony_ci		cancel_delayed_work_sync(&pag->pag_blockgc_work);
135762306a36Sopenharmony_ci	trace_xfs_blockgc_stop(mp, __return_address);
135862306a36Sopenharmony_ci}
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci/* Enable post-EOF and CoW block auto-reclamation. */
136162306a36Sopenharmony_civoid
136262306a36Sopenharmony_cixfs_blockgc_start(
136362306a36Sopenharmony_ci	struct xfs_mount	*mp)
136462306a36Sopenharmony_ci{
136562306a36Sopenharmony_ci	struct xfs_perag	*pag;
136662306a36Sopenharmony_ci	xfs_agnumber_t		agno;
136762306a36Sopenharmony_ci
136862306a36Sopenharmony_ci	if (xfs_set_blockgc_enabled(mp))
136962306a36Sopenharmony_ci		return;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	trace_xfs_blockgc_start(mp, __return_address);
137262306a36Sopenharmony_ci	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
137362306a36Sopenharmony_ci		xfs_blockgc_queue(pag);
137462306a36Sopenharmony_ci}
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci/* Don't try to run block gc on an inode that's in any of these states. */
137762306a36Sopenharmony_ci#define XFS_BLOCKGC_NOGRAB_IFLAGS	(XFS_INEW | \
137862306a36Sopenharmony_ci					 XFS_NEED_INACTIVE | \
137962306a36Sopenharmony_ci					 XFS_INACTIVATING | \
138062306a36Sopenharmony_ci					 XFS_IRECLAIMABLE | \
138162306a36Sopenharmony_ci					 XFS_IRECLAIM)
138262306a36Sopenharmony_ci/*
138362306a36Sopenharmony_ci * Decide if the given @ip is eligible for garbage collection of speculative
138462306a36Sopenharmony_ci * preallocations, and grab it if so.  Returns true if it's ready to go or
138562306a36Sopenharmony_ci * false if we should just ignore it.
138662306a36Sopenharmony_ci */
138762306a36Sopenharmony_cistatic bool
138862306a36Sopenharmony_cixfs_blockgc_igrab(
138962306a36Sopenharmony_ci	struct xfs_inode	*ip)
139062306a36Sopenharmony_ci{
139162306a36Sopenharmony_ci	struct inode		*inode = VFS_I(ip);
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	ASSERT(rcu_read_lock_held());
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	/* Check for stale RCU freed inode */
139662306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
139762306a36Sopenharmony_ci	if (!ip->i_ino)
139862306a36Sopenharmony_ci		goto out_unlock_noent;
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	if (ip->i_flags & XFS_BLOCKGC_NOGRAB_IFLAGS)
140162306a36Sopenharmony_ci		goto out_unlock_noent;
140262306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	/* nothing to sync during shutdown */
140562306a36Sopenharmony_ci	if (xfs_is_shutdown(ip->i_mount))
140662306a36Sopenharmony_ci		return false;
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci	/* If we can't grab the inode, it must on it's way to reclaim. */
140962306a36Sopenharmony_ci	if (!igrab(inode))
141062306a36Sopenharmony_ci		return false;
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci	/* inode is valid */
141362306a36Sopenharmony_ci	return true;
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ciout_unlock_noent:
141662306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
141762306a36Sopenharmony_ci	return false;
141862306a36Sopenharmony_ci}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci/* Scan one incore inode for block preallocations that we can remove. */
142162306a36Sopenharmony_cistatic int
142262306a36Sopenharmony_cixfs_blockgc_scan_inode(
142362306a36Sopenharmony_ci	struct xfs_inode	*ip,
142462306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
142562306a36Sopenharmony_ci{
142662306a36Sopenharmony_ci	unsigned int		lockflags = 0;
142762306a36Sopenharmony_ci	int			error;
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	error = xfs_inode_free_eofblocks(ip, icw, &lockflags);
143062306a36Sopenharmony_ci	if (error)
143162306a36Sopenharmony_ci		goto unlock;
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci	error = xfs_inode_free_cowblocks(ip, icw, &lockflags);
143462306a36Sopenharmony_ciunlock:
143562306a36Sopenharmony_ci	if (lockflags)
143662306a36Sopenharmony_ci		xfs_iunlock(ip, lockflags);
143762306a36Sopenharmony_ci	xfs_irele(ip);
143862306a36Sopenharmony_ci	return error;
143962306a36Sopenharmony_ci}
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci/* Background worker that trims preallocated space. */
144262306a36Sopenharmony_civoid
144362306a36Sopenharmony_cixfs_blockgc_worker(
144462306a36Sopenharmony_ci	struct work_struct	*work)
144562306a36Sopenharmony_ci{
144662306a36Sopenharmony_ci	struct xfs_perag	*pag = container_of(to_delayed_work(work),
144762306a36Sopenharmony_ci					struct xfs_perag, pag_blockgc_work);
144862306a36Sopenharmony_ci	struct xfs_mount	*mp = pag->pag_mount;
144962306a36Sopenharmony_ci	int			error;
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci	trace_xfs_blockgc_worker(mp, __return_address);
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci	error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL);
145462306a36Sopenharmony_ci	if (error)
145562306a36Sopenharmony_ci		xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
145662306a36Sopenharmony_ci				pag->pag_agno, error);
145762306a36Sopenharmony_ci	xfs_blockgc_queue(pag);
145862306a36Sopenharmony_ci}
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci/*
146162306a36Sopenharmony_ci * Try to free space in the filesystem by purging inactive inodes, eofblocks
146262306a36Sopenharmony_ci * and cowblocks.
146362306a36Sopenharmony_ci */
146462306a36Sopenharmony_ciint
146562306a36Sopenharmony_cixfs_blockgc_free_space(
146662306a36Sopenharmony_ci	struct xfs_mount	*mp,
146762306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
146862306a36Sopenharmony_ci{
146962306a36Sopenharmony_ci	int			error;
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_ci	trace_xfs_blockgc_free_space(mp, icw, _RET_IP_);
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci	error = xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw);
147462306a36Sopenharmony_ci	if (error)
147562306a36Sopenharmony_ci		return error;
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci	return xfs_inodegc_flush(mp);
147862306a36Sopenharmony_ci}
147962306a36Sopenharmony_ci
148062306a36Sopenharmony_ci/*
148162306a36Sopenharmony_ci * Reclaim all the free space that we can by scheduling the background blockgc
148262306a36Sopenharmony_ci * and inodegc workers immediately and waiting for them all to clear.
148362306a36Sopenharmony_ci */
148462306a36Sopenharmony_ciint
148562306a36Sopenharmony_cixfs_blockgc_flush_all(
148662306a36Sopenharmony_ci	struct xfs_mount	*mp)
148762306a36Sopenharmony_ci{
148862306a36Sopenharmony_ci	struct xfs_perag	*pag;
148962306a36Sopenharmony_ci	xfs_agnumber_t		agno;
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci	trace_xfs_blockgc_flush_all(mp, __return_address);
149262306a36Sopenharmony_ci
149362306a36Sopenharmony_ci	/*
149462306a36Sopenharmony_ci	 * For each blockgc worker, move its queue time up to now.  If it
149562306a36Sopenharmony_ci	 * wasn't queued, it will not be requeued.  Then flush whatever's
149662306a36Sopenharmony_ci	 * left.
149762306a36Sopenharmony_ci	 */
149862306a36Sopenharmony_ci	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
149962306a36Sopenharmony_ci		mod_delayed_work(pag->pag_mount->m_blockgc_wq,
150062306a36Sopenharmony_ci				&pag->pag_blockgc_work, 0);
150162306a36Sopenharmony_ci
150262306a36Sopenharmony_ci	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
150362306a36Sopenharmony_ci		flush_delayed_work(&pag->pag_blockgc_work);
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	return xfs_inodegc_flush(mp);
150662306a36Sopenharmony_ci}
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci/*
150962306a36Sopenharmony_ci * Run cow/eofblocks scans on the supplied dquots.  We don't know exactly which
151062306a36Sopenharmony_ci * quota caused an allocation failure, so we make a best effort by including
151162306a36Sopenharmony_ci * each quota under low free space conditions (less than 1% free space) in the
151262306a36Sopenharmony_ci * scan.
151362306a36Sopenharmony_ci *
151462306a36Sopenharmony_ci * Callers must not hold any inode's ILOCK.  If requesting a synchronous scan
151562306a36Sopenharmony_ci * (XFS_ICWALK_FLAG_SYNC), the caller also must not hold any inode's IOLOCK or
151662306a36Sopenharmony_ci * MMAPLOCK.
151762306a36Sopenharmony_ci */
151862306a36Sopenharmony_ciint
151962306a36Sopenharmony_cixfs_blockgc_free_dquots(
152062306a36Sopenharmony_ci	struct xfs_mount	*mp,
152162306a36Sopenharmony_ci	struct xfs_dquot	*udqp,
152262306a36Sopenharmony_ci	struct xfs_dquot	*gdqp,
152362306a36Sopenharmony_ci	struct xfs_dquot	*pdqp,
152462306a36Sopenharmony_ci	unsigned int		iwalk_flags)
152562306a36Sopenharmony_ci{
152662306a36Sopenharmony_ci	struct xfs_icwalk	icw = {0};
152762306a36Sopenharmony_ci	bool			do_work = false;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	if (!udqp && !gdqp && !pdqp)
153062306a36Sopenharmony_ci		return 0;
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_ci	/*
153362306a36Sopenharmony_ci	 * Run a scan to free blocks using the union filter to cover all
153462306a36Sopenharmony_ci	 * applicable quotas in a single scan.
153562306a36Sopenharmony_ci	 */
153662306a36Sopenharmony_ci	icw.icw_flags = XFS_ICWALK_FLAG_UNION | iwalk_flags;
153762306a36Sopenharmony_ci
153862306a36Sopenharmony_ci	if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) {
153962306a36Sopenharmony_ci		icw.icw_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id);
154062306a36Sopenharmony_ci		icw.icw_flags |= XFS_ICWALK_FLAG_UID;
154162306a36Sopenharmony_ci		do_work = true;
154262306a36Sopenharmony_ci	}
154362306a36Sopenharmony_ci
154462306a36Sopenharmony_ci	if (XFS_IS_UQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) {
154562306a36Sopenharmony_ci		icw.icw_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id);
154662306a36Sopenharmony_ci		icw.icw_flags |= XFS_ICWALK_FLAG_GID;
154762306a36Sopenharmony_ci		do_work = true;
154862306a36Sopenharmony_ci	}
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci	if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) {
155162306a36Sopenharmony_ci		icw.icw_prid = pdqp->q_id;
155262306a36Sopenharmony_ci		icw.icw_flags |= XFS_ICWALK_FLAG_PRID;
155362306a36Sopenharmony_ci		do_work = true;
155462306a36Sopenharmony_ci	}
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci	if (!do_work)
155762306a36Sopenharmony_ci		return 0;
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci	return xfs_blockgc_free_space(mp, &icw);
156062306a36Sopenharmony_ci}
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci/* Run cow/eofblocks scans on the quotas attached to the inode. */
156362306a36Sopenharmony_ciint
156462306a36Sopenharmony_cixfs_blockgc_free_quota(
156562306a36Sopenharmony_ci	struct xfs_inode	*ip,
156662306a36Sopenharmony_ci	unsigned int		iwalk_flags)
156762306a36Sopenharmony_ci{
156862306a36Sopenharmony_ci	return xfs_blockgc_free_dquots(ip->i_mount,
156962306a36Sopenharmony_ci			xfs_inode_dquot(ip, XFS_DQTYPE_USER),
157062306a36Sopenharmony_ci			xfs_inode_dquot(ip, XFS_DQTYPE_GROUP),
157162306a36Sopenharmony_ci			xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), iwalk_flags);
157262306a36Sopenharmony_ci}
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci/* XFS Inode Cache Walking Code */
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci/*
157762306a36Sopenharmony_ci * The inode lookup is done in batches to keep the amount of lock traffic and
157862306a36Sopenharmony_ci * radix tree lookups to a minimum. The batch size is a trade off between
157962306a36Sopenharmony_ci * lookup reduction and stack usage. This is in the reclaim path, so we can't
158062306a36Sopenharmony_ci * be too greedy.
158162306a36Sopenharmony_ci */
158262306a36Sopenharmony_ci#define XFS_LOOKUP_BATCH	32
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_ci
158562306a36Sopenharmony_ci/*
158662306a36Sopenharmony_ci * Decide if we want to grab this inode in anticipation of doing work towards
158762306a36Sopenharmony_ci * the goal.
158862306a36Sopenharmony_ci */
158962306a36Sopenharmony_cistatic inline bool
159062306a36Sopenharmony_cixfs_icwalk_igrab(
159162306a36Sopenharmony_ci	enum xfs_icwalk_goal	goal,
159262306a36Sopenharmony_ci	struct xfs_inode	*ip,
159362306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
159462306a36Sopenharmony_ci{
159562306a36Sopenharmony_ci	switch (goal) {
159662306a36Sopenharmony_ci	case XFS_ICWALK_BLOCKGC:
159762306a36Sopenharmony_ci		return xfs_blockgc_igrab(ip);
159862306a36Sopenharmony_ci	case XFS_ICWALK_RECLAIM:
159962306a36Sopenharmony_ci		return xfs_reclaim_igrab(ip, icw);
160062306a36Sopenharmony_ci	default:
160162306a36Sopenharmony_ci		return false;
160262306a36Sopenharmony_ci	}
160362306a36Sopenharmony_ci}
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci/*
160662306a36Sopenharmony_ci * Process an inode.  Each processing function must handle any state changes
160762306a36Sopenharmony_ci * made by the icwalk igrab function.  Return -EAGAIN to skip an inode.
160862306a36Sopenharmony_ci */
160962306a36Sopenharmony_cistatic inline int
161062306a36Sopenharmony_cixfs_icwalk_process_inode(
161162306a36Sopenharmony_ci	enum xfs_icwalk_goal	goal,
161262306a36Sopenharmony_ci	struct xfs_inode	*ip,
161362306a36Sopenharmony_ci	struct xfs_perag	*pag,
161462306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
161562306a36Sopenharmony_ci{
161662306a36Sopenharmony_ci	int			error = 0;
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	switch (goal) {
161962306a36Sopenharmony_ci	case XFS_ICWALK_BLOCKGC:
162062306a36Sopenharmony_ci		error = xfs_blockgc_scan_inode(ip, icw);
162162306a36Sopenharmony_ci		break;
162262306a36Sopenharmony_ci	case XFS_ICWALK_RECLAIM:
162362306a36Sopenharmony_ci		xfs_reclaim_inode(ip, pag);
162462306a36Sopenharmony_ci		break;
162562306a36Sopenharmony_ci	}
162662306a36Sopenharmony_ci	return error;
162762306a36Sopenharmony_ci}
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_ci/*
163062306a36Sopenharmony_ci * For a given per-AG structure @pag and a goal, grab qualifying inodes and
163162306a36Sopenharmony_ci * process them in some manner.
163262306a36Sopenharmony_ci */
163362306a36Sopenharmony_cistatic int
163462306a36Sopenharmony_cixfs_icwalk_ag(
163562306a36Sopenharmony_ci	struct xfs_perag	*pag,
163662306a36Sopenharmony_ci	enum xfs_icwalk_goal	goal,
163762306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
163862306a36Sopenharmony_ci{
163962306a36Sopenharmony_ci	struct xfs_mount	*mp = pag->pag_mount;
164062306a36Sopenharmony_ci	uint32_t		first_index;
164162306a36Sopenharmony_ci	int			last_error = 0;
164262306a36Sopenharmony_ci	int			skipped;
164362306a36Sopenharmony_ci	bool			done;
164462306a36Sopenharmony_ci	int			nr_found;
164562306a36Sopenharmony_ci
164662306a36Sopenharmony_cirestart:
164762306a36Sopenharmony_ci	done = false;
164862306a36Sopenharmony_ci	skipped = 0;
164962306a36Sopenharmony_ci	if (goal == XFS_ICWALK_RECLAIM)
165062306a36Sopenharmony_ci		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
165162306a36Sopenharmony_ci	else
165262306a36Sopenharmony_ci		first_index = 0;
165362306a36Sopenharmony_ci	nr_found = 0;
165462306a36Sopenharmony_ci	do {
165562306a36Sopenharmony_ci		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
165662306a36Sopenharmony_ci		int		error = 0;
165762306a36Sopenharmony_ci		int		i;
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ci		rcu_read_lock();
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
166262306a36Sopenharmony_ci				(void **) batch, first_index,
166362306a36Sopenharmony_ci				XFS_LOOKUP_BATCH, goal);
166462306a36Sopenharmony_ci		if (!nr_found) {
166562306a36Sopenharmony_ci			done = true;
166662306a36Sopenharmony_ci			rcu_read_unlock();
166762306a36Sopenharmony_ci			break;
166862306a36Sopenharmony_ci		}
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ci		/*
167162306a36Sopenharmony_ci		 * Grab the inodes before we drop the lock. if we found
167262306a36Sopenharmony_ci		 * nothing, nr == 0 and the loop will be skipped.
167362306a36Sopenharmony_ci		 */
167462306a36Sopenharmony_ci		for (i = 0; i < nr_found; i++) {
167562306a36Sopenharmony_ci			struct xfs_inode *ip = batch[i];
167662306a36Sopenharmony_ci
167762306a36Sopenharmony_ci			if (done || !xfs_icwalk_igrab(goal, ip, icw))
167862306a36Sopenharmony_ci				batch[i] = NULL;
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci			/*
168162306a36Sopenharmony_ci			 * Update the index for the next lookup. Catch
168262306a36Sopenharmony_ci			 * overflows into the next AG range which can occur if
168362306a36Sopenharmony_ci			 * we have inodes in the last block of the AG and we
168462306a36Sopenharmony_ci			 * are currently pointing to the last inode.
168562306a36Sopenharmony_ci			 *
168662306a36Sopenharmony_ci			 * Because we may see inodes that are from the wrong AG
168762306a36Sopenharmony_ci			 * due to RCU freeing and reallocation, only update the
168862306a36Sopenharmony_ci			 * index if it lies in this AG. It was a race that lead
168962306a36Sopenharmony_ci			 * us to see this inode, so another lookup from the
169062306a36Sopenharmony_ci			 * same index will not find it again.
169162306a36Sopenharmony_ci			 */
169262306a36Sopenharmony_ci			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
169362306a36Sopenharmony_ci				continue;
169462306a36Sopenharmony_ci			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
169562306a36Sopenharmony_ci			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
169662306a36Sopenharmony_ci				done = true;
169762306a36Sopenharmony_ci		}
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci		/* unlock now we've grabbed the inodes. */
170062306a36Sopenharmony_ci		rcu_read_unlock();
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci		for (i = 0; i < nr_found; i++) {
170362306a36Sopenharmony_ci			if (!batch[i])
170462306a36Sopenharmony_ci				continue;
170562306a36Sopenharmony_ci			error = xfs_icwalk_process_inode(goal, batch[i], pag,
170662306a36Sopenharmony_ci					icw);
170762306a36Sopenharmony_ci			if (error == -EAGAIN) {
170862306a36Sopenharmony_ci				skipped++;
170962306a36Sopenharmony_ci				continue;
171062306a36Sopenharmony_ci			}
171162306a36Sopenharmony_ci			if (error && last_error != -EFSCORRUPTED)
171262306a36Sopenharmony_ci				last_error = error;
171362306a36Sopenharmony_ci		}
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci		/* bail out if the filesystem is corrupted.  */
171662306a36Sopenharmony_ci		if (error == -EFSCORRUPTED)
171762306a36Sopenharmony_ci			break;
171862306a36Sopenharmony_ci
171962306a36Sopenharmony_ci		cond_resched();
172062306a36Sopenharmony_ci
172162306a36Sopenharmony_ci		if (icw && (icw->icw_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
172262306a36Sopenharmony_ci			icw->icw_scan_limit -= XFS_LOOKUP_BATCH;
172362306a36Sopenharmony_ci			if (icw->icw_scan_limit <= 0)
172462306a36Sopenharmony_ci				break;
172562306a36Sopenharmony_ci		}
172662306a36Sopenharmony_ci	} while (nr_found && !done);
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	if (goal == XFS_ICWALK_RECLAIM) {
172962306a36Sopenharmony_ci		if (done)
173062306a36Sopenharmony_ci			first_index = 0;
173162306a36Sopenharmony_ci		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
173262306a36Sopenharmony_ci	}
173362306a36Sopenharmony_ci
173462306a36Sopenharmony_ci	if (skipped) {
173562306a36Sopenharmony_ci		delay(1);
173662306a36Sopenharmony_ci		goto restart;
173762306a36Sopenharmony_ci	}
173862306a36Sopenharmony_ci	return last_error;
173962306a36Sopenharmony_ci}
174062306a36Sopenharmony_ci
174162306a36Sopenharmony_ci/* Walk all incore inodes to achieve a given goal. */
174262306a36Sopenharmony_cistatic int
174362306a36Sopenharmony_cixfs_icwalk(
174462306a36Sopenharmony_ci	struct xfs_mount	*mp,
174562306a36Sopenharmony_ci	enum xfs_icwalk_goal	goal,
174662306a36Sopenharmony_ci	struct xfs_icwalk	*icw)
174762306a36Sopenharmony_ci{
174862306a36Sopenharmony_ci	struct xfs_perag	*pag;
174962306a36Sopenharmony_ci	int			error = 0;
175062306a36Sopenharmony_ci	int			last_error = 0;
175162306a36Sopenharmony_ci	xfs_agnumber_t		agno;
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	for_each_perag_tag(mp, agno, pag, goal) {
175462306a36Sopenharmony_ci		error = xfs_icwalk_ag(pag, goal, icw);
175562306a36Sopenharmony_ci		if (error) {
175662306a36Sopenharmony_ci			last_error = error;
175762306a36Sopenharmony_ci			if (error == -EFSCORRUPTED) {
175862306a36Sopenharmony_ci				xfs_perag_rele(pag);
175962306a36Sopenharmony_ci				break;
176062306a36Sopenharmony_ci			}
176162306a36Sopenharmony_ci		}
176262306a36Sopenharmony_ci	}
176362306a36Sopenharmony_ci	return last_error;
176462306a36Sopenharmony_ci	BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_ICWALK_FLAGS_VALID);
176562306a36Sopenharmony_ci}
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci#ifdef DEBUG
176862306a36Sopenharmony_cistatic void
176962306a36Sopenharmony_cixfs_check_delalloc(
177062306a36Sopenharmony_ci	struct xfs_inode	*ip,
177162306a36Sopenharmony_ci	int			whichfork)
177262306a36Sopenharmony_ci{
177362306a36Sopenharmony_ci	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
177462306a36Sopenharmony_ci	struct xfs_bmbt_irec	got;
177562306a36Sopenharmony_ci	struct xfs_iext_cursor	icur;
177662306a36Sopenharmony_ci
177762306a36Sopenharmony_ci	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
177862306a36Sopenharmony_ci		return;
177962306a36Sopenharmony_ci	do {
178062306a36Sopenharmony_ci		if (isnullstartblock(got.br_startblock)) {
178162306a36Sopenharmony_ci			xfs_warn(ip->i_mount,
178262306a36Sopenharmony_ci	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
178362306a36Sopenharmony_ci				ip->i_ino,
178462306a36Sopenharmony_ci				whichfork == XFS_DATA_FORK ? "data" : "cow",
178562306a36Sopenharmony_ci				got.br_startoff, got.br_blockcount);
178662306a36Sopenharmony_ci		}
178762306a36Sopenharmony_ci	} while (xfs_iext_next_extent(ifp, &icur, &got));
178862306a36Sopenharmony_ci}
178962306a36Sopenharmony_ci#else
179062306a36Sopenharmony_ci#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
179162306a36Sopenharmony_ci#endif
179262306a36Sopenharmony_ci
179362306a36Sopenharmony_ci/* Schedule the inode for reclaim. */
179462306a36Sopenharmony_cistatic void
179562306a36Sopenharmony_cixfs_inodegc_set_reclaimable(
179662306a36Sopenharmony_ci	struct xfs_inode	*ip)
179762306a36Sopenharmony_ci{
179862306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
179962306a36Sopenharmony_ci	struct xfs_perag	*pag;
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	if (!xfs_is_shutdown(mp) && ip->i_delayed_blks) {
180262306a36Sopenharmony_ci		xfs_check_delalloc(ip, XFS_DATA_FORK);
180362306a36Sopenharmony_ci		xfs_check_delalloc(ip, XFS_COW_FORK);
180462306a36Sopenharmony_ci		ASSERT(0);
180562306a36Sopenharmony_ci	}
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
180862306a36Sopenharmony_ci	spin_lock(&pag->pag_ici_lock);
180962306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
181062306a36Sopenharmony_ci
181162306a36Sopenharmony_ci	trace_xfs_inode_set_reclaimable(ip);
181262306a36Sopenharmony_ci	ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING);
181362306a36Sopenharmony_ci	ip->i_flags |= XFS_IRECLAIMABLE;
181462306a36Sopenharmony_ci	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
181562306a36Sopenharmony_ci			XFS_ICI_RECLAIM_TAG);
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
181862306a36Sopenharmony_ci	spin_unlock(&pag->pag_ici_lock);
181962306a36Sopenharmony_ci	xfs_perag_put(pag);
182062306a36Sopenharmony_ci}
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_ci/*
182362306a36Sopenharmony_ci * Free all speculative preallocations and possibly even the inode itself.
182462306a36Sopenharmony_ci * This is the last chance to make changes to an otherwise unreferenced file
182562306a36Sopenharmony_ci * before incore reclamation happens.
182662306a36Sopenharmony_ci */
182762306a36Sopenharmony_cistatic int
182862306a36Sopenharmony_cixfs_inodegc_inactivate(
182962306a36Sopenharmony_ci	struct xfs_inode	*ip)
183062306a36Sopenharmony_ci{
183162306a36Sopenharmony_ci	int			error;
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci	trace_xfs_inode_inactivating(ip);
183462306a36Sopenharmony_ci	error = xfs_inactive(ip);
183562306a36Sopenharmony_ci	xfs_inodegc_set_reclaimable(ip);
183662306a36Sopenharmony_ci	return error;
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci}
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_civoid
184162306a36Sopenharmony_cixfs_inodegc_worker(
184262306a36Sopenharmony_ci	struct work_struct	*work)
184362306a36Sopenharmony_ci{
184462306a36Sopenharmony_ci	struct xfs_inodegc	*gc = container_of(to_delayed_work(work),
184562306a36Sopenharmony_ci						struct xfs_inodegc, work);
184662306a36Sopenharmony_ci	struct llist_node	*node = llist_del_all(&gc->list);
184762306a36Sopenharmony_ci	struct xfs_inode	*ip, *n;
184862306a36Sopenharmony_ci	struct xfs_mount	*mp = gc->mp;
184962306a36Sopenharmony_ci	unsigned int		nofs_flag;
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci	/*
185262306a36Sopenharmony_ci	 * Clear the cpu mask bit and ensure that we have seen the latest
185362306a36Sopenharmony_ci	 * update of the gc structure associated with this CPU. This matches
185462306a36Sopenharmony_ci	 * with the release semantics used when setting the cpumask bit in
185562306a36Sopenharmony_ci	 * xfs_inodegc_queue.
185662306a36Sopenharmony_ci	 */
185762306a36Sopenharmony_ci	cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask);
185862306a36Sopenharmony_ci	smp_mb__after_atomic();
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci	WRITE_ONCE(gc->items, 0);
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_ci	if (!node)
186362306a36Sopenharmony_ci		return;
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci	/*
186662306a36Sopenharmony_ci	 * We can allocate memory here while doing writeback on behalf of
186762306a36Sopenharmony_ci	 * memory reclaim.  To avoid memory allocation deadlocks set the
186862306a36Sopenharmony_ci	 * task-wide nofs context for the following operations.
186962306a36Sopenharmony_ci	 */
187062306a36Sopenharmony_ci	nofs_flag = memalloc_nofs_save();
187162306a36Sopenharmony_ci
187262306a36Sopenharmony_ci	ip = llist_entry(node, struct xfs_inode, i_gclist);
187362306a36Sopenharmony_ci	trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits));
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci	WRITE_ONCE(gc->shrinker_hits, 0);
187662306a36Sopenharmony_ci	llist_for_each_entry_safe(ip, n, node, i_gclist) {
187762306a36Sopenharmony_ci		int	error;
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_ci		xfs_iflags_set(ip, XFS_INACTIVATING);
188062306a36Sopenharmony_ci		error = xfs_inodegc_inactivate(ip);
188162306a36Sopenharmony_ci		if (error && !gc->error)
188262306a36Sopenharmony_ci			gc->error = error;
188362306a36Sopenharmony_ci	}
188462306a36Sopenharmony_ci
188562306a36Sopenharmony_ci	memalloc_nofs_restore(nofs_flag);
188662306a36Sopenharmony_ci}
188762306a36Sopenharmony_ci
188862306a36Sopenharmony_ci/*
188962306a36Sopenharmony_ci * Expedite all pending inodegc work to run immediately. This does not wait for
189062306a36Sopenharmony_ci * completion of the work.
189162306a36Sopenharmony_ci */
189262306a36Sopenharmony_civoid
189362306a36Sopenharmony_cixfs_inodegc_push(
189462306a36Sopenharmony_ci	struct xfs_mount	*mp)
189562306a36Sopenharmony_ci{
189662306a36Sopenharmony_ci	if (!xfs_is_inodegc_enabled(mp))
189762306a36Sopenharmony_ci		return;
189862306a36Sopenharmony_ci	trace_xfs_inodegc_push(mp, __return_address);
189962306a36Sopenharmony_ci	xfs_inodegc_queue_all(mp);
190062306a36Sopenharmony_ci}
190162306a36Sopenharmony_ci
190262306a36Sopenharmony_ci/*
190362306a36Sopenharmony_ci * Force all currently queued inode inactivation work to run immediately and
190462306a36Sopenharmony_ci * wait for the work to finish.
190562306a36Sopenharmony_ci */
190662306a36Sopenharmony_ciint
190762306a36Sopenharmony_cixfs_inodegc_flush(
190862306a36Sopenharmony_ci	struct xfs_mount	*mp)
190962306a36Sopenharmony_ci{
191062306a36Sopenharmony_ci	xfs_inodegc_push(mp);
191162306a36Sopenharmony_ci	trace_xfs_inodegc_flush(mp, __return_address);
191262306a36Sopenharmony_ci	return xfs_inodegc_wait_all(mp);
191362306a36Sopenharmony_ci}
191462306a36Sopenharmony_ci
191562306a36Sopenharmony_ci/*
191662306a36Sopenharmony_ci * Flush all the pending work and then disable the inode inactivation background
191762306a36Sopenharmony_ci * workers and wait for them to stop.  Caller must hold sb->s_umount to
191862306a36Sopenharmony_ci * coordinate changes in the inodegc_enabled state.
191962306a36Sopenharmony_ci */
192062306a36Sopenharmony_civoid
192162306a36Sopenharmony_cixfs_inodegc_stop(
192262306a36Sopenharmony_ci	struct xfs_mount	*mp)
192362306a36Sopenharmony_ci{
192462306a36Sopenharmony_ci	bool			rerun;
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ci	if (!xfs_clear_inodegc_enabled(mp))
192762306a36Sopenharmony_ci		return;
192862306a36Sopenharmony_ci
192962306a36Sopenharmony_ci	/*
193062306a36Sopenharmony_ci	 * Drain all pending inodegc work, including inodes that could be
193162306a36Sopenharmony_ci	 * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
193262306a36Sopenharmony_ci	 * threads that sample the inodegc state just prior to us clearing it.
193362306a36Sopenharmony_ci	 * The inodegc flag state prevents new threads from queuing more
193462306a36Sopenharmony_ci	 * inodes, so we queue pending work items and flush the workqueue until
193562306a36Sopenharmony_ci	 * all inodegc lists are empty.  IOWs, we cannot use drain_workqueue
193662306a36Sopenharmony_ci	 * here because it does not allow other unserialized mechanisms to
193762306a36Sopenharmony_ci	 * reschedule inodegc work while this draining is in progress.
193862306a36Sopenharmony_ci	 */
193962306a36Sopenharmony_ci	xfs_inodegc_queue_all(mp);
194062306a36Sopenharmony_ci	do {
194162306a36Sopenharmony_ci		flush_workqueue(mp->m_inodegc_wq);
194262306a36Sopenharmony_ci		rerun = xfs_inodegc_queue_all(mp);
194362306a36Sopenharmony_ci	} while (rerun);
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci	trace_xfs_inodegc_stop(mp, __return_address);
194662306a36Sopenharmony_ci}
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_ci/*
194962306a36Sopenharmony_ci * Enable the inode inactivation background workers and schedule deferred inode
195062306a36Sopenharmony_ci * inactivation work if there is any.  Caller must hold sb->s_umount to
195162306a36Sopenharmony_ci * coordinate changes in the inodegc_enabled state.
195262306a36Sopenharmony_ci */
195362306a36Sopenharmony_civoid
195462306a36Sopenharmony_cixfs_inodegc_start(
195562306a36Sopenharmony_ci	struct xfs_mount	*mp)
195662306a36Sopenharmony_ci{
195762306a36Sopenharmony_ci	if (xfs_set_inodegc_enabled(mp))
195862306a36Sopenharmony_ci		return;
195962306a36Sopenharmony_ci
196062306a36Sopenharmony_ci	trace_xfs_inodegc_start(mp, __return_address);
196162306a36Sopenharmony_ci	xfs_inodegc_queue_all(mp);
196262306a36Sopenharmony_ci}
196362306a36Sopenharmony_ci
#ifdef CONFIG_XFS_RT
/*
 * Expedite inactivation for realtime files when free rt extents are low
 * (below the 5% threshold).
 */
static inline bool
xfs_inodegc_want_queue_rt_file(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;

	return XFS_IS_REALTIME_INODE(ip) &&
	       __percpu_counter_compare(&mp->m_frextents,
				mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
				XFS_FDBLOCKS_BATCH) < 0;
}
#else
# define xfs_inodegc_want_queue_rt_file(ip)	(false)
#endif /* CONFIG_XFS_RT */
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci/*
198662306a36Sopenharmony_ci * Schedule the inactivation worker when:
198762306a36Sopenharmony_ci *
198862306a36Sopenharmony_ci *  - We've accumulated more than one inode cluster buffer's worth of inodes.
198962306a36Sopenharmony_ci *  - There is less than 5% free space left.
199062306a36Sopenharmony_ci *  - Any of the quotas for this inode are near an enforcement limit.
199162306a36Sopenharmony_ci */
199262306a36Sopenharmony_cistatic inline bool
199362306a36Sopenharmony_cixfs_inodegc_want_queue_work(
199462306a36Sopenharmony_ci	struct xfs_inode	*ip,
199562306a36Sopenharmony_ci	unsigned int		items)
199662306a36Sopenharmony_ci{
199762306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci	if (items > mp->m_ino_geo.inodes_per_cluster)
200062306a36Sopenharmony_ci		return true;
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci	if (__percpu_counter_compare(&mp->m_fdblocks,
200362306a36Sopenharmony_ci				mp->m_low_space[XFS_LOWSP_5_PCNT],
200462306a36Sopenharmony_ci				XFS_FDBLOCKS_BATCH) < 0)
200562306a36Sopenharmony_ci		return true;
200662306a36Sopenharmony_ci
200762306a36Sopenharmony_ci	if (xfs_inodegc_want_queue_rt_file(ip))
200862306a36Sopenharmony_ci		return true;
200962306a36Sopenharmony_ci
201062306a36Sopenharmony_ci	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER))
201162306a36Sopenharmony_ci		return true;
201262306a36Sopenharmony_ci
201362306a36Sopenharmony_ci	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP))
201462306a36Sopenharmony_ci		return true;
201562306a36Sopenharmony_ci
201662306a36Sopenharmony_ci	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ))
201762306a36Sopenharmony_ci		return true;
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_ci	return false;
202062306a36Sopenharmony_ci}
202162306a36Sopenharmony_ci
202262306a36Sopenharmony_ci/*
202362306a36Sopenharmony_ci * Upper bound on the number of inodes in each AG that can be queued for
202462306a36Sopenharmony_ci * inactivation at any given time, to avoid monopolizing the workqueue.
202562306a36Sopenharmony_ci */
202662306a36Sopenharmony_ci#define XFS_INODEGC_MAX_BACKLOG		(4 * XFS_INODES_PER_CHUNK)
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci/*
202962306a36Sopenharmony_ci * Make the frontend wait for inactivations when:
203062306a36Sopenharmony_ci *
203162306a36Sopenharmony_ci *  - Memory shrinkers queued the inactivation worker and it hasn't finished.
203262306a36Sopenharmony_ci *  - The queue depth exceeds the maximum allowable percpu backlog.
203362306a36Sopenharmony_ci *
203462306a36Sopenharmony_ci * Note: If the current thread is running a transaction, we don't ever want to
203562306a36Sopenharmony_ci * wait for other transactions because that could introduce a deadlock.
203662306a36Sopenharmony_ci */
203762306a36Sopenharmony_cistatic inline bool
203862306a36Sopenharmony_cixfs_inodegc_want_flush_work(
203962306a36Sopenharmony_ci	struct xfs_inode	*ip,
204062306a36Sopenharmony_ci	unsigned int		items,
204162306a36Sopenharmony_ci	unsigned int		shrinker_hits)
204262306a36Sopenharmony_ci{
204362306a36Sopenharmony_ci	if (current->journal_info)
204462306a36Sopenharmony_ci		return false;
204562306a36Sopenharmony_ci
204662306a36Sopenharmony_ci	if (shrinker_hits > 0)
204762306a36Sopenharmony_ci		return true;
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_ci	if (items > XFS_INODEGC_MAX_BACKLOG)
205062306a36Sopenharmony_ci		return true;
205162306a36Sopenharmony_ci
205262306a36Sopenharmony_ci	return false;
205362306a36Sopenharmony_ci}
205462306a36Sopenharmony_ci
205562306a36Sopenharmony_ci/*
205662306a36Sopenharmony_ci * Queue a background inactivation worker if there are inodes that need to be
205762306a36Sopenharmony_ci * inactivated and higher level xfs code hasn't disabled the background
205862306a36Sopenharmony_ci * workers.
205962306a36Sopenharmony_ci */
static void
xfs_inodegc_queue(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_inodegc	*gc;
	int			items;
	unsigned int		shrinker_hits;
	unsigned int		cpu_nr;
	unsigned long		queue_delay = 1;

	trace_xfs_inode_set_need_inactive(ip);
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= XFS_NEED_INACTIVE;
	spin_unlock(&ip->i_flags_lock);

	/* Disable preemption so the inode lands on this CPU's gc list. */
	cpu_nr = get_cpu();
	gc = this_cpu_ptr(mp->m_inodegc);
	llist_add(&ip->i_gclist, &gc->list);
	items = READ_ONCE(gc->items);
	WRITE_ONCE(gc->items, items + 1);
	shrinker_hits = READ_ONCE(gc->shrinker_hits);

	/*
	 * Ensure the list add is always seen by anyone who finds the cpumask
	 * bit set. This effectively gives the cpumask bit set operation
	 * release ordering semantics.
	 */
	smp_mb__before_atomic();
	if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask))
		cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask);

	/*
	 * We queue the work while holding the current CPU so that the work
	 * is scheduled to run on this CPU.
	 */
	if (!xfs_is_inodegc_enabled(mp)) {
		put_cpu();
		return;
	}

	/* Expedite the worker when resources are low or the backlog grows. */
	if (xfs_inodegc_want_queue_work(ip, items))
		queue_delay = 0;

	trace_xfs_inodegc_queue(mp, __return_address);
	mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
			queue_delay);
	put_cpu();

	/* Throttle the frontend by waiting if the backlog is too deep. */
	if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
		trace_xfs_inodegc_throttle(mp, __return_address);
		flush_delayed_work(&gc->work);
	}
}
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci/*
211662306a36Sopenharmony_ci * We set the inode flag atomically with the radix tree tag.  Once we get tag
211762306a36Sopenharmony_ci * lookups on the radix tree, this inode flag can go away.
211862306a36Sopenharmony_ci *
211962306a36Sopenharmony_ci * We always use background reclaim here because even if the inode is clean, it
212062306a36Sopenharmony_ci * still may be under IO and hence we have wait for IO completion to occur
212162306a36Sopenharmony_ci * before we can reclaim the inode. The background reclaim path handles this
212262306a36Sopenharmony_ci * more efficiently than we can here, so simply let background reclaim tear down
212362306a36Sopenharmony_ci * all inodes.
212462306a36Sopenharmony_ci */
212562306a36Sopenharmony_civoid
212662306a36Sopenharmony_cixfs_inode_mark_reclaimable(
212762306a36Sopenharmony_ci	struct xfs_inode	*ip)
212862306a36Sopenharmony_ci{
212962306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
213062306a36Sopenharmony_ci	bool			need_inactive;
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci	XFS_STATS_INC(mp, vn_reclaim);
213362306a36Sopenharmony_ci
213462306a36Sopenharmony_ci	/*
213562306a36Sopenharmony_ci	 * We should never get here with any of the reclaim flags already set.
213662306a36Sopenharmony_ci	 */
213762306a36Sopenharmony_ci	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_ALL_IRECLAIM_FLAGS));
213862306a36Sopenharmony_ci
213962306a36Sopenharmony_ci	need_inactive = xfs_inode_needs_inactive(ip);
214062306a36Sopenharmony_ci	if (need_inactive) {
214162306a36Sopenharmony_ci		xfs_inodegc_queue(ip);
214262306a36Sopenharmony_ci		return;
214362306a36Sopenharmony_ci	}
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	/* Going straight to reclaim, so drop the dquots. */
214662306a36Sopenharmony_ci	xfs_qm_dqdetach(ip);
214762306a36Sopenharmony_ci	xfs_inodegc_set_reclaimable(ip);
214862306a36Sopenharmony_ci}
214962306a36Sopenharmony_ci
215062306a36Sopenharmony_ci/*
215162306a36Sopenharmony_ci * Register a phony shrinker so that we can run background inodegc sooner when
215262306a36Sopenharmony_ci * there's memory pressure.  Inactivation does not itself free any memory but
215362306a36Sopenharmony_ci * it does make inodes reclaimable, which eventually frees memory.
215462306a36Sopenharmony_ci *
215562306a36Sopenharmony_ci * The count function, seek value, and batch value are crafted to trigger the
215662306a36Sopenharmony_ci * scan function during the second round of scanning.  Hopefully this means
215762306a36Sopenharmony_ci * that we reclaimed enough memory that initiating metadata transactions won't
215862306a36Sopenharmony_ci * make things worse.
215962306a36Sopenharmony_ci */
216062306a36Sopenharmony_ci#define XFS_INODEGC_SHRINKER_COUNT	(1UL << DEF_PRIORITY)
216162306a36Sopenharmony_ci#define XFS_INODEGC_SHRINKER_BATCH	((XFS_INODEGC_SHRINKER_COUNT / 2) + 1)
216262306a36Sopenharmony_ci
216362306a36Sopenharmony_cistatic unsigned long
216462306a36Sopenharmony_cixfs_inodegc_shrinker_count(
216562306a36Sopenharmony_ci	struct shrinker		*shrink,
216662306a36Sopenharmony_ci	struct shrink_control	*sc)
216762306a36Sopenharmony_ci{
216862306a36Sopenharmony_ci	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
216962306a36Sopenharmony_ci						   m_inodegc_shrinker);
217062306a36Sopenharmony_ci	struct xfs_inodegc	*gc;
217162306a36Sopenharmony_ci	int			cpu;
217262306a36Sopenharmony_ci
217362306a36Sopenharmony_ci	if (!xfs_is_inodegc_enabled(mp))
217462306a36Sopenharmony_ci		return 0;
217562306a36Sopenharmony_ci
217662306a36Sopenharmony_ci	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
217762306a36Sopenharmony_ci		gc = per_cpu_ptr(mp->m_inodegc, cpu);
217862306a36Sopenharmony_ci		if (!llist_empty(&gc->list))
217962306a36Sopenharmony_ci			return XFS_INODEGC_SHRINKER_COUNT;
218062306a36Sopenharmony_ci	}
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_ci	return 0;
218362306a36Sopenharmony_ci}
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_cistatic unsigned long
218662306a36Sopenharmony_cixfs_inodegc_shrinker_scan(
218762306a36Sopenharmony_ci	struct shrinker		*shrink,
218862306a36Sopenharmony_ci	struct shrink_control	*sc)
218962306a36Sopenharmony_ci{
219062306a36Sopenharmony_ci	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
219162306a36Sopenharmony_ci						   m_inodegc_shrinker);
219262306a36Sopenharmony_ci	struct xfs_inodegc	*gc;
219362306a36Sopenharmony_ci	int			cpu;
219462306a36Sopenharmony_ci	bool			no_items = true;
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci	if (!xfs_is_inodegc_enabled(mp))
219762306a36Sopenharmony_ci		return SHRINK_STOP;
219862306a36Sopenharmony_ci
219962306a36Sopenharmony_ci	trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
220262306a36Sopenharmony_ci		gc = per_cpu_ptr(mp->m_inodegc, cpu);
220362306a36Sopenharmony_ci		if (!llist_empty(&gc->list)) {
220462306a36Sopenharmony_ci			unsigned int	h = READ_ONCE(gc->shrinker_hits);
220562306a36Sopenharmony_ci
220662306a36Sopenharmony_ci			WRITE_ONCE(gc->shrinker_hits, h + 1);
220762306a36Sopenharmony_ci			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
220862306a36Sopenharmony_ci			no_items = false;
220962306a36Sopenharmony_ci		}
221062306a36Sopenharmony_ci	}
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_ci	/*
221362306a36Sopenharmony_ci	 * If there are no inodes to inactivate, we don't want the shrinker
221462306a36Sopenharmony_ci	 * to think there's deferred work to call us back about.
221562306a36Sopenharmony_ci	 */
221662306a36Sopenharmony_ci	if (no_items)
221762306a36Sopenharmony_ci		return LONG_MAX;
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_ci	return SHRINK_STOP;
222062306a36Sopenharmony_ci}
222162306a36Sopenharmony_ci
222262306a36Sopenharmony_ci/* Register a shrinker so we can accelerate inodegc and throttle queuing. */
222362306a36Sopenharmony_ciint
222462306a36Sopenharmony_cixfs_inodegc_register_shrinker(
222562306a36Sopenharmony_ci	struct xfs_mount	*mp)
222662306a36Sopenharmony_ci{
222762306a36Sopenharmony_ci	struct shrinker		*shrink = &mp->m_inodegc_shrinker;
222862306a36Sopenharmony_ci
222962306a36Sopenharmony_ci	shrink->count_objects = xfs_inodegc_shrinker_count;
223062306a36Sopenharmony_ci	shrink->scan_objects = xfs_inodegc_shrinker_scan;
223162306a36Sopenharmony_ci	shrink->seeks = 0;
223262306a36Sopenharmony_ci	shrink->flags = SHRINKER_NONSLAB;
223362306a36Sopenharmony_ci	shrink->batch = XFS_INODEGC_SHRINKER_BATCH;
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci	return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id);
223662306a36Sopenharmony_ci}
2237