xref: /kernel/linux/linux-6.6/fs/xfs/libxfs/xfs_defer.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2016 Oracle.  All Rights Reserved.
462306a36Sopenharmony_ci * Author: Darrick J. Wong <darrick.wong@oracle.com>
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_mount.h"
1362306a36Sopenharmony_ci#include "xfs_defer.h"
1462306a36Sopenharmony_ci#include "xfs_trans.h"
1562306a36Sopenharmony_ci#include "xfs_buf_item.h"
1662306a36Sopenharmony_ci#include "xfs_inode.h"
1762306a36Sopenharmony_ci#include "xfs_inode_item.h"
1862306a36Sopenharmony_ci#include "xfs_trace.h"
1962306a36Sopenharmony_ci#include "xfs_icache.h"
2062306a36Sopenharmony_ci#include "xfs_log.h"
2162306a36Sopenharmony_ci#include "xfs_rmap.h"
2262306a36Sopenharmony_ci#include "xfs_refcount.h"
2362306a36Sopenharmony_ci#include "xfs_bmap.h"
2462306a36Sopenharmony_ci#include "xfs_alloc.h"
2562306a36Sopenharmony_ci#include "xfs_buf.h"
2662306a36Sopenharmony_ci#include "xfs_da_format.h"
2762306a36Sopenharmony_ci#include "xfs_da_btree.h"
2862306a36Sopenharmony_ci#include "xfs_attr.h"
2962306a36Sopenharmony_ci
/* Slab cache that xfs_defer_pending items are handed back to when freed. */
static struct kmem_cache *xfs_defer_pending_cache;
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci/*
3362306a36Sopenharmony_ci * Deferred Operations in XFS
3462306a36Sopenharmony_ci *
3562306a36Sopenharmony_ci * Due to the way locking rules work in XFS, certain transactions (block
3662306a36Sopenharmony_ci * mapping and unmapping, typically) have permanent reservations so that
3762306a36Sopenharmony_ci * we can roll the transaction to adhere to AG locking order rules and
3862306a36Sopenharmony_ci * to unlock buffers between metadata updates.  Prior to rmap/reflink,
3962306a36Sopenharmony_ci * the mapping code had a mechanism to perform these deferrals for
4062306a36Sopenharmony_ci * extents that were going to be freed; this code makes that facility
4162306a36Sopenharmony_ci * more generic.
4262306a36Sopenharmony_ci *
4362306a36Sopenharmony_ci * When adding the reverse mapping and reflink features, it became
4462306a36Sopenharmony_ci * necessary to perform complex remapping multi-transactions to comply
4562306a36Sopenharmony_ci * with AG locking order rules, and to be able to spread a single
4662306a36Sopenharmony_ci * refcount update operation (an operation on an n-block extent can
4762306a36Sopenharmony_ci * update as many as n records!) among multiple transactions.  XFS can
4862306a36Sopenharmony_ci * roll a transaction to facilitate this, but using this facility
4962306a36Sopenharmony_ci * requires us to log "intent" items in case log recovery needs to
5062306a36Sopenharmony_ci * redo the operation, and to log "done" items to indicate that redo
5162306a36Sopenharmony_ci * is not necessary.
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * Deferred work is tracked in xfs_defer_pending items.  Each pending
5462306a36Sopenharmony_ci * item tracks one type of deferred work.  Incoming work items (which
5562306a36Sopenharmony_ci * have not yet had an intent logged) are attached to a pending item
5662306a36Sopenharmony_ci * on the dop_intake list, where they wait for the caller to finish
5762306a36Sopenharmony_ci * the deferred operations.
5862306a36Sopenharmony_ci *
5962306a36Sopenharmony_ci * Finishing a set of deferred operations is an involved process.  To
6062306a36Sopenharmony_ci * start, we define "rolling a deferred-op transaction" as follows:
6162306a36Sopenharmony_ci *
6262306a36Sopenharmony_ci * > For each xfs_defer_pending item on the dop_intake list,
6362306a36Sopenharmony_ci *   - Sort the work items in AG order.  XFS locking
6462306a36Sopenharmony_ci *     order rules require us to lock buffers in AG order.
6562306a36Sopenharmony_ci *   - Create a log intent item for that type.
6662306a36Sopenharmony_ci *   - Attach it to the pending item.
6762306a36Sopenharmony_ci *   - Move the pending item from the dop_intake list to the
6862306a36Sopenharmony_ci *     dop_pending list.
6962306a36Sopenharmony_ci * > Roll the transaction.
7062306a36Sopenharmony_ci *
7162306a36Sopenharmony_ci * NOTE: To avoid exceeding the transaction reservation, we limit the
7262306a36Sopenharmony_ci * number of items that we attach to a given xfs_defer_pending.
7362306a36Sopenharmony_ci *
7462306a36Sopenharmony_ci * The actual finishing process looks like this:
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * > For each xfs_defer_pending in the dop_pending list,
7762306a36Sopenharmony_ci *   - Roll the deferred-op transaction as above.
7862306a36Sopenharmony_ci *   - Create a log done item for that type, and attach it to the
7962306a36Sopenharmony_ci *     log intent item.
8062306a36Sopenharmony_ci *   - For each work item attached to the log intent item,
8162306a36Sopenharmony_ci *     * Perform the described action.
8262306a36Sopenharmony_ci *     * Attach the work item to the log done item.
 *     * If the result of doing the work was -EAGAIN, ->finish_item
8462306a36Sopenharmony_ci *       wants a new transaction.  See the "Requesting a Fresh
8562306a36Sopenharmony_ci *       Transaction while Finishing Deferred Work" section below for
8662306a36Sopenharmony_ci *       details.
8762306a36Sopenharmony_ci *
8862306a36Sopenharmony_ci * The key here is that we must log an intent item for all pending
8962306a36Sopenharmony_ci * work items every time we roll the transaction, and that we must log
9062306a36Sopenharmony_ci * a done item as soon as the work is completed.  With this mechanism
9162306a36Sopenharmony_ci * we can perform complex remapping operations, chaining intent items
9262306a36Sopenharmony_ci * as needed.
9362306a36Sopenharmony_ci *
9462306a36Sopenharmony_ci * Requesting a Fresh Transaction while Finishing Deferred Work
9562306a36Sopenharmony_ci *
9662306a36Sopenharmony_ci * If ->finish_item decides that it needs a fresh transaction to
9762306a36Sopenharmony_ci * finish the work, it must ask its caller (xfs_defer_finish) for a
9862306a36Sopenharmony_ci * continuation.  The most likely cause of this circumstance are the
9962306a36Sopenharmony_ci * refcount adjust functions deciding that they've logged enough items
10062306a36Sopenharmony_ci * to be at risk of exceeding the transaction reservation.
10162306a36Sopenharmony_ci *
10262306a36Sopenharmony_ci * To get a fresh transaction, we want to log the existing log done
10362306a36Sopenharmony_ci * item to prevent the log intent item from replaying, immediately log
10462306a36Sopenharmony_ci * a new log intent item with the unfinished work items, roll the
10562306a36Sopenharmony_ci * transaction, and re-call ->finish_item wherever it left off.  The
10662306a36Sopenharmony_ci * log done item and the new log intent item must be in the same
10762306a36Sopenharmony_ci * transaction or atomicity cannot be guaranteed; defer_finish ensures
10862306a36Sopenharmony_ci * that this happens.
10962306a36Sopenharmony_ci *
11062306a36Sopenharmony_ci * This requires some coordination between ->finish_item and
11162306a36Sopenharmony_ci * defer_finish.  Upon deciding to request a new transaction,
11262306a36Sopenharmony_ci * ->finish_item should update the current work item to reflect the
11362306a36Sopenharmony_ci * unfinished work.  Next, it should reset the log done item's list
11462306a36Sopenharmony_ci * count to the number of items finished, and return -EAGAIN.
11562306a36Sopenharmony_ci * defer_finish sees the -EAGAIN, logs the new log intent item
11662306a36Sopenharmony_ci * with the remaining work items, and leaves the xfs_defer_pending
11762306a36Sopenharmony_ci * item at the head of the dop_work queue.  Then it rolls the
11862306a36Sopenharmony_ci * transaction and picks up processing where it left off.  It is
11962306a36Sopenharmony_ci * required that ->finish_item must be careful to leave enough
12062306a36Sopenharmony_ci * transaction reservation to fit the new log intent item.
12162306a36Sopenharmony_ci *
12262306a36Sopenharmony_ci * This is an example of remapping the extent (E, E+B) into file X at
12362306a36Sopenharmony_ci * offset A and dealing with the extent (C, C+B) already being mapped
12462306a36Sopenharmony_ci * there:
12562306a36Sopenharmony_ci * +-------------------------------------------------+
12662306a36Sopenharmony_ci * | Unmap file X startblock C offset A length B     | t0
12762306a36Sopenharmony_ci * | Intent to reduce refcount for extent (C, B)     |
12862306a36Sopenharmony_ci * | Intent to remove rmap (X, C, A, B)              |
12962306a36Sopenharmony_ci * | Intent to free extent (D, 1) (bmbt block)       |
13062306a36Sopenharmony_ci * | Intent to map (X, A, B) at startblock E         |
13162306a36Sopenharmony_ci * +-------------------------------------------------+
13262306a36Sopenharmony_ci * | Map file X startblock E offset A length B       | t1
13362306a36Sopenharmony_ci * | Done mapping (X, E, A, B)                       |
13462306a36Sopenharmony_ci * | Intent to increase refcount for extent (E, B)   |
13562306a36Sopenharmony_ci * | Intent to add rmap (X, E, A, B)                 |
13662306a36Sopenharmony_ci * +-------------------------------------------------+
13762306a36Sopenharmony_ci * | Reduce refcount for extent (C, B)               | t2
13862306a36Sopenharmony_ci * | Done reducing refcount for extent (C, 9)        |
13962306a36Sopenharmony_ci * | Intent to reduce refcount for extent (C+9, B-9) |
14062306a36Sopenharmony_ci * | (ran out of space after 9 refcount updates)     |
14162306a36Sopenharmony_ci * +-------------------------------------------------+
 * | Reduce refcount for extent (C+9, B-9)           | t3
14362306a36Sopenharmony_ci * | Done reducing refcount for extent (C+9, B-9)    |
14462306a36Sopenharmony_ci * | Increase refcount for extent (E, B)             |
14562306a36Sopenharmony_ci * | Done increasing refcount for extent (E, B)      |
14662306a36Sopenharmony_ci * | Intent to free extent (C, B)                    |
14762306a36Sopenharmony_ci * | Intent to free extent (F, 1) (refcountbt block) |
14862306a36Sopenharmony_ci * | Intent to remove rmap (F, 1, REFC)              |
14962306a36Sopenharmony_ci * +-------------------------------------------------+
15062306a36Sopenharmony_ci * | Remove rmap (X, C, A, B)                        | t4
15162306a36Sopenharmony_ci * | Done removing rmap (X, C, A, B)                 |
15262306a36Sopenharmony_ci * | Add rmap (X, E, A, B)                           |
15362306a36Sopenharmony_ci * | Done adding rmap (X, E, A, B)                   |
15462306a36Sopenharmony_ci * | Remove rmap (F, 1, REFC)                        |
15562306a36Sopenharmony_ci * | Done removing rmap (F, 1, REFC)                 |
15662306a36Sopenharmony_ci * +-------------------------------------------------+
15762306a36Sopenharmony_ci * | Free extent (C, B)                              | t5
15862306a36Sopenharmony_ci * | Done freeing extent (C, B)                      |
15962306a36Sopenharmony_ci * | Free extent (D, 1)                              |
16062306a36Sopenharmony_ci * | Done freeing extent (D, 1)                      |
16162306a36Sopenharmony_ci * | Free extent (F, 1)                              |
16262306a36Sopenharmony_ci * | Done freeing extent (F, 1)                      |
16362306a36Sopenharmony_ci * +-------------------------------------------------+
16462306a36Sopenharmony_ci *
16562306a36Sopenharmony_ci * If we should crash before t2 commits, log recovery replays
16662306a36Sopenharmony_ci * the following intent items:
16762306a36Sopenharmony_ci *
16862306a36Sopenharmony_ci * - Intent to reduce refcount for extent (C, B)
16962306a36Sopenharmony_ci * - Intent to remove rmap (X, C, A, B)
17062306a36Sopenharmony_ci * - Intent to free extent (D, 1) (bmbt block)
17162306a36Sopenharmony_ci * - Intent to increase refcount for extent (E, B)
17262306a36Sopenharmony_ci * - Intent to add rmap (X, E, A, B)
17362306a36Sopenharmony_ci *
17462306a36Sopenharmony_ci * In the process of recovering, it should also generate and take care
17562306a36Sopenharmony_ci * of these intent items:
17662306a36Sopenharmony_ci *
17762306a36Sopenharmony_ci * - Intent to free extent (C, B)
17862306a36Sopenharmony_ci * - Intent to free extent (F, 1) (refcountbt block)
17962306a36Sopenharmony_ci * - Intent to remove rmap (F, 1, REFC)
18062306a36Sopenharmony_ci *
18162306a36Sopenharmony_ci * Note that the continuation requested between t2 and t3 is likely to
18262306a36Sopenharmony_ci * reoccur.
18362306a36Sopenharmony_ci */
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_cistatic const struct xfs_defer_op_type *defer_op_types[] = {
18662306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
18762306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
18862306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
18962306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
19062306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
19162306a36Sopenharmony_ci	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
19262306a36Sopenharmony_ci};
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci/*
19562306a36Sopenharmony_ci * Ensure there's a log intent item associated with this deferred work item if
19662306a36Sopenharmony_ci * the operation must be restarted on crash.  Returns 1 if there's a log item;
19762306a36Sopenharmony_ci * 0 if there isn't; or a negative errno.
19862306a36Sopenharmony_ci */
19962306a36Sopenharmony_cistatic int
20062306a36Sopenharmony_cixfs_defer_create_intent(
20162306a36Sopenharmony_ci	struct xfs_trans		*tp,
20262306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp,
20362306a36Sopenharmony_ci	bool				sort)
20462306a36Sopenharmony_ci{
20562306a36Sopenharmony_ci	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
20662306a36Sopenharmony_ci	struct xfs_log_item		*lip;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	if (dfp->dfp_intent)
20962306a36Sopenharmony_ci		return 1;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
21262306a36Sopenharmony_ci	if (!lip)
21362306a36Sopenharmony_ci		return 0;
21462306a36Sopenharmony_ci	if (IS_ERR(lip))
21562306a36Sopenharmony_ci		return PTR_ERR(lip);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	dfp->dfp_intent = lip;
21862306a36Sopenharmony_ci	return 1;
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci/*
22262306a36Sopenharmony_ci * For each pending item in the intake list, log its intent item and the
22362306a36Sopenharmony_ci * associated extents, then add the entire intake list to the end of
22462306a36Sopenharmony_ci * the pending list.
22562306a36Sopenharmony_ci *
22662306a36Sopenharmony_ci * Returns 1 if at least one log item was associated with the deferred work;
22762306a36Sopenharmony_ci * 0 if there are no log items; or a negative errno.
22862306a36Sopenharmony_ci */
22962306a36Sopenharmony_cistatic int
23062306a36Sopenharmony_cixfs_defer_create_intents(
23162306a36Sopenharmony_ci	struct xfs_trans		*tp)
23262306a36Sopenharmony_ci{
23362306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp;
23462306a36Sopenharmony_ci	int				ret = 0;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
23762306a36Sopenharmony_ci		int			ret2;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
24062306a36Sopenharmony_ci		ret2 = xfs_defer_create_intent(tp, dfp, true);
24162306a36Sopenharmony_ci		if (ret2 < 0)
24262306a36Sopenharmony_ci			return ret2;
24362306a36Sopenharmony_ci		ret |= ret2;
24462306a36Sopenharmony_ci	}
24562306a36Sopenharmony_ci	return ret;
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ciSTATIC void
24962306a36Sopenharmony_cixfs_defer_pending_abort(
25062306a36Sopenharmony_ci	struct xfs_mount		*mp,
25162306a36Sopenharmony_ci	struct list_head		*dop_list)
25262306a36Sopenharmony_ci{
25362306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp;
25462306a36Sopenharmony_ci	const struct xfs_defer_op_type	*ops;
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	/* Abort intent items that don't have a done item. */
25762306a36Sopenharmony_ci	list_for_each_entry(dfp, dop_list, dfp_list) {
25862306a36Sopenharmony_ci		ops = defer_op_types[dfp->dfp_type];
25962306a36Sopenharmony_ci		trace_xfs_defer_pending_abort(mp, dfp);
26062306a36Sopenharmony_ci		if (dfp->dfp_intent && !dfp->dfp_done) {
26162306a36Sopenharmony_ci			ops->abort_intent(dfp->dfp_intent);
26262306a36Sopenharmony_ci			dfp->dfp_intent = NULL;
26362306a36Sopenharmony_ci		}
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci/* Abort all the intents that were committed. */
26862306a36Sopenharmony_ciSTATIC void
26962306a36Sopenharmony_cixfs_defer_trans_abort(
27062306a36Sopenharmony_ci	struct xfs_trans		*tp,
27162306a36Sopenharmony_ci	struct list_head		*dop_pending)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	trace_xfs_defer_trans_abort(tp, _RET_IP_);
27462306a36Sopenharmony_ci	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci/*
27862306a36Sopenharmony_ci * Capture resources that the caller said not to release ("held") when the
27962306a36Sopenharmony_ci * transaction commits.  Caller is responsible for zero-initializing @dres.
28062306a36Sopenharmony_ci */
28162306a36Sopenharmony_cistatic int
28262306a36Sopenharmony_cixfs_defer_save_resources(
28362306a36Sopenharmony_ci	struct xfs_defer_resources	*dres,
28462306a36Sopenharmony_ci	struct xfs_trans		*tp)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	struct xfs_buf_log_item		*bli;
28762306a36Sopenharmony_ci	struct xfs_inode_log_item	*ili;
28862306a36Sopenharmony_ci	struct xfs_log_item		*lip;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	list_for_each_entry(lip, &tp->t_items, li_trans) {
29362306a36Sopenharmony_ci		switch (lip->li_type) {
29462306a36Sopenharmony_ci		case XFS_LI_BUF:
29562306a36Sopenharmony_ci			bli = container_of(lip, struct xfs_buf_log_item,
29662306a36Sopenharmony_ci					   bli_item);
29762306a36Sopenharmony_ci			if (bli->bli_flags & XFS_BLI_HOLD) {
29862306a36Sopenharmony_ci				if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) {
29962306a36Sopenharmony_ci					ASSERT(0);
30062306a36Sopenharmony_ci					return -EFSCORRUPTED;
30162306a36Sopenharmony_ci				}
30262306a36Sopenharmony_ci				if (bli->bli_flags & XFS_BLI_ORDERED)
30362306a36Sopenharmony_ci					dres->dr_ordered |=
30462306a36Sopenharmony_ci							(1U << dres->dr_bufs);
30562306a36Sopenharmony_ci				else
30662306a36Sopenharmony_ci					xfs_trans_dirty_buf(tp, bli->bli_buf);
30762306a36Sopenharmony_ci				dres->dr_bp[dres->dr_bufs++] = bli->bli_buf;
30862306a36Sopenharmony_ci			}
30962306a36Sopenharmony_ci			break;
31062306a36Sopenharmony_ci		case XFS_LI_INODE:
31162306a36Sopenharmony_ci			ili = container_of(lip, struct xfs_inode_log_item,
31262306a36Sopenharmony_ci					   ili_item);
31362306a36Sopenharmony_ci			if (ili->ili_lock_flags == 0) {
31462306a36Sopenharmony_ci				if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) {
31562306a36Sopenharmony_ci					ASSERT(0);
31662306a36Sopenharmony_ci					return -EFSCORRUPTED;
31762306a36Sopenharmony_ci				}
31862306a36Sopenharmony_ci				xfs_trans_log_inode(tp, ili->ili_inode,
31962306a36Sopenharmony_ci						    XFS_ILOG_CORE);
32062306a36Sopenharmony_ci				dres->dr_ip[dres->dr_inos++] = ili->ili_inode;
32162306a36Sopenharmony_ci			}
32262306a36Sopenharmony_ci			break;
32362306a36Sopenharmony_ci		default:
32462306a36Sopenharmony_ci			break;
32562306a36Sopenharmony_ci		}
32662306a36Sopenharmony_ci	}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	return 0;
32962306a36Sopenharmony_ci}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci/* Attach the held resources to the transaction. */
33262306a36Sopenharmony_cistatic void
33362306a36Sopenharmony_cixfs_defer_restore_resources(
33462306a36Sopenharmony_ci	struct xfs_trans		*tp,
33562306a36Sopenharmony_ci	struct xfs_defer_resources	*dres)
33662306a36Sopenharmony_ci{
33762306a36Sopenharmony_ci	unsigned short			i;
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	/* Rejoin the joined inodes. */
34062306a36Sopenharmony_ci	for (i = 0; i < dres->dr_inos; i++)
34162306a36Sopenharmony_ci		xfs_trans_ijoin(tp, dres->dr_ip[i], 0);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	/* Rejoin the buffers and dirty them so the log moves forward. */
34462306a36Sopenharmony_ci	for (i = 0; i < dres->dr_bufs; i++) {
34562306a36Sopenharmony_ci		xfs_trans_bjoin(tp, dres->dr_bp[i]);
34662306a36Sopenharmony_ci		if (dres->dr_ordered & (1U << i))
34762306a36Sopenharmony_ci			xfs_trans_ordered_buf(tp, dres->dr_bp[i]);
34862306a36Sopenharmony_ci		xfs_trans_bhold(tp, dres->dr_bp[i]);
34962306a36Sopenharmony_ci	}
35062306a36Sopenharmony_ci}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci/* Roll a transaction so we can do some deferred op processing. */
35362306a36Sopenharmony_ciSTATIC int
35462306a36Sopenharmony_cixfs_defer_trans_roll(
35562306a36Sopenharmony_ci	struct xfs_trans		**tpp)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	struct xfs_defer_resources	dres = { };
35862306a36Sopenharmony_ci	int				error;
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	error = xfs_defer_save_resources(&dres, *tpp);
36162306a36Sopenharmony_ci	if (error)
36262306a36Sopenharmony_ci		return error;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	trace_xfs_defer_trans_roll(*tpp, _RET_IP_);
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	/*
36762306a36Sopenharmony_ci	 * Roll the transaction.  Rolling always given a new transaction (even
36862306a36Sopenharmony_ci	 * if committing the old one fails!) to hand back to the caller, so we
36962306a36Sopenharmony_ci	 * join the held resources to the new transaction so that we always
37062306a36Sopenharmony_ci	 * return with the held resources joined to @tpp, no matter what
37162306a36Sopenharmony_ci	 * happened.
37262306a36Sopenharmony_ci	 */
37362306a36Sopenharmony_ci	error = xfs_trans_roll(tpp);
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	xfs_defer_restore_resources(*tpp, &dres);
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	if (error)
37862306a36Sopenharmony_ci		trace_xfs_defer_trans_roll_error(*tpp, error);
37962306a36Sopenharmony_ci	return error;
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci/*
38362306a36Sopenharmony_ci * Free up any items left in the list.
38462306a36Sopenharmony_ci */
38562306a36Sopenharmony_cistatic void
38662306a36Sopenharmony_cixfs_defer_cancel_list(
38762306a36Sopenharmony_ci	struct xfs_mount		*mp,
38862306a36Sopenharmony_ci	struct list_head		*dop_list)
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp;
39162306a36Sopenharmony_ci	struct xfs_defer_pending	*pli;
39262306a36Sopenharmony_ci	struct list_head		*pwi;
39362306a36Sopenharmony_ci	struct list_head		*n;
39462306a36Sopenharmony_ci	const struct xfs_defer_op_type	*ops;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	/*
39762306a36Sopenharmony_ci	 * Free the pending items.  Caller should already have arranged
39862306a36Sopenharmony_ci	 * for the intent items to be released.
39962306a36Sopenharmony_ci	 */
40062306a36Sopenharmony_ci	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
40162306a36Sopenharmony_ci		ops = defer_op_types[dfp->dfp_type];
40262306a36Sopenharmony_ci		trace_xfs_defer_cancel_list(mp, dfp);
40362306a36Sopenharmony_ci		list_del(&dfp->dfp_list);
40462306a36Sopenharmony_ci		list_for_each_safe(pwi, n, &dfp->dfp_work) {
40562306a36Sopenharmony_ci			list_del(pwi);
40662306a36Sopenharmony_ci			dfp->dfp_count--;
40762306a36Sopenharmony_ci			trace_xfs_defer_cancel_item(mp, dfp, pwi);
40862306a36Sopenharmony_ci			ops->cancel_item(pwi);
40962306a36Sopenharmony_ci		}
41062306a36Sopenharmony_ci		ASSERT(dfp->dfp_count == 0);
41162306a36Sopenharmony_ci		kmem_cache_free(xfs_defer_pending_cache, dfp);
41262306a36Sopenharmony_ci	}
41362306a36Sopenharmony_ci}
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci/*
41662306a36Sopenharmony_ci * Prevent a log intent item from pinning the tail of the log by logging a
41762306a36Sopenharmony_ci * done item to release the intent item; and then log a new intent item.
41862306a36Sopenharmony_ci * The caller should provide a fresh transaction and roll it after we're done.
41962306a36Sopenharmony_ci */
42062306a36Sopenharmony_cistatic int
42162306a36Sopenharmony_cixfs_defer_relog(
42262306a36Sopenharmony_ci	struct xfs_trans		**tpp,
42362306a36Sopenharmony_ci	struct list_head		*dfops)
42462306a36Sopenharmony_ci{
42562306a36Sopenharmony_ci	struct xlog			*log = (*tpp)->t_mountp->m_log;
42662306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp;
42762306a36Sopenharmony_ci	xfs_lsn_t			threshold_lsn = NULLCOMMITLSN;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	list_for_each_entry(dfp, dfops, dfp_list) {
43362306a36Sopenharmony_ci		/*
43462306a36Sopenharmony_ci		 * If the log intent item for this deferred op is not a part of
43562306a36Sopenharmony_ci		 * the current log checkpoint, relog the intent item to keep
43662306a36Sopenharmony_ci		 * the log tail moving forward.  We're ok with this being racy
43762306a36Sopenharmony_ci		 * because an incorrect decision means we'll be a little slower
43862306a36Sopenharmony_ci		 * at pushing the tail.
43962306a36Sopenharmony_ci		 */
44062306a36Sopenharmony_ci		if (dfp->dfp_intent == NULL ||
44162306a36Sopenharmony_ci		    xfs_log_item_in_current_chkpt(dfp->dfp_intent))
44262306a36Sopenharmony_ci			continue;
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci		/*
44562306a36Sopenharmony_ci		 * Figure out where we need the tail to be in order to maintain
44662306a36Sopenharmony_ci		 * the minimum required free space in the log.  Only sample
44762306a36Sopenharmony_ci		 * the log threshold once per call.
44862306a36Sopenharmony_ci		 */
44962306a36Sopenharmony_ci		if (threshold_lsn == NULLCOMMITLSN) {
45062306a36Sopenharmony_ci			threshold_lsn = xlog_grant_push_threshold(log, 0);
45162306a36Sopenharmony_ci			if (threshold_lsn == NULLCOMMITLSN)
45262306a36Sopenharmony_ci				break;
45362306a36Sopenharmony_ci		}
45462306a36Sopenharmony_ci		if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
45562306a36Sopenharmony_ci			continue;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
45862306a36Sopenharmony_ci		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
45962306a36Sopenharmony_ci		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
46062306a36Sopenharmony_ci	}
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
46362306a36Sopenharmony_ci		return xfs_defer_trans_roll(tpp);
46462306a36Sopenharmony_ci	return 0;
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci/*
46862306a36Sopenharmony_ci * Log an intent-done item for the first pending intent, and finish the work
46962306a36Sopenharmony_ci * items.
47062306a36Sopenharmony_ci */
47162306a36Sopenharmony_cistatic int
47262306a36Sopenharmony_cixfs_defer_finish_one(
47362306a36Sopenharmony_ci	struct xfs_trans		*tp,
47462306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
47762306a36Sopenharmony_ci	struct xfs_btree_cur		*state = NULL;
47862306a36Sopenharmony_ci	struct list_head		*li, *n;
47962306a36Sopenharmony_ci	int				error;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
48462306a36Sopenharmony_ci	list_for_each_safe(li, n, &dfp->dfp_work) {
48562306a36Sopenharmony_ci		list_del(li);
48662306a36Sopenharmony_ci		dfp->dfp_count--;
48762306a36Sopenharmony_ci		trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
48862306a36Sopenharmony_ci		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
48962306a36Sopenharmony_ci		if (error == -EAGAIN) {
49062306a36Sopenharmony_ci			int		ret;
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci			/*
49362306a36Sopenharmony_ci			 * Caller wants a fresh transaction; put the work item
49462306a36Sopenharmony_ci			 * back on the list and log a new log intent item to
49562306a36Sopenharmony_ci			 * replace the old one.  See "Requesting a Fresh
49662306a36Sopenharmony_ci			 * Transaction while Finishing Deferred Work" above.
49762306a36Sopenharmony_ci			 */
49862306a36Sopenharmony_ci			list_add(li, &dfp->dfp_work);
49962306a36Sopenharmony_ci			dfp->dfp_count++;
50062306a36Sopenharmony_ci			dfp->dfp_done = NULL;
50162306a36Sopenharmony_ci			dfp->dfp_intent = NULL;
50262306a36Sopenharmony_ci			ret = xfs_defer_create_intent(tp, dfp, false);
50362306a36Sopenharmony_ci			if (ret < 0)
50462306a36Sopenharmony_ci				error = ret;
50562306a36Sopenharmony_ci		}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci		if (error)
50862306a36Sopenharmony_ci			goto out;
50962306a36Sopenharmony_ci	}
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	/* Done with the dfp, free it. */
51262306a36Sopenharmony_ci	list_del(&dfp->dfp_list);
51362306a36Sopenharmony_ci	kmem_cache_free(xfs_defer_pending_cache, dfp);
51462306a36Sopenharmony_ciout:
51562306a36Sopenharmony_ci	if (ops->finish_cleanup)
51662306a36Sopenharmony_ci		ops->finish_cleanup(tp, state, error);
51762306a36Sopenharmony_ci	return error;
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci/*
52162306a36Sopenharmony_ci * Finish all the pending work.  This involves logging intent items for
52262306a36Sopenharmony_ci * any work items that wandered in since the last transaction roll (if
52362306a36Sopenharmony_ci * one has even happened), rolling the transaction, and finishing the
52462306a36Sopenharmony_ci * work items in the first item on the logged-and-pending list.
52562306a36Sopenharmony_ci *
52662306a36Sopenharmony_ci * If an inode is provided, relog it to the new transaction.
52762306a36Sopenharmony_ci */
52862306a36Sopenharmony_ciint
52962306a36Sopenharmony_cixfs_defer_finish_noroll(
53062306a36Sopenharmony_ci	struct xfs_trans		**tp)
53162306a36Sopenharmony_ci{
53262306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp = NULL;
53362306a36Sopenharmony_ci	int				error = 0;
53462306a36Sopenharmony_ci	LIST_HEAD(dop_pending);
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	trace_xfs_defer_finish(*tp, _RET_IP_);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	/* Until we run out of pending work to finish... */
54162306a36Sopenharmony_ci	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
54262306a36Sopenharmony_ci		/*
54362306a36Sopenharmony_ci		 * Deferred items that are created in the process of finishing
54462306a36Sopenharmony_ci		 * other deferred work items should be queued at the head of
54562306a36Sopenharmony_ci		 * the pending list, which puts them ahead of the deferred work
54662306a36Sopenharmony_ci		 * that was created by the caller.  This keeps the number of
54762306a36Sopenharmony_ci		 * pending work items to a minimum, which decreases the amount
54862306a36Sopenharmony_ci		 * of time that any one intent item can stick around in memory,
54962306a36Sopenharmony_ci		 * pinning the log tail.
55062306a36Sopenharmony_ci		 */
55162306a36Sopenharmony_ci		int has_intents = xfs_defer_create_intents(*tp);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci		list_splice_init(&(*tp)->t_dfops, &dop_pending);
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci		if (has_intents < 0) {
55662306a36Sopenharmony_ci			error = has_intents;
55762306a36Sopenharmony_ci			goto out_shutdown;
55862306a36Sopenharmony_ci		}
55962306a36Sopenharmony_ci		if (has_intents || dfp) {
56062306a36Sopenharmony_ci			error = xfs_defer_trans_roll(tp);
56162306a36Sopenharmony_ci			if (error)
56262306a36Sopenharmony_ci				goto out_shutdown;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci			/* Relog intent items to keep the log moving. */
56562306a36Sopenharmony_ci			error = xfs_defer_relog(tp, &dop_pending);
56662306a36Sopenharmony_ci			if (error)
56762306a36Sopenharmony_ci				goto out_shutdown;
56862306a36Sopenharmony_ci		}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
57162306a36Sopenharmony_ci				       dfp_list);
57262306a36Sopenharmony_ci		error = xfs_defer_finish_one(*tp, dfp);
57362306a36Sopenharmony_ci		if (error && error != -EAGAIN)
57462306a36Sopenharmony_ci			goto out_shutdown;
57562306a36Sopenharmony_ci	}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	trace_xfs_defer_finish_done(*tp, _RET_IP_);
57862306a36Sopenharmony_ci	return 0;
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ciout_shutdown:
58162306a36Sopenharmony_ci	xfs_defer_trans_abort(*tp, &dop_pending);
58262306a36Sopenharmony_ci	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
58362306a36Sopenharmony_ci	trace_xfs_defer_finish_error(*tp, error);
58462306a36Sopenharmony_ci	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
58562306a36Sopenharmony_ci	xfs_defer_cancel(*tp);
58662306a36Sopenharmony_ci	return error;
58762306a36Sopenharmony_ci}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ciint
59062306a36Sopenharmony_cixfs_defer_finish(
59162306a36Sopenharmony_ci	struct xfs_trans	**tp)
59262306a36Sopenharmony_ci{
59362306a36Sopenharmony_ci	int			error;
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci	/*
59662306a36Sopenharmony_ci	 * Finish and roll the transaction once more to avoid returning to the
59762306a36Sopenharmony_ci	 * caller with a dirty transaction.
59862306a36Sopenharmony_ci	 */
59962306a36Sopenharmony_ci	error = xfs_defer_finish_noroll(tp);
60062306a36Sopenharmony_ci	if (error)
60162306a36Sopenharmony_ci		return error;
60262306a36Sopenharmony_ci	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
60362306a36Sopenharmony_ci		error = xfs_defer_trans_roll(tp);
60462306a36Sopenharmony_ci		if (error) {
60562306a36Sopenharmony_ci			xfs_force_shutdown((*tp)->t_mountp,
60662306a36Sopenharmony_ci					   SHUTDOWN_CORRUPT_INCORE);
60762306a36Sopenharmony_ci			return error;
60862306a36Sopenharmony_ci		}
60962306a36Sopenharmony_ci	}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	/* Reset LOWMODE now that we've finished all the dfops. */
61262306a36Sopenharmony_ci	ASSERT(list_empty(&(*tp)->t_dfops));
61362306a36Sopenharmony_ci	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
61462306a36Sopenharmony_ci	return 0;
61562306a36Sopenharmony_ci}
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_civoid
61862306a36Sopenharmony_cixfs_defer_cancel(
61962306a36Sopenharmony_ci	struct xfs_trans	*tp)
62062306a36Sopenharmony_ci{
62162306a36Sopenharmony_ci	struct xfs_mount	*mp = tp->t_mountp;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	trace_xfs_defer_cancel(tp, _RET_IP_);
62462306a36Sopenharmony_ci	xfs_defer_cancel_list(mp, &tp->t_dfops);
62562306a36Sopenharmony_ci}
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci/* Add an item for later deferred processing. */
62862306a36Sopenharmony_civoid
62962306a36Sopenharmony_cixfs_defer_add(
63062306a36Sopenharmony_ci	struct xfs_trans		*tp,
63162306a36Sopenharmony_ci	enum xfs_defer_ops_type		type,
63262306a36Sopenharmony_ci	struct list_head		*li)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	struct xfs_defer_pending	*dfp = NULL;
63562306a36Sopenharmony_ci	const struct xfs_defer_op_type	*ops = defer_op_types[type];
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
63862306a36Sopenharmony_ci	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci	/*
64162306a36Sopenharmony_ci	 * Add the item to a pending item at the end of the intake list.
64262306a36Sopenharmony_ci	 * If the last pending item has the same type, reuse it.  Else,
64362306a36Sopenharmony_ci	 * create a new pending item at the end of the intake list.
64462306a36Sopenharmony_ci	 */
64562306a36Sopenharmony_ci	if (!list_empty(&tp->t_dfops)) {
64662306a36Sopenharmony_ci		dfp = list_last_entry(&tp->t_dfops,
64762306a36Sopenharmony_ci				struct xfs_defer_pending, dfp_list);
64862306a36Sopenharmony_ci		if (dfp->dfp_type != type ||
64962306a36Sopenharmony_ci		    (ops->max_items && dfp->dfp_count >= ops->max_items))
65062306a36Sopenharmony_ci			dfp = NULL;
65162306a36Sopenharmony_ci	}
65262306a36Sopenharmony_ci	if (!dfp) {
65362306a36Sopenharmony_ci		dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
65462306a36Sopenharmony_ci				GFP_NOFS | __GFP_NOFAIL);
65562306a36Sopenharmony_ci		dfp->dfp_type = type;
65662306a36Sopenharmony_ci		dfp->dfp_intent = NULL;
65762306a36Sopenharmony_ci		dfp->dfp_done = NULL;
65862306a36Sopenharmony_ci		dfp->dfp_count = 0;
65962306a36Sopenharmony_ci		INIT_LIST_HEAD(&dfp->dfp_work);
66062306a36Sopenharmony_ci		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
66162306a36Sopenharmony_ci	}
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	list_add_tail(li, &dfp->dfp_work);
66462306a36Sopenharmony_ci	trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
66562306a36Sopenharmony_ci	dfp->dfp_count++;
66662306a36Sopenharmony_ci}
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci/*
66962306a36Sopenharmony_ci * Move deferred ops from one transaction to another and reset the source to
67062306a36Sopenharmony_ci * initial state. This is primarily used to carry state forward across
67162306a36Sopenharmony_ci * transaction rolls with pending dfops.
67262306a36Sopenharmony_ci */
67362306a36Sopenharmony_civoid
67462306a36Sopenharmony_cixfs_defer_move(
67562306a36Sopenharmony_ci	struct xfs_trans	*dtp,
67662306a36Sopenharmony_ci	struct xfs_trans	*stp)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	list_splice_init(&stp->t_dfops, &dtp->t_dfops);
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ci	/*
68162306a36Sopenharmony_ci	 * Low free space mode was historically controlled by a dfops field.
68262306a36Sopenharmony_ci	 * This meant that low mode state potentially carried across multiple
68362306a36Sopenharmony_ci	 * transaction rolls. Transfer low mode on a dfops move to preserve
68462306a36Sopenharmony_ci	 * that behavior.
68562306a36Sopenharmony_ci	 */
68662306a36Sopenharmony_ci	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
68762306a36Sopenharmony_ci	stp->t_flags &= ~XFS_TRANS_LOWMODE;
68862306a36Sopenharmony_ci}
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci/*
69162306a36Sopenharmony_ci * Prepare a chain of fresh deferred ops work items to be completed later.  Log
69262306a36Sopenharmony_ci * recovery requires the ability to put off until later the actual finishing
69362306a36Sopenharmony_ci * work so that it can process unfinished items recovered from the log in
69462306a36Sopenharmony_ci * correct order.
69562306a36Sopenharmony_ci *
69662306a36Sopenharmony_ci * Create and log intent items for all the work that we're capturing so that we
69762306a36Sopenharmony_ci * can be assured that the items will get replayed if the system goes down
69862306a36Sopenharmony_ci * before log recovery gets a chance to finish the work it put off.  The entire
69962306a36Sopenharmony_ci * deferred ops state is transferred to the capture structure and the
70062306a36Sopenharmony_ci * transaction is then ready for the caller to commit it.  If there are no
70162306a36Sopenharmony_ci * intent items to capture, this function returns NULL.
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci * If capture_ip is not NULL, the capture structure will obtain an extra
70462306a36Sopenharmony_ci * reference to the inode.
70562306a36Sopenharmony_ci */
70662306a36Sopenharmony_cistatic struct xfs_defer_capture *
70762306a36Sopenharmony_cixfs_defer_ops_capture(
70862306a36Sopenharmony_ci	struct xfs_trans		*tp)
70962306a36Sopenharmony_ci{
71062306a36Sopenharmony_ci	struct xfs_defer_capture	*dfc;
71162306a36Sopenharmony_ci	unsigned short			i;
71262306a36Sopenharmony_ci	int				error;
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ci	if (list_empty(&tp->t_dfops))
71562306a36Sopenharmony_ci		return NULL;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	error = xfs_defer_create_intents(tp);
71862306a36Sopenharmony_ci	if (error < 0)
71962306a36Sopenharmony_ci		return ERR_PTR(error);
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	/* Create an object to capture the defer ops. */
72262306a36Sopenharmony_ci	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
72362306a36Sopenharmony_ci	INIT_LIST_HEAD(&dfc->dfc_list);
72462306a36Sopenharmony_ci	INIT_LIST_HEAD(&dfc->dfc_dfops);
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	/* Move the dfops chain and transaction state to the capture struct. */
72762306a36Sopenharmony_ci	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
72862306a36Sopenharmony_ci	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
72962306a36Sopenharmony_ci	tp->t_flags &= ~XFS_TRANS_LOWMODE;
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	/* Capture the remaining block reservations along with the dfops. */
73262306a36Sopenharmony_ci	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
73362306a36Sopenharmony_ci	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	/* Preserve the log reservation size. */
73662306a36Sopenharmony_ci	dfc->dfc_logres = tp->t_log_res;
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	error = xfs_defer_save_resources(&dfc->dfc_held, tp);
73962306a36Sopenharmony_ci	if (error) {
74062306a36Sopenharmony_ci		/*
74162306a36Sopenharmony_ci		 * Resource capture should never fail, but if it does, we
74262306a36Sopenharmony_ci		 * still have to shut down the log and release things
74362306a36Sopenharmony_ci		 * properly.
74462306a36Sopenharmony_ci		 */
74562306a36Sopenharmony_ci		xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
74662306a36Sopenharmony_ci	}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	/*
74962306a36Sopenharmony_ci	 * Grab extra references to the inodes and buffers because callers are
75062306a36Sopenharmony_ci	 * expected to release their held references after we commit the
75162306a36Sopenharmony_ci	 * transaction.
75262306a36Sopenharmony_ci	 */
75362306a36Sopenharmony_ci	for (i = 0; i < dfc->dfc_held.dr_inos; i++) {
75462306a36Sopenharmony_ci		ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL));
75562306a36Sopenharmony_ci		ihold(VFS_I(dfc->dfc_held.dr_ip[i]));
75662306a36Sopenharmony_ci	}
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
75962306a36Sopenharmony_ci		xfs_buf_hold(dfc->dfc_held.dr_bp[i]);
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	return dfc;
76262306a36Sopenharmony_ci}
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci/* Release all resources that we used to capture deferred ops. */
76562306a36Sopenharmony_civoid
76662306a36Sopenharmony_cixfs_defer_ops_capture_abort(
76762306a36Sopenharmony_ci	struct xfs_mount		*mp,
76862306a36Sopenharmony_ci	struct xfs_defer_capture	*dfc)
76962306a36Sopenharmony_ci{
77062306a36Sopenharmony_ci	unsigned short			i;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
77362306a36Sopenharmony_ci	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
77662306a36Sopenharmony_ci		xfs_buf_relse(dfc->dfc_held.dr_bp[i]);
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	for (i = 0; i < dfc->dfc_held.dr_inos; i++)
77962306a36Sopenharmony_ci		xfs_irele(dfc->dfc_held.dr_ip[i]);
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	kmem_free(dfc);
78262306a36Sopenharmony_ci}
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci/*
78562306a36Sopenharmony_ci * Capture any deferred ops and commit the transaction.  This is the last step
78662306a36Sopenharmony_ci * needed to finish a log intent item that we recovered from the log.  If any
78762306a36Sopenharmony_ci * of the deferred ops operate on an inode, the caller must pass in that inode
78862306a36Sopenharmony_ci * so that the reference can be transferred to the capture structure.  The
78962306a36Sopenharmony_ci * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
79062306a36Sopenharmony_ci * xfs_defer_ops_continue.
79162306a36Sopenharmony_ci */
79262306a36Sopenharmony_ciint
79362306a36Sopenharmony_cixfs_defer_ops_capture_and_commit(
79462306a36Sopenharmony_ci	struct xfs_trans		*tp,
79562306a36Sopenharmony_ci	struct list_head		*capture_list)
79662306a36Sopenharmony_ci{
79762306a36Sopenharmony_ci	struct xfs_mount		*mp = tp->t_mountp;
79862306a36Sopenharmony_ci	struct xfs_defer_capture	*dfc;
79962306a36Sopenharmony_ci	int				error;
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	/* If we don't capture anything, commit transaction and exit. */
80262306a36Sopenharmony_ci	dfc = xfs_defer_ops_capture(tp);
80362306a36Sopenharmony_ci	if (IS_ERR(dfc)) {
80462306a36Sopenharmony_ci		xfs_trans_cancel(tp);
80562306a36Sopenharmony_ci		return PTR_ERR(dfc);
80662306a36Sopenharmony_ci	}
80762306a36Sopenharmony_ci	if (!dfc)
80862306a36Sopenharmony_ci		return xfs_trans_commit(tp);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	/* Commit the transaction and add the capture structure to the list. */
81162306a36Sopenharmony_ci	error = xfs_trans_commit(tp);
81262306a36Sopenharmony_ci	if (error) {
81362306a36Sopenharmony_ci		xfs_defer_ops_capture_abort(mp, dfc);
81462306a36Sopenharmony_ci		return error;
81562306a36Sopenharmony_ci	}
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	list_add_tail(&dfc->dfc_list, capture_list);
81862306a36Sopenharmony_ci	return 0;
81962306a36Sopenharmony_ci}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci/*
82262306a36Sopenharmony_ci * Attach a chain of captured deferred ops to a new transaction and free the
82362306a36Sopenharmony_ci * capture structure.  If an inode was captured, it will be passed back to the
82462306a36Sopenharmony_ci * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
82562306a36Sopenharmony_ci * The caller now owns the inode reference.
82662306a36Sopenharmony_ci */
82762306a36Sopenharmony_civoid
82862306a36Sopenharmony_cixfs_defer_ops_continue(
82962306a36Sopenharmony_ci	struct xfs_defer_capture	*dfc,
83062306a36Sopenharmony_ci	struct xfs_trans		*tp,
83162306a36Sopenharmony_ci	struct xfs_defer_resources	*dres)
83262306a36Sopenharmony_ci{
83362306a36Sopenharmony_ci	unsigned int			i;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
83662306a36Sopenharmony_ci	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	/* Lock the captured resources to the new transaction. */
83962306a36Sopenharmony_ci	if (dfc->dfc_held.dr_inos == 2)
84062306a36Sopenharmony_ci		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
84162306a36Sopenharmony_ci				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
84262306a36Sopenharmony_ci	else if (dfc->dfc_held.dr_inos == 1)
84362306a36Sopenharmony_ci		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
84662306a36Sopenharmony_ci		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	/* Join the captured resources to the new transaction. */
84962306a36Sopenharmony_ci	xfs_defer_restore_resources(tp, &dfc->dfc_held);
85062306a36Sopenharmony_ci	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
85162306a36Sopenharmony_ci	dres->dr_bufs = 0;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	/* Move captured dfops chain and state to the transaction. */
85462306a36Sopenharmony_ci	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
85562306a36Sopenharmony_ci	tp->t_flags |= dfc->dfc_tpflags;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	kmem_free(dfc);
85862306a36Sopenharmony_ci}
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci/* Release the resources captured and continued during recovery. */
86162306a36Sopenharmony_civoid
86262306a36Sopenharmony_cixfs_defer_resources_rele(
86362306a36Sopenharmony_ci	struct xfs_defer_resources	*dres)
86462306a36Sopenharmony_ci{
86562306a36Sopenharmony_ci	unsigned short			i;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	for (i = 0; i < dres->dr_inos; i++) {
86862306a36Sopenharmony_ci		xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL);
86962306a36Sopenharmony_ci		xfs_irele(dres->dr_ip[i]);
87062306a36Sopenharmony_ci		dres->dr_ip[i] = NULL;
87162306a36Sopenharmony_ci	}
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	for (i = 0; i < dres->dr_bufs; i++) {
87462306a36Sopenharmony_ci		xfs_buf_relse(dres->dr_bp[i]);
87562306a36Sopenharmony_ci		dres->dr_bp[i] = NULL;
87662306a36Sopenharmony_ci	}
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	dres->dr_inos = 0;
87962306a36Sopenharmony_ci	dres->dr_bufs = 0;
88062306a36Sopenharmony_ci	dres->dr_ordered = 0;
88162306a36Sopenharmony_ci}
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_cistatic inline int __init
88462306a36Sopenharmony_cixfs_defer_init_cache(void)
88562306a36Sopenharmony_ci{
88662306a36Sopenharmony_ci	xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending",
88762306a36Sopenharmony_ci			sizeof(struct xfs_defer_pending),
88862306a36Sopenharmony_ci			0, 0, NULL);
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM;
89162306a36Sopenharmony_ci}
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_cistatic inline void
89462306a36Sopenharmony_cixfs_defer_destroy_cache(void)
89562306a36Sopenharmony_ci{
89662306a36Sopenharmony_ci	kmem_cache_destroy(xfs_defer_pending_cache);
89762306a36Sopenharmony_ci	xfs_defer_pending_cache = NULL;
89862306a36Sopenharmony_ci}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci/* Set up caches for deferred work items. */
90162306a36Sopenharmony_ciint __init
90262306a36Sopenharmony_cixfs_defer_init_item_caches(void)
90362306a36Sopenharmony_ci{
90462306a36Sopenharmony_ci	int				error;
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	error = xfs_defer_init_cache();
90762306a36Sopenharmony_ci	if (error)
90862306a36Sopenharmony_ci		return error;
90962306a36Sopenharmony_ci	error = xfs_rmap_intent_init_cache();
91062306a36Sopenharmony_ci	if (error)
91162306a36Sopenharmony_ci		goto err;
91262306a36Sopenharmony_ci	error = xfs_refcount_intent_init_cache();
91362306a36Sopenharmony_ci	if (error)
91462306a36Sopenharmony_ci		goto err;
91562306a36Sopenharmony_ci	error = xfs_bmap_intent_init_cache();
91662306a36Sopenharmony_ci	if (error)
91762306a36Sopenharmony_ci		goto err;
91862306a36Sopenharmony_ci	error = xfs_extfree_intent_init_cache();
91962306a36Sopenharmony_ci	if (error)
92062306a36Sopenharmony_ci		goto err;
92162306a36Sopenharmony_ci	error = xfs_attr_intent_init_cache();
92262306a36Sopenharmony_ci	if (error)
92362306a36Sopenharmony_ci		goto err;
92462306a36Sopenharmony_ci	return 0;
92562306a36Sopenharmony_cierr:
92662306a36Sopenharmony_ci	xfs_defer_destroy_item_caches();
92762306a36Sopenharmony_ci	return error;
92862306a36Sopenharmony_ci}
92962306a36Sopenharmony_ci
/*
 * Destroy all of the deferred work item caches, if they have been allocated.
 * The caches go away in the reverse of the order in which
 * xfs_defer_init_item_caches() sets them up.
 */
void
xfs_defer_destroy_item_caches(void)
{
	/* Intent caches first ... */
	xfs_attr_intent_destroy_cache();
	xfs_extfree_intent_destroy_cache();
	xfs_bmap_intent_destroy_cache();
	xfs_refcount_intent_destroy_cache();
	xfs_rmap_intent_destroy_cache();

	/* ... and the pending-item cache last. */
	xfs_defer_destroy_cache();
}
941