xref: /kernel/linux/linux-5.10/fs/xfs/xfs_trans.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 * Copyright (C) 2010 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7#include "xfs.h"
8#include "xfs_fs.h"
9#include "xfs_shared.h"
10#include "xfs_format.h"
11#include "xfs_log_format.h"
12#include "xfs_log_priv.h"
13#include "xfs_trans_resv.h"
14#include "xfs_mount.h"
15#include "xfs_extent_busy.h"
16#include "xfs_quota.h"
17#include "xfs_trans.h"
18#include "xfs_trans_priv.h"
19#include "xfs_log.h"
20#include "xfs_trace.h"
21#include "xfs_error.h"
22#include "xfs_defer.h"
23
24kmem_zone_t	*xfs_trans_zone;
25
26#if defined(CONFIG_TRACEPOINTS)
27static void
28xfs_trans_trace_reservations(
29	struct xfs_mount	*mp)
30{
31	struct xfs_trans_res	resv;
32	struct xfs_trans_res	*res;
33	struct xfs_trans_res	*end_res;
34	int			i;
35
36	res = (struct xfs_trans_res *)M_RES(mp);
37	end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
38	for (i = 0; res < end_res; i++, res++)
39		trace_xfs_trans_resv_calc(mp, i, res);
40	xfs_log_get_max_trans_res(mp, &resv);
41	trace_xfs_trans_resv_calc(mp, -1, &resv);
42}
43#else
44# define xfs_trans_trace_reservations(mp)
45#endif
46
47/*
48 * Initialize the precomputed transaction reservation values
49 * in the mount structure.
50 */
51void
52xfs_trans_init(
53	struct xfs_mount	*mp)
54{
55	xfs_trans_resv_calc(mp, M_RES(mp));
56	xfs_trans_trace_reservations(mp);
57}
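/*
 * Illustrative note (added sketch, not part of the xref'd file): once
 * xfs_trans_init() has populated M_RES(mp) at mount time, callers pass one of
 * the precomputed reservations straight to xfs_trans_alloc().  The tr_ichange
 * member name is an assumption taken from xfs_trans_resv.h:
 *
 *	struct xfs_trans	*tp;
 *	int			error;
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
 */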
58
59/*
60 * Free the transaction structure.  If there is more clean up
61 * to do when the structure is freed, add it here.
62 */
63STATIC void
64xfs_trans_free(
65	struct xfs_trans	*tp)
66{
67	xfs_extent_busy_sort(&tp->t_busy);
68	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
69
70	trace_xfs_trans_free(tp, _RET_IP_);
71	xfs_trans_clear_context(tp);
72	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
73		sb_end_intwrite(tp->t_mountp->m_super);
74	xfs_trans_free_dqinfo(tp);
75	kmem_cache_free(xfs_trans_zone, tp);
76}
77
78/*
79 * This is called to create a new transaction which will share the
80 * permanent log reservation of the given transaction.  The remaining
81 * unused block and rt extent reservations are also inherited.  This
82 * implies that the original transaction is no longer allowed to allocate
83 * blocks.  Locks and log items, however, are not inherited.  They must
84 * be added to the new transaction explicitly.
85 */
86STATIC struct xfs_trans *
87xfs_trans_dup(
88	struct xfs_trans	*tp)
89{
90	struct xfs_trans	*ntp;
91
92	trace_xfs_trans_dup(tp, _RET_IP_);
93
94	ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
95
96	/*
97	 * Initialize the new transaction structure.
98	 */
99	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
100	ntp->t_mountp = tp->t_mountp;
101	INIT_LIST_HEAD(&ntp->t_items);
102	INIT_LIST_HEAD(&ntp->t_busy);
103	INIT_LIST_HEAD(&ntp->t_dfops);
104	ntp->t_firstblock = NULLFSBLOCK;
105
106	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
107	ASSERT(tp->t_ticket != NULL);
108
109	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
110		       (tp->t_flags & XFS_TRANS_RESERVE) |
111		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) |
112		       (tp->t_flags & XFS_TRANS_RES_FDBLKS);
113	/* We gave our writer reference to the new transaction */
114	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
115	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
116
117	ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
118	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
119	tp->t_blk_res = tp->t_blk_res_used;
120
121	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
122	tp->t_rtx_res = tp->t_rtx_res_used;
123
124	xfs_trans_switch_context(tp, ntp);
125
126	/* move deferred ops over to the new tp */
127	xfs_defer_move(ntp, tp);
128
129	xfs_trans_dup_dqinfo(tp, ntp);
130	return ntp;
131}
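/*
 * Illustrative sketch (added, not part of the xref'd file): within this file
 * xfs_trans_dup() is only used by xfs_trans_roll() below, which pairs it with
 * a regranting commit of the old transaction and a fresh log reservation for
 * the new one:
 *
 *	ntp = xfs_trans_dup(tp);
 *	error = __xfs_trans_commit(tp, true);
 *	if (!error)
 *		error = xfs_trans_reserve(ntp, &tres, 0, 0);
 */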
132
133/*
134 * This is called to reserve free disk blocks and log space for the
135 * given transaction.  This must be done before allocating any resources
136 * within the transaction.
137 *
138 * This will return ENOSPC if there are not enough blocks available.
139 * It will sleep waiting for available log space.
140 * The only flag honoured in resp->tr_logflags is XFS_TRANS_PERM_LOG_RES,
141 * which is used by long running transactions.  If any one of the reservations
142 * fails then they will all be backed out.
143 *
144 * This does not do quota reservations. That typically is done by the
145 * caller afterwards.
146 */
147static int
148xfs_trans_reserve(
149	struct xfs_trans	*tp,
150	struct xfs_trans_res	*resp,
151	uint			blocks,
152	uint			rtextents)
153{
154	struct xfs_mount	*mp = tp->t_mountp;
155	int			error = 0;
156	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
157
158	/*
159	 * Attempt to reserve the needed disk blocks by decrementing
160	 * the number needed from the number available.  This will
161	 * fail if the count would go below zero.
162	 */
163	if (blocks > 0) {
164		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
165		if (error != 0)
166			return -ENOSPC;
167		tp->t_blk_res += blocks;
168	}
169
170	/*
171	 * Reserve the log space needed for this transaction.
172	 */
173	if (resp->tr_logres > 0) {
174		bool	permanent = false;
175
176		ASSERT(tp->t_log_res == 0 ||
177		       tp->t_log_res == resp->tr_logres);
178		ASSERT(tp->t_log_count == 0 ||
179		       tp->t_log_count == resp->tr_logcount);
180
181		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
182			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
183			permanent = true;
184		} else {
185			ASSERT(tp->t_ticket == NULL);
186			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
187		}
188
189		if (tp->t_ticket != NULL) {
190			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
191			error = xfs_log_regrant(mp, tp->t_ticket);
192		} else {
193			error = xfs_log_reserve(mp,
194						resp->tr_logres,
195						resp->tr_logcount,
196						&tp->t_ticket, XFS_TRANSACTION,
197						permanent);
198		}
199
200		if (error)
201			goto undo_blocks;
202
203		tp->t_log_res = resp->tr_logres;
204		tp->t_log_count = resp->tr_logcount;
205	}
206
207	/*
208	 * Attempt to reserve the needed realtime extents by decrementing
209	 * the number needed from the number available.  This will
210	 * fail if the count would go below zero.
211	 */
212	if (rtextents > 0) {
213		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
214		if (error) {
215			error = -ENOSPC;
216			goto undo_log;
217		}
218		tp->t_rtx_res += rtextents;
219	}
220
221	return 0;
222
223	/*
224	 * Error cases jump to one of these labels to undo any
225	 * reservations which have already been performed.
226	 */
227undo_log:
228	if (resp->tr_logres > 0) {
229		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
230		tp->t_ticket = NULL;
231		tp->t_log_res = 0;
232		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
233	}
234
235undo_blocks:
236	if (blocks > 0) {
237		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
238		tp->t_blk_res = 0;
239	}
240	return error;
241}
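/*
 * Illustrative sketch (added; logres_bytes is a placeholder and the
 * XFS_DEFAULT_PERM_LOG_COUNT name is an assumption from xfs_trans_resv.h):
 * a hand-rolled permanent reservation handed to xfs_trans_reserve() carries
 * the log space, the regrant count and the permanence flag:
 *
 *	struct xfs_trans_res	tres = {
 *		.tr_logres	= logres_bytes,
 *		.tr_logcount	= XFS_DEFAULT_PERM_LOG_COUNT,
 *		.tr_logflags	= XFS_TRANS_PERM_LOG_RES,
 *	};
 *
 *	error = xfs_trans_reserve(tp, &tres, blocks, rtextents);
 *
 * Real callers normally use the precomputed entries in M_RES(mp) instead.
 */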
242
243int
244xfs_trans_alloc(
245	struct xfs_mount	*mp,
246	struct xfs_trans_res	*resp,
247	uint			blocks,
248	uint			rtextents,
249	uint			flags,
250	struct xfs_trans	**tpp)
251{
252	struct xfs_trans	*tp;
253	int			error;
254
255	/*
256	 * Allocate the handle before we do our freeze accounting and set up the
257	 * GFP_NOFS allocation context so that we avoid lockdep false positives
258	 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
259	 */
260	tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
261	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
262		sb_start_intwrite(mp->m_super);
263	xfs_trans_set_context(tp);
264
265	/*
266	 * Zero-reservation ("empty") transactions can't modify anything, so
267	 * they're allowed to run while we're frozen.
268	 */
269	WARN_ON(resp->tr_logres > 0 &&
270		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
271	ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
272	       xfs_sb_version_haslazysbcount(&mp->m_sb));
273
274	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
275	tp->t_flags = flags;
276	tp->t_mountp = mp;
277	INIT_LIST_HEAD(&tp->t_items);
278	INIT_LIST_HEAD(&tp->t_busy);
279	INIT_LIST_HEAD(&tp->t_dfops);
280	tp->t_firstblock = NULLFSBLOCK;
281
282	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
283	if (error) {
284		xfs_trans_cancel(tp);
285		return error;
286	}
287
288	trace_xfs_trans_alloc(tp, _RET_IP_);
289
290	*tpp = tp;
291	return 0;
292}
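/*
 * Illustrative caller sketch (added; xfs_ilock(), xfs_trans_ijoin() and
 * xfs_trans_log_inode() live in other files): the usual shape of an inode
 * modification built on xfs_trans_alloc() is allocate, join, modify, log,
 * commit:
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
 *	if (error)
 *		return error;
 *	xfs_ilock(ip, XFS_ILOCK_EXCL);
 *	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 *	(modify the inode core here)
 *	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 *	error = xfs_trans_commit(tp);	(also drops the joined ilock)
 */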
293
294/*
295 * Create an empty transaction with no reservation.  This is a defensive
296 * mechanism for routines that query metadata without actually modifying them --
297 * if the metadata being queried is somehow cross-linked (think a btree block
298 * pointer that points higher in the tree), we risk deadlock.  However, blocks
299 * grabbed as part of a transaction can be re-grabbed.  The verifiers will
300 * notice the corrupt block and the operation will fail back to userspace
301 * without deadlocking.
302 *
303 * Note the zero-length reservation; this transaction MUST be cancelled without
304 * any dirty data.
305 *
306 * Callers should obtain freeze protection to avoid a conflict with fs freezing
307 * where we can be grabbing buffers at the same time that freeze is trying to
308 * drain the buffer LRU list.
309 */
310int
311xfs_trans_alloc_empty(
312	struct xfs_mount		*mp,
313	struct xfs_trans		**tpp)
314{
315	struct xfs_trans_res		resv = {0};
316
317	return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
318}
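/*
 * Illustrative sketch (added, not part of the xref'd file): online scrub and
 * similar read-only walkers allocate an empty transaction, use it to re-grab
 * buffers deadlock-free, and then cancel it; it must never be committed with
 * dirty items:
 *
 *	error = xfs_trans_alloc_empty(mp, &tp);
 *	if (error)
 *		return error;
 *	(read-only metadata lookups, with buffers joined to tp)
 *	xfs_trans_cancel(tp);
 */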
319
320/*
321 * Record the indicated change to the given field for application
322 * to the file system's superblock when the transaction commits.
323 * For now, just store the change in the transaction structure.
324 *
325 * Mark the transaction structure to indicate that the superblock
326 * needs to be updated before committing.
327 *
328 * Because we may not be keeping track of allocated/free inodes and
329 * used filesystem blocks in the superblock, we do not mark the
330 * superblock dirty in this transaction if we modify these fields.
331 * We still need to update the transaction deltas so that they get
332 * applied to the incore superblock, but we don't want them to
333 * cause the superblock to get locked and logged if these are the
334 * only fields in the superblock that the transaction modifies.
335 */
336void
337xfs_trans_mod_sb(
338	xfs_trans_t	*tp,
339	uint		field,
340	int64_t		delta)
341{
342	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
343	xfs_mount_t	*mp = tp->t_mountp;
344
345	switch (field) {
346	case XFS_TRANS_SB_ICOUNT:
347		tp->t_icount_delta += delta;
348		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
349			flags &= ~XFS_TRANS_SB_DIRTY;
350		break;
351	case XFS_TRANS_SB_IFREE:
352		tp->t_ifree_delta += delta;
353		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
354			flags &= ~XFS_TRANS_SB_DIRTY;
355		break;
356	case XFS_TRANS_SB_FDBLOCKS:
357		/*
358		 * Track the number of blocks allocated in the transaction.
359		 * Make sure it does not exceed the number reserved. If so,
360		 * shutdown as this can lead to accounting inconsistency.
361		 */
362		if (delta < 0) {
363			tp->t_blk_res_used += (uint)-delta;
364			if (tp->t_blk_res_used > tp->t_blk_res)
365				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
366		} else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) {
367			int64_t	blkres_delta;
368
369			/*
370			 * Return freed blocks directly to the reservation
371			 * instead of the global pool, being careful not to
372			 * overflow the trans counter. This is used to preserve
373			 * reservation across chains of transaction rolls that
374			 * repeatedly free and allocate blocks.
375			 */
376			blkres_delta = min_t(int64_t, delta,
377					     UINT_MAX - tp->t_blk_res);
378			tp->t_blk_res += blkres_delta;
379			delta -= blkres_delta;
380		}
381		tp->t_fdblocks_delta += delta;
382		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
383			flags &= ~XFS_TRANS_SB_DIRTY;
384		break;
385	case XFS_TRANS_SB_RES_FDBLOCKS:
386		/*
387		 * The allocation has already been applied to the
388		 * in-core superblock's counter.  This should only
389		 * be applied to the on-disk superblock.
390		 */
391		tp->t_res_fdblocks_delta += delta;
392		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
393			flags &= ~XFS_TRANS_SB_DIRTY;
394		break;
395	case XFS_TRANS_SB_FREXTENTS:
396		/*
397		 * Track the number of blocks allocated in the
398		 * transaction.  Make sure it does not exceed the
399		 * number reserved.
400		 */
401		if (delta < 0) {
402			tp->t_rtx_res_used += (uint)-delta;
403			ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
404		}
405		tp->t_frextents_delta += delta;
406		break;
407	case XFS_TRANS_SB_RES_FREXTENTS:
408		/*
409		 * The allocation has already been applied to the
410		 * in-core superblock's counter.  This should only
411		 * be applied to the on-disk superblock.
412		 */
413		ASSERT(delta < 0);
414		tp->t_res_frextents_delta += delta;
415		break;
416	case XFS_TRANS_SB_DBLOCKS:
417		ASSERT(delta > 0);
418		tp->t_dblocks_delta += delta;
419		break;
420	case XFS_TRANS_SB_AGCOUNT:
421		ASSERT(delta > 0);
422		tp->t_agcount_delta += delta;
423		break;
424	case XFS_TRANS_SB_IMAXPCT:
425		tp->t_imaxpct_delta += delta;
426		break;
427	case XFS_TRANS_SB_REXTSIZE:
428		tp->t_rextsize_delta += delta;
429		break;
430	case XFS_TRANS_SB_RBMBLOCKS:
431		tp->t_rbmblocks_delta += delta;
432		break;
433	case XFS_TRANS_SB_RBLOCKS:
434		tp->t_rblocks_delta += delta;
435		break;
436	case XFS_TRANS_SB_REXTENTS:
437		tp->t_rextents_delta += delta;
438		break;
439	case XFS_TRANS_SB_REXTSLOG:
440		tp->t_rextslog_delta += delta;
441		break;
442	default:
443		ASSERT(0);
444		return;
445	}
446
447	tp->t_flags |= flags;
448}
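/*
 * Illustrative sketch (added; the allocator paths live elsewhere): a block
 * allocation charges the blocks it consumed against the transaction by
 * passing a negative delta, and a free passes a positive one which the
 * XFS_TRANS_RES_FDBLKS case above may fold back into tp->t_blk_res:
 *
 *	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -(int64_t)len);
 *	...
 *	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
 */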
449
450/*
451 * xfs_trans_apply_sb_deltas() is called from the commit code
452 * to bring the superblock buffer into the current transaction
453 * and modify it as requested by earlier calls to xfs_trans_mod_sb().
454 *
455 * For now we just look at each field allowed to change and change
456 * it if necessary.
457 */
458STATIC void
459xfs_trans_apply_sb_deltas(
460	xfs_trans_t	*tp)
461{
462	xfs_dsb_t	*sbp;
463	xfs_buf_t	*bp;
464	int		whole = 0;
465
466	bp = xfs_trans_getsb(tp);
467	sbp = bp->b_addr;
468
469	/*
470	 * Check that superblock mods match the mods made to AGF counters.
471	 */
472	ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) ==
473	       (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
474		tp->t_ag_btree_delta));
475
476	/*
477	 * Only update the superblock counters if we are logging them
478	 */
479	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
480		if (tp->t_icount_delta)
481			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
482		if (tp->t_ifree_delta)
483			be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
484		if (tp->t_fdblocks_delta)
485			be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
486		if (tp->t_res_fdblocks_delta)
487			be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
488	}
489
490	if (tp->t_frextents_delta)
491		be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta);
492	if (tp->t_res_frextents_delta)
493		be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta);
494
495	if (tp->t_dblocks_delta) {
496		be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
497		whole = 1;
498	}
499	if (tp->t_agcount_delta) {
500		be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
501		whole = 1;
502	}
503	if (tp->t_imaxpct_delta) {
504		sbp->sb_imax_pct += tp->t_imaxpct_delta;
505		whole = 1;
506	}
507	if (tp->t_rextsize_delta) {
508		be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
509		whole = 1;
510	}
511	if (tp->t_rbmblocks_delta) {
512		be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
513		whole = 1;
514	}
515	if (tp->t_rblocks_delta) {
516		be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
517		whole = 1;
518	}
519	if (tp->t_rextents_delta) {
520		be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
521		whole = 1;
522	}
523	if (tp->t_rextslog_delta) {
524		sbp->sb_rextslog += tp->t_rextslog_delta;
525		whole = 1;
526	}
527
528	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
529	if (whole)
530		/*
531		 * Log the whole thing, the fields are noncontiguous.
532		 */
533		xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1);
534	else
535		/*
536		 * Since all the modifiable fields are contiguous, we
537		 * can get away with this.
538		 */
539		xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount),
540				  offsetof(xfs_dsb_t, sb_frextents) +
541				  sizeof(sbp->sb_frextents) - 1);
542}
543
544/*
545 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
546 * apply superblock counter changes to the in-core superblock.  The
547 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
548 * applied to the in-core superblock.  The idea is that those changes have
549 * already been made.
550 *
551 * If we are not logging superblock counters, then the inode allocated/free and
552 * used block counts are not updated in the on disk superblock. In this case,
553 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
554 * still need to update the incore superblock with the changes.
555 *
556 * Deltas for the inode count are +/-64, hence we use a large batch size of 128
557 * so we don't need to take the counter lock on every update.
558 */
559#define XFS_ICOUNT_BATCH	128
560
561void
562xfs_trans_unreserve_and_mod_sb(
563	struct xfs_trans	*tp)
564{
565	struct xfs_mount	*mp = tp->t_mountp;
566	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
567	int64_t			blkdelta = 0;
568	int64_t			rtxdelta = 0;
569	int64_t			idelta = 0;
570	int64_t			ifreedelta = 0;
571	int			error;
572
573	/* calculate deltas */
574	if (tp->t_blk_res > 0)
575		blkdelta = tp->t_blk_res;
576	if ((tp->t_fdblocks_delta != 0) &&
577	    (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
578	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
579		blkdelta += tp->t_fdblocks_delta;
580
581	if (tp->t_rtx_res > 0)
582		rtxdelta = tp->t_rtx_res;
583	if ((tp->t_frextents_delta != 0) &&
584	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
585		rtxdelta += tp->t_frextents_delta;
586
587	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
588	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
589		idelta = tp->t_icount_delta;
590		ifreedelta = tp->t_ifree_delta;
591	}
592
593	/* apply the per-cpu counters */
594	if (blkdelta) {
595		error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
596		ASSERT(!error);
597	}
598
599	if (idelta) {
600		percpu_counter_add_batch(&mp->m_icount, idelta,
601					 XFS_ICOUNT_BATCH);
602		if (idelta < 0)
603			ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
604							XFS_ICOUNT_BATCH) >= 0);
605	}
606
607	if (ifreedelta) {
608		percpu_counter_add(&mp->m_ifree, ifreedelta);
609		if (ifreedelta < 0)
610			ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
611	}
612
613	if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
614		return;
615
616	/* apply remaining deltas */
617	spin_lock(&mp->m_sb_lock);
618	mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
619	mp->m_sb.sb_icount += idelta;
620	mp->m_sb.sb_ifree += ifreedelta;
621	mp->m_sb.sb_frextents += rtxdelta;
622	mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
623	mp->m_sb.sb_agcount += tp->t_agcount_delta;
624	mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
625	mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
626	mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
627	mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
628	mp->m_sb.sb_rextents += tp->t_rextents_delta;
629	mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
630	spin_unlock(&mp->m_sb_lock);
631
632	/*
633	 * Debug checks outside of the spinlock so they don't lock up the
634	 * machine if they fail.
635	 */
636	ASSERT(mp->m_sb.sb_imax_pct >= 0);
637	ASSERT(mp->m_sb.sb_rextslog >= 0);
638	return;
639}
640
641/* Add the given log item to the transaction's list of log items. */
642void
643xfs_trans_add_item(
644	struct xfs_trans	*tp,
645	struct xfs_log_item	*lip)
646{
647	ASSERT(lip->li_mountp == tp->t_mountp);
648	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
649	ASSERT(list_empty(&lip->li_trans));
650	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
651
652	list_add_tail(&lip->li_trans, &tp->t_items);
653	trace_xfs_trans_add_item(tp, _RET_IP_);
654}
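/*
 * Illustrative note (added; the join helpers live in other files): high level
 * code does not call xfs_trans_add_item() directly.  Helpers such as
 * xfs_trans_ijoin() and xfs_trans_get_buf() attach the object's embedded
 * struct xfs_log_item, roughly:
 *
 *	xfs_trans_add_item(tp, &iip->ili_item);	(inode log item, sketch)
 */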
655
656/*
657 * Unlink the log item from the transaction.  The log item is no longer
658 * considered dirty in this transaction, as the linked transaction has
659 * finished, either by abort or commit completion.
660 */
661void
662xfs_trans_del_item(
663	struct xfs_log_item	*lip)
664{
665	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
666	list_del_init(&lip->li_trans);
667}
668
669/* Detach and unlock all of the items in a transaction */
670static void
671xfs_trans_free_items(
672	struct xfs_trans	*tp,
673	bool			abort)
674{
675	struct xfs_log_item	*lip, *next;
676
677	trace_xfs_trans_free_items(tp, _RET_IP_);
678
679	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
680		xfs_trans_del_item(lip);
681		if (abort)
682			set_bit(XFS_LI_ABORTED, &lip->li_flags);
683		if (lip->li_ops->iop_release)
684			lip->li_ops->iop_release(lip);
685	}
686}
687
688static inline void
689xfs_log_item_batch_insert(
690	struct xfs_ail		*ailp,
691	struct xfs_ail_cursor	*cur,
692	struct xfs_log_item	**log_items,
693	int			nr_items,
694	xfs_lsn_t		commit_lsn)
695{
696	int	i;
697
698	spin_lock(&ailp->ail_lock);
699	/* xfs_trans_ail_update_bulk drops ailp->ail_lock */
700	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
701
702	for (i = 0; i < nr_items; i++) {
703		struct xfs_log_item *lip = log_items[i];
704
705		if (lip->li_ops->iop_unpin)
706			lip->li_ops->iop_unpin(lip, 0);
707	}
708}
709
710/*
711 * Bulk operation version of xfs_trans_committed that takes a log vector of
712 * items to insert into the AIL. This uses bulk AIL insertion techniques to
713 * minimise lock traffic.
714 *
715 * If we are called with the aborted flag set, it is because a log write during
716 * a CIL checkpoint commit has failed. In this case, all the items in the
717 * checkpoint have already gone through iop_committed and iop_committing, which
718 * means that checkpoint commit abort handling is treated exactly the same
719 * as an iclog write error even though we haven't started any IO yet. Hence in
720 * this case all we need to do is iop_committed processing, followed by an
721 * iop_unpin(aborted) call.
722 *
723 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
724 * at the end of the AIL, the insert cursor avoids the need to walk
725 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
726 * call. This saves a lot of needless list walking and is a net win, even
727 * though it slightly increases the amount of AIL lock traffic to set it up
728 * and tear it down.
729 */
730void
731xfs_trans_committed_bulk(
732	struct xfs_ail		*ailp,
733	struct xfs_log_vec	*log_vector,
734	xfs_lsn_t		commit_lsn,
735	bool			aborted)
736{
737#define LOG_ITEM_BATCH_SIZE	32
738	struct xfs_log_item	*log_items[LOG_ITEM_BATCH_SIZE];
739	struct xfs_log_vec	*lv;
740	struct xfs_ail_cursor	cur;
741	int			i = 0;
742
743	spin_lock(&ailp->ail_lock);
744	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
745	spin_unlock(&ailp->ail_lock);
746
747	/* unpin all the log items */
748	for (lv = log_vector; lv; lv = lv->lv_next) {
749		struct xfs_log_item	*lip = lv->lv_item;
750		xfs_lsn_t		item_lsn;
751
752		if (aborted)
753			set_bit(XFS_LI_ABORTED, &lip->li_flags);
754
755		if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
756			lip->li_ops->iop_release(lip);
757			continue;
758		}
759
760		if (lip->li_ops->iop_committed)
761			item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
762		else
763			item_lsn = commit_lsn;
764
765		/* item_lsn of -1 means the item needs no further processing */
766		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
767			continue;
768
769		/*
770		 * if we are aborting the operation, no point in inserting the
771		 * object into the AIL as we are in a shutdown situation.
772		 */
773		if (aborted) {
774			ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
775			if (lip->li_ops->iop_unpin)
776				lip->li_ops->iop_unpin(lip, 1);
777			continue;
778		}
779
780		if (item_lsn != commit_lsn) {
781
782			/*
783			 * Not a bulk update option due to unusual item_lsn.
784			 * Push into AIL immediately, rechecking the lsn once
785			 * we have the ail lock. Then unpin the item. This does
786			 * not affect the AIL cursor the bulk insert path is
787			 * using.
788			 */
789			spin_lock(&ailp->ail_lock);
790			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
791				xfs_trans_ail_update(ailp, lip, item_lsn);
792			else
793				spin_unlock(&ailp->ail_lock);
794			if (lip->li_ops->iop_unpin)
795				lip->li_ops->iop_unpin(lip, 0);
796			continue;
797		}
798
799		/* Item is a candidate for bulk AIL insert.  */
800		log_items[i++] = lv->lv_item;
801		if (i >= LOG_ITEM_BATCH_SIZE) {
802			xfs_log_item_batch_insert(ailp, &cur, log_items,
803					LOG_ITEM_BATCH_SIZE, commit_lsn);
804			i = 0;
805		}
806	}
807
808	/* make sure we insert the remainder! */
809	if (i)
810		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
811
812	spin_lock(&ailp->ail_lock);
813	xfs_trans_ail_cursor_done(&cur);
814	spin_unlock(&ailp->ail_lock);
815}
816
817/*
818 * Commit the given transaction to the log.
819 *
820 * The XFS disk error handling mechanism is not based on a typical
821 * transaction abort mechanism.  Logically, after the filesystem
822 * gets marked 'SHUTDOWN', we can't let any new transactions
823 * be durable - ie. committed to disk - because some metadata might
824 * be inconsistent. In such cases, this returns an error, and the
825 * caller may assume that all locked objects joined to the transaction
826 * have already been unlocked as if the commit had succeeded.
827 * Do not reference the transaction structure after this call.
828 */
829static int
830__xfs_trans_commit(
831	struct xfs_trans	*tp,
832	bool			regrant)
833{
834	struct xfs_mount	*mp = tp->t_mountp;
835	xfs_csn_t		commit_seq = 0;
836	int			error = 0;
837	int			sync = tp->t_flags & XFS_TRANS_SYNC;
838
839	trace_xfs_trans_commit(tp, _RET_IP_);
840
841	/*
842	 * Finish deferred items on final commit. Only permanent transactions
843	 * should ever have deferred ops.
844	 */
845	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
846		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
847	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
848		error = xfs_defer_finish_noroll(&tp);
849		if (error)
850			goto out_unreserve;
851	}
852
853	/*
854	 * If there is nothing to be logged by the transaction,
855	 * then unlock all of the items associated with the
856	 * transaction and free the transaction structure.
857	 * Also make sure to return any reserved blocks to
858	 * the free pool.
859	 */
860	if (!(tp->t_flags & XFS_TRANS_DIRTY))
861		goto out_unreserve;
862
863	if (XFS_FORCED_SHUTDOWN(mp)) {
864		error = -EIO;
865		goto out_unreserve;
866	}
867
868	ASSERT(tp->t_ticket != NULL);
869
870	/*
871	 * If we need to update the superblock, then do it now.
872	 */
873	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
874		xfs_trans_apply_sb_deltas(tp);
875	xfs_trans_apply_dquot_deltas(tp);
876
877	xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
878
879	xfs_trans_free(tp);
880
881	/*
882	 * If the transaction needs to be synchronous, then force the
883	 * log out now and wait for it.
884	 */
885	if (sync) {
886		error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
887		XFS_STATS_INC(mp, xs_trans_sync);
888	} else {
889		XFS_STATS_INC(mp, xs_trans_async);
890	}
891
892	return error;
893
894out_unreserve:
895	xfs_trans_unreserve_and_mod_sb(tp);
896
897	/*
898	 * It is indeed possible for the transaction to be not dirty but
899	 * the dqinfo portion to be.  All that means is that we have some
900	 * (non-persistent) quota reservations that need to be unreserved.
901	 */
902	xfs_trans_unreserve_and_mod_dquots(tp);
903	if (tp->t_ticket) {
904		if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log))
905			xfs_log_ticket_regrant(mp->m_log, tp->t_ticket);
906		else
907			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
908		tp->t_ticket = NULL;
909	}
910	xfs_trans_free_items(tp, !!error);
911	xfs_trans_free(tp);
912
913	XFS_STATS_INC(mp, xs_trans_empty);
914	return error;
915}
916
917int
918xfs_trans_commit(
919	struct xfs_trans	*tp)
920{
921	return __xfs_trans_commit(tp, false);
922}
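/*
 * Illustrative sketch of the commit error contract described above (added):
 * when the commit fails because the filesystem is shut down, the transaction
 * and its joined items have already been released, so callers just propagate
 * the error and must not touch tp again:
 *
 *	error = xfs_trans_commit(tp);
 *	if (error)
 *		return error;
 */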
923
924/*
925 * Unlock all of the transaction's items and free the transaction.
926 * The transaction must not have modified any of its items, because
927 * there is no way to restore them to their previous state.
928 *
929 * If the transaction has made a log reservation, make sure to release
930 * it as well.
931 */
932void
933xfs_trans_cancel(
934	struct xfs_trans	*tp)
935{
936	struct xfs_mount	*mp = tp->t_mountp;
937	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);
938
939	trace_xfs_trans_cancel(tp, _RET_IP_);
940
941	if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
942		xfs_defer_cancel(tp);
943
944	/*
945	 * See if the caller is relying on us to shut down the
946	 * filesystem.  This happens in paths where we detect
947	 * corruption and decide to give up.
948	 */
949	if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
950		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
951		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
952	}
953#ifdef DEBUG
954	if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
955		struct xfs_log_item *lip;
956
957		list_for_each_entry(lip, &tp->t_items, li_trans)
958			ASSERT(!xlog_item_is_intent_done(lip));
959	}
960#endif
961	xfs_trans_unreserve_and_mod_sb(tp);
962	xfs_trans_unreserve_and_mod_dquots(tp);
963
964	if (tp->t_ticket) {
965		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
966		tp->t_ticket = NULL;
967	}
968
969	xfs_trans_free_items(tp, dirty);
970	xfs_trans_free(tp);
971}
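/*
 * Illustrative error-path sketch (added; do_work() is a hypothetical helper):
 * a caller that fails before dirtying anything simply cancels; if it has
 * already dirtied items, the cancel above will force a shutdown instead:
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
 *	if (error)
 *		return error;
 *	error = do_work(tp);
 *	if (error) {
 *		xfs_trans_cancel(tp);
 *		return error;
 *	}
 *	return xfs_trans_commit(tp);
 */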
972
973/*
974 * Roll from one trans in the sequence of PERMANENT transactions to
975 * the next: permanent transactions are only flushed out when
976 * committed with xfs_trans_commit(), but we still want to let chunks of
977 * the work reach the log as soon as possible. So we commit the
978 * chunk we've been working on and get a new transaction to continue.
979 */
980int
981xfs_trans_roll(
982	struct xfs_trans	**tpp)
983{
984	struct xfs_trans	*trans = *tpp;
985	struct xfs_trans_res	tres;
986	int			error;
987
988	trace_xfs_trans_roll(trans, _RET_IP_);
989
990	/*
991	 * Copy the critical parameters from one trans to the next.
992	 */
993	tres.tr_logres = trans->t_log_res;
994	tres.tr_logcount = trans->t_log_count;
995
996	*tpp = xfs_trans_dup(trans);
997
998	/*
999	 * Commit the current transaction.
1000	 * If this commit fails, it will just unlock those items that
1001	 * are not marked ihold. That also means that a filesystem shutdown
1002	 * is in progress. The caller takes the responsibility to cancel
1003	 * the duplicate transaction that gets returned.
1004	 */
1005	error = __xfs_trans_commit(trans, true);
1006	if (error)
1007		return error;
1008
1009	/*
1010	 * Reserve space in the log for the next transaction.
1011	 * This also pushes items in the "AIL", the list of logged items,
1012	 * out to disk if they are taking up space at the tail of the log
1013	 * that we want to use.  This requires that either nothing be locked
1014	 * across this call, or that anything that is locked be logged in
1015	 * the prior and the next transactions.
1016	 */
1017	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
1018	return xfs_trans_reserve(*tpp, &tres, 0, 0);
1019}
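/*
 * Illustrative rolling sketch (added; xfs_trans_ijoin() lives in
 * xfs_trans_inode.c and more_work is a placeholder): a long running change
 * keeps its inode attached across each roll by re-joining it to the new
 * transaction handed back in *tpp:
 *
 *	xfs_trans_ijoin(tp, ip, 0);
 *	do {
 *		(log one chunk of the change)
 *		error = xfs_trans_roll(&tp);
 *		if (error)
 *			break;
 *		xfs_trans_ijoin(tp, ip, 0);
 *	} while (more_work);
 */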
1020