// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_mount.h"
1362306a36Sopenharmony_ci#include "xfs_errortag.h"
1462306a36Sopenharmony_ci#include "xfs_error.h"
1562306a36Sopenharmony_ci#include "xfs_trans.h"
1662306a36Sopenharmony_ci#include "xfs_trans_priv.h"
1762306a36Sopenharmony_ci#include "xfs_log.h"
1862306a36Sopenharmony_ci#include "xfs_log_priv.h"
1962306a36Sopenharmony_ci#include "xfs_trace.h"
2062306a36Sopenharmony_ci#include "xfs_sysfs.h"
2162306a36Sopenharmony_ci#include "xfs_sb.h"
2262306a36Sopenharmony_ci#include "xfs_health.h"
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_cistruct kmem_cache	*xfs_log_ticket_cache;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci/* Local miscellaneous function prototypes */
2762306a36Sopenharmony_ciSTATIC struct xlog *
2862306a36Sopenharmony_cixlog_alloc_log(
2962306a36Sopenharmony_ci	struct xfs_mount	*mp,
3062306a36Sopenharmony_ci	struct xfs_buftarg	*log_target,
3162306a36Sopenharmony_ci	xfs_daddr_t		blk_offset,
3262306a36Sopenharmony_ci	int			num_bblks);
3362306a36Sopenharmony_ciSTATIC int
3462306a36Sopenharmony_cixlog_space_left(
3562306a36Sopenharmony_ci	struct xlog		*log,
3662306a36Sopenharmony_ci	atomic64_t		*head);
3762306a36Sopenharmony_ciSTATIC void
3862306a36Sopenharmony_cixlog_dealloc_log(
3962306a36Sopenharmony_ci	struct xlog		*log);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci/* local state machine functions */
4262306a36Sopenharmony_ciSTATIC void xlog_state_done_syncing(
4362306a36Sopenharmony_ci	struct xlog_in_core	*iclog);
4462306a36Sopenharmony_ciSTATIC void xlog_state_do_callback(
4562306a36Sopenharmony_ci	struct xlog		*log);
4662306a36Sopenharmony_ciSTATIC int
4762306a36Sopenharmony_cixlog_state_get_iclog_space(
4862306a36Sopenharmony_ci	struct xlog		*log,
4962306a36Sopenharmony_ci	int			len,
5062306a36Sopenharmony_ci	struct xlog_in_core	**iclog,
5162306a36Sopenharmony_ci	struct xlog_ticket	*ticket,
5262306a36Sopenharmony_ci	int			*logoffsetp);
5362306a36Sopenharmony_ciSTATIC void
5462306a36Sopenharmony_cixlog_grant_push_ail(
5562306a36Sopenharmony_ci	struct xlog		*log,
5662306a36Sopenharmony_ci	int			need_bytes);
5762306a36Sopenharmony_ciSTATIC void
5862306a36Sopenharmony_cixlog_sync(
5962306a36Sopenharmony_ci	struct xlog		*log,
6062306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
6162306a36Sopenharmony_ci	struct xlog_ticket	*ticket);
6262306a36Sopenharmony_ci#if defined(DEBUG)
6362306a36Sopenharmony_ciSTATIC void
6462306a36Sopenharmony_cixlog_verify_grant_tail(
6562306a36Sopenharmony_ci	struct xlog *log);
6662306a36Sopenharmony_ciSTATIC void
6762306a36Sopenharmony_cixlog_verify_iclog(
6862306a36Sopenharmony_ci	struct xlog		*log,
6962306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
7062306a36Sopenharmony_ci	int			count);
7162306a36Sopenharmony_ciSTATIC void
7262306a36Sopenharmony_cixlog_verify_tail_lsn(
7362306a36Sopenharmony_ci	struct xlog		*log,
7462306a36Sopenharmony_ci	struct xlog_in_core	*iclog);
7562306a36Sopenharmony_ci#else
7662306a36Sopenharmony_ci#define xlog_verify_grant_tail(a)
7762306a36Sopenharmony_ci#define xlog_verify_iclog(a,b,c)
7862306a36Sopenharmony_ci#define xlog_verify_tail_lsn(a,b)
7962306a36Sopenharmony_ci#endif
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ciSTATIC int
8262306a36Sopenharmony_cixlog_iclogs_empty(
8362306a36Sopenharmony_ci	struct xlog		*log);
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic int
8662306a36Sopenharmony_cixfs_log_cover(struct xfs_mount *);
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci/*
8962306a36Sopenharmony_ci * We need to make sure the buffer pointer returned is naturally aligned for the
9062306a36Sopenharmony_ci * biggest basic data type we put into it. We have already accounted for this
9162306a36Sopenharmony_ci * padding when sizing the buffer.
9262306a36Sopenharmony_ci *
9362306a36Sopenharmony_ci * However, this padding does not get written into the log, and hence we have to
9462306a36Sopenharmony_ci * track the space used by the log vectors separately to prevent log space hangs
9562306a36Sopenharmony_ci * due to inaccurate accounting (i.e. a leak) of the used log space through the
9662306a36Sopenharmony_ci * CIL context ticket.
9762306a36Sopenharmony_ci *
9862306a36Sopenharmony_ci * We also add space for the xlog_op_header that describes this region in the
9962306a36Sopenharmony_ci * log. This prepends the data region we return to the caller to copy their data
10062306a36Sopenharmony_ci * into, so do all the static initialisation of the ophdr now. Because the ophdr
10162306a36Sopenharmony_ci * is not 8 byte aligned, we have to be careful to ensure that we align the
10262306a36Sopenharmony_ci * start of the buffer such that the region we return to the call is 8 byte
10362306a36Sopenharmony_ci * aligned and packed against the tail of the ophdr.
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_civoid *
10662306a36Sopenharmony_cixlog_prepare_iovec(
10762306a36Sopenharmony_ci	struct xfs_log_vec	*lv,
10862306a36Sopenharmony_ci	struct xfs_log_iovec	**vecp,
10962306a36Sopenharmony_ci	uint			type)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	struct xfs_log_iovec	*vec = *vecp;
11262306a36Sopenharmony_ci	struct xlog_op_header	*oph;
11362306a36Sopenharmony_ci	uint32_t		len;
11462306a36Sopenharmony_ci	void			*buf;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	if (vec) {
11762306a36Sopenharmony_ci		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
11862306a36Sopenharmony_ci		vec++;
11962306a36Sopenharmony_ci	} else {
12062306a36Sopenharmony_ci		vec = &lv->lv_iovecp[0];
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	len = lv->lv_buf_len + sizeof(struct xlog_op_header);
12462306a36Sopenharmony_ci	if (!IS_ALIGNED(len, sizeof(uint64_t))) {
12562306a36Sopenharmony_ci		lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
12662306a36Sopenharmony_ci					sizeof(struct xlog_op_header);
12762306a36Sopenharmony_ci	}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	vec->i_type = type;
13062306a36Sopenharmony_ci	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	oph = vec->i_addr;
13362306a36Sopenharmony_ci	oph->oh_clientid = XFS_TRANSACTION;
13462306a36Sopenharmony_ci	oph->oh_res2 = 0;
13562306a36Sopenharmony_ci	oph->oh_flags = 0;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	buf = vec->i_addr + sizeof(struct xlog_op_header);
13862306a36Sopenharmony_ci	ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	*vecp = vec;
14162306a36Sopenharmony_ci	return buf;
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_cistatic void
14562306a36Sopenharmony_cixlog_grant_sub_space(
14662306a36Sopenharmony_ci	struct xlog		*log,
14762306a36Sopenharmony_ci	atomic64_t		*head,
14862306a36Sopenharmony_ci	int			bytes)
14962306a36Sopenharmony_ci{
15062306a36Sopenharmony_ci	int64_t	head_val = atomic64_read(head);
15162306a36Sopenharmony_ci	int64_t new, old;
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	do {
15462306a36Sopenharmony_ci		int	cycle, space;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci		xlog_crack_grant_head_val(head_val, &cycle, &space);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci		space -= bytes;
15962306a36Sopenharmony_ci		if (space < 0) {
16062306a36Sopenharmony_ci			space += log->l_logsize;
16162306a36Sopenharmony_ci			cycle--;
16262306a36Sopenharmony_ci		}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci		old = head_val;
16562306a36Sopenharmony_ci		new = xlog_assign_grant_head_val(cycle, space);
16662306a36Sopenharmony_ci		head_val = atomic64_cmpxchg(head, old, new);
16762306a36Sopenharmony_ci	} while (head_val != old);
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic void
17162306a36Sopenharmony_cixlog_grant_add_space(
17262306a36Sopenharmony_ci	struct xlog		*log,
17362306a36Sopenharmony_ci	atomic64_t		*head,
17462306a36Sopenharmony_ci	int			bytes)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	int64_t	head_val = atomic64_read(head);
17762306a36Sopenharmony_ci	int64_t new, old;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	do {
18062306a36Sopenharmony_ci		int		tmp;
18162306a36Sopenharmony_ci		int		cycle, space;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci		xlog_crack_grant_head_val(head_val, &cycle, &space);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci		tmp = log->l_logsize - space;
18662306a36Sopenharmony_ci		if (tmp > bytes)
18762306a36Sopenharmony_ci			space += bytes;
18862306a36Sopenharmony_ci		else {
18962306a36Sopenharmony_ci			space = bytes - tmp;
19062306a36Sopenharmony_ci			cycle++;
19162306a36Sopenharmony_ci		}
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci		old = head_val;
19462306a36Sopenharmony_ci		new = xlog_assign_grant_head_val(cycle, space);
19562306a36Sopenharmony_ci		head_val = atomic64_cmpxchg(head, old, new);
19662306a36Sopenharmony_ci	} while (head_val != old);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ciSTATIC void
20062306a36Sopenharmony_cixlog_grant_head_init(
20162306a36Sopenharmony_ci	struct xlog_grant_head	*head)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	xlog_assign_grant_head(&head->grant, 1, 0);
20462306a36Sopenharmony_ci	INIT_LIST_HEAD(&head->waiters);
20562306a36Sopenharmony_ci	spin_lock_init(&head->lock);
20662306a36Sopenharmony_ci}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ciSTATIC void
20962306a36Sopenharmony_cixlog_grant_head_wake_all(
21062306a36Sopenharmony_ci	struct xlog_grant_head	*head)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	struct xlog_ticket	*tic;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	spin_lock(&head->lock);
21562306a36Sopenharmony_ci	list_for_each_entry(tic, &head->waiters, t_queue)
21662306a36Sopenharmony_ci		wake_up_process(tic->t_task);
21762306a36Sopenharmony_ci	spin_unlock(&head->lock);
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cistatic inline int
22162306a36Sopenharmony_cixlog_ticket_reservation(
22262306a36Sopenharmony_ci	struct xlog		*log,
22362306a36Sopenharmony_ci	struct xlog_grant_head	*head,
22462306a36Sopenharmony_ci	struct xlog_ticket	*tic)
22562306a36Sopenharmony_ci{
22662306a36Sopenharmony_ci	if (head == &log->l_write_head) {
22762306a36Sopenharmony_ci		ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
22862306a36Sopenharmony_ci		return tic->t_unit_res;
22962306a36Sopenharmony_ci	}
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	if (tic->t_flags & XLOG_TIC_PERM_RESERV)
23262306a36Sopenharmony_ci		return tic->t_unit_res * tic->t_cnt;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	return tic->t_unit_res;
23562306a36Sopenharmony_ci}
23662306a36Sopenharmony_ci
/*
 * Wake waiters on @head in queue order while @*free_bytes still covers each
 * waiter's reservation, decrementing @*free_bytes as we go.  Returns true
 * if every waiter was woken, false if we ran out of space first.
 *
 * Caller must hold head->lock (tickets only move on/off head->waiters under
 * that lock).
 */
STATIC bool
xlog_grant_head_wake(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	int			*free_bytes)
{
	struct xlog_ticket	*tic;
	int			need_bytes;
	bool			woken_task = false;

	list_for_each_entry(tic, &head->waiters, t_queue) {

		/*
		 * There is a chance that the size of the CIL checkpoints in
		 * progress at the last AIL push target calculation resulted in
		 * limiting the target to the log head (l_last_sync_lsn) at the
		 * time. This may not reflect where the log head is now as the
		 * CIL checkpoints may have completed.
		 *
		 * Hence when we are woken here, it may be that the head of the
		 * log that has moved rather than the tail. As the tail didn't
		 * move, there still won't be space available for the
		 * reservation we require.  However, if the AIL has already
		 * pushed to the target defined by the old log head location, we
		 * will hang here waiting for something else to update the AIL
		 * push target.
		 *
		 * Therefore, if there isn't space to wake the first waiter on
		 * the grant head, we need to push the AIL again to ensure the
		 * target reflects both the current log tail and log head
		 * position before we wait for the tail to move again.
		 */

		need_bytes = xlog_ticket_reservation(log, head, tic);
		if (*free_bytes < need_bytes) {
			/* Only re-push if we haven't woken anyone yet. */
			if (!woken_task)
				xlog_grant_push_ail(log, need_bytes);
			return false;
		}

		*free_bytes -= need_bytes;
		trace_xfs_log_grant_wake_up(log, tic);
		wake_up_process(tic->t_task);
		woken_task = true;
	}

	return true;
}
28562306a36Sopenharmony_ci
/*
 * Queue @tic on @head and sleep until @need_bytes of grant space becomes
 * available or the log is shut down.  head->lock is dropped while sleeping
 * and reacquired before returning (hence the sparse annotations below).
 *
 * Returns 0 once the space is available, -EIO if the log was shut down.
 * The ticket is removed from the queue on both paths.
 */
STATIC int
xlog_grant_head_wait(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic,
	int			need_bytes) __releases(&head->lock)
					    __acquires(&head->lock)
{
	list_add_tail(&tic->t_queue, &head->waiters);

	do {
		if (xlog_is_shutdown(log))
			goto shutdown;
		xlog_grant_push_ail(log, need_bytes);

		/* Set task state before dropping the lock to avoid missed wakeups. */
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(&head->lock);

		XFS_STATS_INC(log->l_mp, xs_sleep_logspace);

		trace_xfs_log_grant_sleep(log, tic);
		schedule();
		trace_xfs_log_grant_wake(log, tic);

		spin_lock(&head->lock);
		if (xlog_is_shutdown(log))
			goto shutdown;
	} while (xlog_space_left(log, &head->grant) < need_bytes);

	list_del_init(&tic->t_queue);
	return 0;
shutdown:
	list_del_init(&tic->t_queue);
	return -EIO;
}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci/*
32362306a36Sopenharmony_ci * Atomically get the log space required for a log ticket.
32462306a36Sopenharmony_ci *
32562306a36Sopenharmony_ci * Once a ticket gets put onto head->waiters, it will only return after the
32662306a36Sopenharmony_ci * needed reservation is satisfied.
32762306a36Sopenharmony_ci *
32862306a36Sopenharmony_ci * This function is structured so that it has a lock free fast path. This is
32962306a36Sopenharmony_ci * necessary because every new transaction reservation will come through this
33062306a36Sopenharmony_ci * path. Hence any lock will be globally hot if we take it unconditionally on
33162306a36Sopenharmony_ci * every pass.
33262306a36Sopenharmony_ci *
33362306a36Sopenharmony_ci * As tickets are only ever moved on and off head->waiters under head->lock, we
33462306a36Sopenharmony_ci * only need to take that lock if we are going to add the ticket to the queue
33562306a36Sopenharmony_ci * and sleep. We can avoid taking the lock if the ticket was never added to
33662306a36Sopenharmony_ci * head->waiters because the t_queue list head will be empty and we hold the
33762306a36Sopenharmony_ci * only reference to it so it can safely be checked unlocked.
33862306a36Sopenharmony_ci */
33962306a36Sopenharmony_ciSTATIC int
34062306a36Sopenharmony_cixlog_grant_head_check(
34162306a36Sopenharmony_ci	struct xlog		*log,
34262306a36Sopenharmony_ci	struct xlog_grant_head	*head,
34362306a36Sopenharmony_ci	struct xlog_ticket	*tic,
34462306a36Sopenharmony_ci	int			*need_bytes)
34562306a36Sopenharmony_ci{
34662306a36Sopenharmony_ci	int			free_bytes;
34762306a36Sopenharmony_ci	int			error = 0;
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	ASSERT(!xlog_in_recovery(log));
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	/*
35262306a36Sopenharmony_ci	 * If there are other waiters on the queue then give them a chance at
35362306a36Sopenharmony_ci	 * logspace before us.  Wake up the first waiters, if we do not wake
35462306a36Sopenharmony_ci	 * up all the waiters then go to sleep waiting for more free space,
35562306a36Sopenharmony_ci	 * otherwise try to get some space for this transaction.
35662306a36Sopenharmony_ci	 */
35762306a36Sopenharmony_ci	*need_bytes = xlog_ticket_reservation(log, head, tic);
35862306a36Sopenharmony_ci	free_bytes = xlog_space_left(log, &head->grant);
35962306a36Sopenharmony_ci	if (!list_empty_careful(&head->waiters)) {
36062306a36Sopenharmony_ci		spin_lock(&head->lock);
36162306a36Sopenharmony_ci		if (!xlog_grant_head_wake(log, head, &free_bytes) ||
36262306a36Sopenharmony_ci		    free_bytes < *need_bytes) {
36362306a36Sopenharmony_ci			error = xlog_grant_head_wait(log, head, tic,
36462306a36Sopenharmony_ci						     *need_bytes);
36562306a36Sopenharmony_ci		}
36662306a36Sopenharmony_ci		spin_unlock(&head->lock);
36762306a36Sopenharmony_ci	} else if (free_bytes < *need_bytes) {
36862306a36Sopenharmony_ci		spin_lock(&head->lock);
36962306a36Sopenharmony_ci		error = xlog_grant_head_wait(log, head, tic, *need_bytes);
37062306a36Sopenharmony_ci		spin_unlock(&head->lock);
37162306a36Sopenharmony_ci	}
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	return error;
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_cibool
37762306a36Sopenharmony_cixfs_log_writable(
37862306a36Sopenharmony_ci	struct xfs_mount	*mp)
37962306a36Sopenharmony_ci{
38062306a36Sopenharmony_ci	/*
38162306a36Sopenharmony_ci	 * Do not write to the log on norecovery mounts, if the data or log
38262306a36Sopenharmony_ci	 * devices are read-only, or if the filesystem is shutdown. Read-only
38362306a36Sopenharmony_ci	 * mounts allow internal writes for log recovery and unmount purposes,
38462306a36Sopenharmony_ci	 * so don't restrict that case.
38562306a36Sopenharmony_ci	 */
38662306a36Sopenharmony_ci	if (xfs_has_norecovery(mp))
38762306a36Sopenharmony_ci		return false;
38862306a36Sopenharmony_ci	if (xfs_readonly_buftarg(mp->m_ddev_targp))
38962306a36Sopenharmony_ci		return false;
39062306a36Sopenharmony_ci	if (xfs_readonly_buftarg(mp->m_log->l_targ))
39162306a36Sopenharmony_ci		return false;
39262306a36Sopenharmony_ci	if (xlog_is_shutdown(mp->m_log))
39362306a36Sopenharmony_ci		return false;
39462306a36Sopenharmony_ci	return true;
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
/*
 * Replenish the byte reservation required by moving the grant write head.
 *
 * Used when a transaction rolls on an existing (permanent) ticket: only the
 * current unit reservation needs to be regranted against the write head.
 * Returns 0 on success or -EIO if the log has been shut down.
 */
int
xfs_log_regrant(
	struct xfs_mount	*mp,
	struct xlog_ticket	*tic)
{
	struct xlog		*log = mp->m_log;
	int			need_bytes;
	int			error = 0;

	if (xlog_is_shutdown(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	/*
	 * This is a new transaction on the ticket, so we need to change the
	 * transaction ID so that the next transaction has a different TID in
	 * the log. Just add one to the existing tid so that we can see chains
	 * of rolling transactions in the log easily.
	 */
	tic->t_tid++;

	xlog_grant_push_ail(log, tic->t_unit_res);

	tic->t_curr_res = tic->t_unit_res;
	/* A remaining count covers this roll; no write-head grant needed. */
	if (tic->t_cnt > 0)
		return 0;

	trace_xfs_log_regrant(log, tic);

	error = xlog_grant_head_check(log, &log->l_write_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_regrant_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations.  We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}
45062306a36Sopenharmony_ci
/*
 * Reserve log space and return a ticket corresponding to the reservation.
 *
 * Each reservation is going to reserve extra space for a log record header.
 * When writes happen to the on-disk log, we don't subtract the length of the
 * log record header from any reservation.  By wasting space in each
 * reservation, we prevent over allocation problems.
 *
 * @unit_bytes: bytes per reservation unit; @cnt: number of units;
 * @permanent: whether this ticket may be re-used across rolled transactions.
 * On success the allocated ticket is returned in @*ticp (which must be NULL
 * on entry).  Returns 0 or -EIO if the log has been shut down.
 */
int
xfs_log_reserve(
	struct xfs_mount	*mp,
	int			unit_bytes,
	int			cnt,
	struct xlog_ticket	**ticp,
	bool			permanent)
{
	struct xlog		*log = mp->m_log;
	struct xlog_ticket	*tic;
	int			need_bytes;
	int			error = 0;

	if (xlog_is_shutdown(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	ASSERT(*ticp == NULL);
	tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent);
	*ticp = tic;

	/* Push the AIL for the full reservation (all counts) we will take. */
	xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
					    : tic->t_unit_res);

	trace_xfs_log_reserve(log, tic);

	error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	/* A new reservation moves both the reserve and write grant heads. */
	xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_reserve_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations.  We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}
50762306a36Sopenharmony_ci
/*
 * Run all the pending iclog callbacks and wake log force waiters and iclog
 * space waiters so they can process the newly set shutdown state. We really
 * don't care what order we process callbacks here because the log is shut down
 * and so state cannot change on disk anymore. However, we cannot wake waiters
 * until the callbacks have been processed because we may be in unmount and
 * we must ensure that all AIL operations the callbacks perform have completed
 * before we tear down the AIL.
 *
 * We avoid processing actively referenced iclogs so that we don't run callbacks
 * while the iclog owner might still be preparing the iclog for IO submssion.
 * These will be caught by xlog_state_iclog_release() and call this function
 * again to process any callbacks that may have been added to that iclog.
 *
 * Caller holds l_icloglock; it is dropped and reacquired around each
 * iclog's callback processing below.
 */
static void
xlog_state_shutdown_callbacks(
	struct xlog		*log)
{
	struct xlog_in_core	*iclog;
	LIST_HEAD(cb_list);

	iclog = log->l_iclog;
	do {
		if (atomic_read(&iclog->ic_refcnt)) {
			/* Reference holder will re-run iclog callbacks. */
			continue;
		}
		/* Detach the callbacks so they can run without the lock. */
		list_splice_init(&iclog->ic_callbacks, &cb_list);
		spin_unlock(&log->l_icloglock);

		xlog_cil_process_committed(&cb_list);

		spin_lock(&log->l_icloglock);
		wake_up_all(&iclog->ic_write_wait);
		wake_up_all(&iclog->ic_force_wait);
	} while ((iclog = iclog->ic_next) != log->l_iclog);

	wake_up_all(&log->l_flush_wait);
}
54762306a36Sopenharmony_ci
/*
 * Flush iclog to disk if this is the last reference to the given iclog and the
 * it is in the WANT_SYNC state.
 *
 * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
 * log tail is updated correctly. NEED_FUA indicates that the iclog will be
 * written to stable storage, and implies that a commit record is contained
 * within the iclog. We need to ensure that the log tail does not move beyond
 * the tail that the first commit record in the iclog ordered against, otherwise
 * correct recovery of that checkpoint becomes dependent on future operations
 * performed on this iclog.
 *
 * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the
 * current tail into iclog. Once the iclog tail is set, future operations must
 * not modify it, otherwise they potentially violate ordering constraints for
 * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in
 * the iclog will get zeroed on activation of the iclog after sync, so we
 * always capture the tail lsn on the iclog on the first NEED_FUA release
 * regardless of the number of active reference counts on this iclog.
 *
 * Caller must hold l_icloglock; it is dropped and reacquired around the
 * xlog_sync() call.  Returns 0, or -EIO if the log has been shut down.
 */
int
xlog_state_release_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	struct xlog_ticket	*ticket)
{
	xfs_lsn_t		tail_lsn;
	bool			last_ref;

	lockdep_assert_held(&log->l_icloglock);

	trace_xlog_iclog_release(iclog, _RET_IP_);
	/*
	 * Grabbing the current log tail needs to be atomic w.r.t. the writing
	 * of the tail LSN into the iclog so we guarantee that the log tail does
	 * not move between the first time we know that the iclog needs to be
	 * made stable and when we eventually submit it.
	 */
	if ((iclog->ic_state == XLOG_STATE_WANT_SYNC ||
	     (iclog->ic_flags & XLOG_ICL_NEED_FUA)) &&
	    !iclog->ic_header.h_tail_lsn) {
		tail_lsn = xlog_assign_tail_lsn(log->l_mp);
		iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
	}

	last_ref = atomic_dec_and_test(&iclog->ic_refcnt);

	if (xlog_is_shutdown(log)) {
		/*
		 * If there are no more references to this iclog, process the
		 * pending iclog callbacks that were waiting on the release of
		 * this iclog.
		 */
		if (last_ref)
			xlog_state_shutdown_callbacks(log);
		return -EIO;
	}

	if (!last_ref)
		return 0;

	/* Last reference, but nobody asked for this iclog to be synced. */
	if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
		ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
		return 0;
	}

	iclog->ic_state = XLOG_STATE_SYNCING;
	xlog_verify_tail_lsn(log, iclog);
	trace_xlog_iclog_syncing(iclog, _RET_IP_);

	/* Drop the lock for the actual I/O submission. */
	spin_unlock(&log->l_icloglock);
	xlog_sync(log, iclog, ticket);
	spin_lock(&log->l_icloglock);
	return 0;
}
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci/*
62562306a36Sopenharmony_ci * Mount a log filesystem
62662306a36Sopenharmony_ci *
62762306a36Sopenharmony_ci * mp		- ubiquitous xfs mount point structure
62862306a36Sopenharmony_ci * log_target	- buftarg of on-disk log device
62962306a36Sopenharmony_ci * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
 * num_bblks	- Number of BBSIZE blocks in on-disk log
63162306a36Sopenharmony_ci *
63262306a36Sopenharmony_ci * Return error or zero.
63362306a36Sopenharmony_ci */
int
xfs_log_mount(
	xfs_mount_t	*mp,
	xfs_buftarg_t	*log_target,
	xfs_daddr_t	blk_offset,
	int		num_bblks)
{
	struct xlog	*log;
	int		error = 0;
	int		min_logfsbs;

	if (!xfs_has_norecovery(mp)) {
		xfs_notice(mp, "Mounting V%d Filesystem %pU",
			   XFS_SB_VERSION_NUM(&mp->m_sb),
			   &mp->m_sb.sb_uuid);
	} else {
		xfs_notice(mp,
"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.",
			   XFS_SB_VERSION_NUM(&mp->m_sb),
			   &mp->m_sb.sb_uuid);
		/* norecovery mounts are only valid on read-only filesystems */
		ASSERT(xfs_is_readonly(mp));
	}

	log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
	if (IS_ERR(log)) {
		error = PTR_ERR(log);
		goto out;
	}
	mp->m_log = log;

	/*
	 * Now that we have set up the log and its internal geometry
	 * parameters, we can validate the given log space and drop a critical
	 * message via syslog if the log size is too small. A log that is too
	 * small can lead to unexpected situations in transaction log space
	 * reservation stage. The superblock verifier has already validated all
	 * the other log geometry constraints, so we don't have to check those
	 * here.
	 *
	 * Note: For v4 filesystems, we can't just reject the mount if the
	 * validation fails.  This would mean that people would have to
	 * downgrade their kernel just to remedy the situation as there is no
	 * way to grow the log (short of black magic surgery with xfs_db).
	 *
	 * We can, however, reject mounts for V5 format filesystems, as the
	 * mkfs binary being used to make the filesystem should never create a
	 * filesystem with a log that is too small.
	 */
	min_logfsbs = xfs_log_calc_minimum_size(mp);
	if (mp->m_sb.sb_logblocks < min_logfsbs) {
		xfs_warn(mp,
		"Log size %d blocks too small, minimum size is %d blocks",
			 mp->m_sb.sb_logblocks, min_logfsbs);

		/*
		 * Log check errors are always fatal on v5; or whenever bad
		 * metadata leads to a crash.
		 */
		if (xfs_has_crc(mp)) {
			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
			ASSERT(0);
			error = -EINVAL;
			goto out_free_log;
		}
		/* v4: warn loudly but keep going - see comment above. */
		xfs_crit(mp, "Log size out of supported range.");
		xfs_crit(mp,
"Continuing onwards, but if log hangs are experienced then please report this message in the bug report.");
	}

	/*
	 * Initialize the AIL now we have a log.
	 */
	error = xfs_trans_ail_init(mp);
	if (error) {
		xfs_warn(mp, "AIL initialisation failed: error %d", error);
		goto out_free_log;
	}
	log->l_ailp = mp->m_ail;

	/*
	 * skip log recovery on a norecovery mount.  pretend it all
	 * just worked.
	 */
	if (!xfs_has_norecovery(mp)) {
		error = xlog_recover(log);
		if (error) {
			xfs_warn(mp, "log mount/recovery failed: error %d",
				error);
			xlog_recover_cancel(log);
			goto out_destroy_ail;
		}
	}

	error = xfs_sysfs_init(&log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
			       "log");
	if (error)
		goto out_destroy_ail;

	/* Normal transactions can now occur */
	clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);

	/*
	 * Now the log has been fully initialised and we know where our
	 * space grant counters are, we can initialise the permanent ticket
	 * needed for delayed logging to work.
	 */
	xlog_cil_init_post_recovery(log);

	return 0;

out_destroy_ail:
	xfs_trans_ail_destroy(mp);
out_free_log:
	xlog_dealloc_log(log);
out:
	return error;
}
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci/*
75362306a36Sopenharmony_ci * Finish the recovery of the file system.  This is separate from the
75462306a36Sopenharmony_ci * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read
75562306a36Sopenharmony_ci * in the root and real-time bitmap inodes between calling xfs_log_mount() and
75662306a36Sopenharmony_ci * here.
75762306a36Sopenharmony_ci *
75862306a36Sopenharmony_ci * If we finish recovery successfully, start the background log work. If we are
75962306a36Sopenharmony_ci * not doing recovery, then we have a RO filesystem and we don't need to start
76062306a36Sopenharmony_ci * it.
76162306a36Sopenharmony_ci */
int
xfs_log_mount_finish(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	int			error = 0;

	if (xfs_has_norecovery(mp)) {
		/* Recovery was skipped at mount time; nothing to finish. */
		ASSERT(xfs_is_readonly(mp));
		return 0;
	}

	/*
	 * During the second phase of log recovery, we need iget and
	 * iput to behave like they do for an active filesystem.
	 * xfs_fs_drop_inode needs to be able to prevent the deletion
	 * of inodes before we're done replaying log items on those
	 * inodes.  Turn it off immediately after recovery finishes
	 * so that we don't leak the quota inodes if subsequent mount
	 * activities fail.
	 *
	 * We let all inodes involved in redo item processing end up on
	 * the LRU instead of being evicted immediately so that if we do
	 * something to an unlinked inode, the irele won't cause
	 * premature truncation and freeing of the inode, which results
	 * in log recovery failure.  We have to evict the unreferenced
	 * lru inodes after clearing SB_ACTIVE because we don't
	 * otherwise clean up the lru if there's a subsequent failure in
	 * xfs_mountfs, which leads to us leaking the inodes if nothing
	 * else (e.g. quotacheck) references the inodes before the
	 * mount failure occurs.
	 */
	mp->m_super->s_flags |= SB_ACTIVE;
	xfs_log_work_queue(mp);
	if (xlog_recovery_needed(log))
		error = xlog_recover_finish(log);
	mp->m_super->s_flags &= ~SB_ACTIVE;
	evict_inodes(mp->m_super);

	/*
	 * Drain the buffer LRU after log recovery. This is required for v4
	 * filesystems to avoid leaving around buffers with NULL verifier ops,
	 * but we do it unconditionally to make sure we're always in a clean
	 * cache state after mount.
	 *
	 * Don't push in the error case because the AIL may have pending intents
	 * that aren't removed until recovery is cancelled.
	 */
	if (xlog_recovery_needed(log)) {
		if (!error) {
			/* Push everything recovery dirtied out to disk. */
			xfs_log_force(mp, XFS_LOG_SYNC);
			xfs_ail_push_all_sync(mp->m_ail);
		}
		xfs_notice(mp, "Ending recovery (logdev: %s)",
				mp->m_logname ? mp->m_logname : "internal");
	} else {
		xfs_info(mp, "Ending clean mount");
	}
	xfs_buftarg_drain(mp->m_ddev_targp);

	clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);

	/* Make sure the log is dead if we're returning failure. */
	ASSERT(!error || xlog_is_shutdown(log));

	return error;
}
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci/*
83162306a36Sopenharmony_ci * The mount has failed. Cancel the recovery if it hasn't completed and destroy
83262306a36Sopenharmony_ci * the log.
83362306a36Sopenharmony_ci */
void
xfs_log_mount_cancel(
	struct xfs_mount	*mp)
{
	/*
	 * Order matters: cancel any in-progress recovery first, then tear
	 * down the AIL and the log via the normal unmount path.
	 */
	xlog_recover_cancel(mp->m_log);
	xfs_log_unmount(mp);
}
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci/*
84362306a36Sopenharmony_ci * Flush out the iclog to disk ensuring that device caches are flushed and
84462306a36Sopenharmony_ci * the iclog hits stable storage before any completion waiters are woken.
84562306a36Sopenharmony_ci */
static inline int
xlog_force_iclog(
	struct xlog_in_core	*iclog)
{
	/* Take a reference that xlog_state_release_iclog() will drop. */
	atomic_inc(&iclog->ic_refcnt);
	/* Request a cache flush and FUA write for this iclog's IO. */
	iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
	if (iclog->ic_state == XLOG_STATE_ACTIVE)
		xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
	/* Drops our reference and, if it was the last, submits the iclog. */
	return xlog_state_release_iclog(iclog->ic_log, iclog, NULL);
}
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci/*
85862306a36Sopenharmony_ci * Cycle all the iclogbuf locks to make sure all log IO completion
85962306a36Sopenharmony_ci * is done before we tear down these buffers.
86062306a36Sopenharmony_ci */
86162306a36Sopenharmony_cistatic void
86262306a36Sopenharmony_cixlog_wait_iclog_completion(struct xlog *log)
86362306a36Sopenharmony_ci{
86462306a36Sopenharmony_ci	int		i;
86562306a36Sopenharmony_ci	struct xlog_in_core	*iclog = log->l_iclog;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	for (i = 0; i < log->l_iclog_bufs; i++) {
86862306a36Sopenharmony_ci		down(&iclog->ic_sema);
86962306a36Sopenharmony_ci		up(&iclog->ic_sema);
87062306a36Sopenharmony_ci		iclog = iclog->ic_next;
87162306a36Sopenharmony_ci	}
87262306a36Sopenharmony_ci}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci/*
 * Wait for the iclog and all prior iclogs to be written to disk as required by
87662306a36Sopenharmony_ci * log force state machine. Waiting on ic_force_wait ensures iclog completions
87762306a36Sopenharmony_ci * have been ordered and callbacks run before we are woken here, hence
87862306a36Sopenharmony_ci * guaranteeing that all the iclogs up to this one are on stable storage.
87962306a36Sopenharmony_ci */
int
xlog_wait_on_iclog(
	struct xlog_in_core	*iclog)
		__releases(iclog->ic_log->l_icloglock)
{
	struct xlog		*log = iclog->ic_log;

	trace_xlog_iclog_wait_on(iclog, _RET_IP_);
	/*
	 * Only sleep if the iclog is still in flight (neither ACTIVE nor
	 * DIRTY) and the log has not been shut down.  xlog_wait() drops
	 * l_icloglock for us; on the non-sleep path we must drop it
	 * ourselves to satisfy the __releases() contract above.
	 */
	if (!xlog_is_shutdown(log) &&
	    iclog->ic_state != XLOG_STATE_ACTIVE &&
	    iclog->ic_state != XLOG_STATE_DIRTY) {
		XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
	} else {
		spin_unlock(&log->l_icloglock);
	}

	/* Re-check: the log may have been shut down while we slept. */
	if (xlog_is_shutdown(log))
		return -EIO;
	return 0;
}
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci/*
90362306a36Sopenharmony_ci * Write out an unmount record using the ticket provided. We have to account for
90462306a36Sopenharmony_ci * the data space used in the unmount ticket as this write is not done from a
90562306a36Sopenharmony_ci * transaction context that has already done the accounting for us.
90662306a36Sopenharmony_ci */
static int
xlog_write_unmount_record(
	struct xlog		*log,
	struct xlog_ticket	*ticket)
{
	/*
	 * The unmount record is a single log operation: an op header
	 * flagged XLOG_UNMOUNT_TRANS followed immediately by the unmount
	 * format payload carrying the XLOG_UNMOUNT_TYPE magic.
	 */
	struct  {
		struct xlog_op_header ophdr;
		struct xfs_unmount_log_format ulf;
	} unmount_rec = {
		.ophdr = {
			.oh_clientid = XFS_LOG,
			.oh_tid = cpu_to_be32(ticket->t_tid),
			.oh_flags = XLOG_UNMOUNT_TRANS,
		},
		.ulf = {
			.magic = XLOG_UNMOUNT_TYPE,
		},
	};
	struct xfs_log_iovec reg = {
		.i_addr = &unmount_rec,
		.i_len = sizeof(unmount_rec),
		.i_type = XLOG_REG_TYPE_UNMOUNT,
	};
	struct xfs_log_vec vec = {
		.lv_niovecs = 1,
		.lv_iovecp = &reg,
	};
	LIST_HEAD(lv_chain);
	list_add(&vec.lv_list, &lv_chain);

	/*
	 * Catch any padding the compiler might insert between the two
	 * structs - the record must be exactly the two members back to back.
	 */
	BUILD_BUG_ON((sizeof(struct xlog_op_header) +
		      sizeof(struct xfs_unmount_log_format)) !=
							sizeof(unmount_rec));

	/* account for space used by record data */
	ticket->t_curr_res -= sizeof(unmount_rec);

	return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len);
}
94662306a36Sopenharmony_ci
94762306a36Sopenharmony_ci/*
94862306a36Sopenharmony_ci * Mark the filesystem clean by writing an unmount record to the head of the
94962306a36Sopenharmony_ci * log.
95062306a36Sopenharmony_ci */
static void
xlog_unmount_write(
	struct xlog		*log)
{
	struct xfs_mount	*mp = log->l_mp;
	struct xlog_in_core	*iclog;
	struct xlog_ticket	*tic = NULL;
	int			error;

	/*
	 * Reserve log space for the unmount record.  600 bytes is far more
	 * than the record itself needs (presumably a historical round
	 * number - TODO confirm), but unmount is a one-off so it's harmless.
	 */
	error = xfs_log_reserve(mp, 600, 1, &tic, 0);
	if (error)
		goto out_err;

	error = xlog_write_unmount_record(log, tic);
	/*
	 * At this point, we're umounting anyway, so there's no point in
	 * transitioning log state to shutdown. Just continue...
	 */
out_err:
	if (error)
		xfs_alert(mp, "%s: unmount record failed", __func__);

	/* Force the current iclog (with the record, if written) to disk. */
	spin_lock(&log->l_icloglock);
	iclog = log->l_iclog;
	error = xlog_force_iclog(iclog);
	xlog_wait_on_iclog(iclog);

	if (tic) {
		trace_xfs_log_umount_write(log, tic);
		xfs_log_ticket_ungrant(log, tic);
	}
}
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_cistatic void
98562306a36Sopenharmony_cixfs_log_unmount_verify_iclog(
98662306a36Sopenharmony_ci	struct xlog		*log)
98762306a36Sopenharmony_ci{
98862306a36Sopenharmony_ci	struct xlog_in_core	*iclog = log->l_iclog;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci	do {
99162306a36Sopenharmony_ci		ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
99262306a36Sopenharmony_ci		ASSERT(iclog->ic_offset == 0);
99362306a36Sopenharmony_ci	} while ((iclog = iclog->ic_next) != log->l_iclog);
99462306a36Sopenharmony_ci}
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci/*
99762306a36Sopenharmony_ci * Unmount record used to have a string "Unmount filesystem--" in the
99862306a36Sopenharmony_ci * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
99962306a36Sopenharmony_ci * We just write the magic number now since that particular field isn't
100062306a36Sopenharmony_ci * currently architecture converted and "Unmount" is a bit foo.
100162306a36Sopenharmony_ci * As far as I know, there weren't any dependencies on the old behaviour.
100262306a36Sopenharmony_ci */
static void
xfs_log_unmount_write(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;

	/* Nothing to write on read-only or otherwise unwritable logs. */
	if (!xfs_log_writable(mp))
		return;

	/* Flush all pending log state out before writing the record. */
	xfs_log_force(mp, XFS_LOG_SYNC);

	if (xlog_is_shutdown(log))
		return;

	/*
	 * If we think the summary counters are bad, avoid writing the unmount
	 * record to force log recovery at next mount, after which the summary
	 * counters will be recalculated.  Refer to xlog_check_unmount_rec for
	 * more details.
	 */
	if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
		xfs_alert(mp, "%s: will fix summary counters at next mount",
				__func__);
		return;
	}

	xfs_log_unmount_verify_iclog(log);
	xlog_unmount_write(log);
}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci/*
103562306a36Sopenharmony_ci * Empty the log for unmount/freeze.
103662306a36Sopenharmony_ci *
103762306a36Sopenharmony_ci * To do this, we first need to shut down the background log work so it is not
103862306a36Sopenharmony_ci * trying to cover the log as we clean up. We then need to unpin all objects in
103962306a36Sopenharmony_ci * the log so we can then flush them out. Once they have completed their IO and
104062306a36Sopenharmony_ci * run the callbacks removing themselves from the AIL, we can cover the log.
104162306a36Sopenharmony_ci */
int
xfs_log_quiesce(
	struct xfs_mount	*mp)
{
	/*
	 * Clear log incompat features since we're quiescing the log.  Report
	 * failures, though it's not fatal to have a higher log feature
	 * protection level than the log contents actually require.
	 */
	if (xfs_clear_incompat_log_features(mp)) {
		int error;

		error = xfs_sync_sb(mp, false);
		if (error)
			xfs_warn(mp,
	"Failed to clear log incompat features on quiesce");
	}

	/* Stop background covering work before we flush everything out. */
	cancel_delayed_work_sync(&mp->m_log->l_work);
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * The superblock buffer is uncached and while xfs_ail_push_all_sync()
	 * will push it, xfs_buftarg_wait() will not wait for it. Further,
	 * xfs_buf_iowait() cannot be used because it was pushed with the
	 * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for
	 * the IO to complete.
	 */
	xfs_ail_push_all_sync(mp->m_ail);
	xfs_buftarg_wait(mp->m_ddev_targp);
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_unlock(mp->m_sb_bp);

	/* Returns the result of covering the (now empty) log. */
	return xfs_log_cover(mp);
}
107762306a36Sopenharmony_ci
void
xfs_log_clean(
	struct xfs_mount	*mp)
{
	/* Quiesce first so the log is empty, then mark it clean on disk. */
	xfs_log_quiesce(mp);
	xfs_log_unmount_write(mp);
}
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci/*
108762306a36Sopenharmony_ci * Shut down and release the AIL and Log.
108862306a36Sopenharmony_ci *
108962306a36Sopenharmony_ci * During unmount, we need to ensure we flush all the dirty metadata objects
109062306a36Sopenharmony_ci * from the AIL so that the log is empty before we write the unmount record to
109162306a36Sopenharmony_ci * the log. Once this is done, we can tear down the AIL and the log.
109262306a36Sopenharmony_ci */
void
xfs_log_unmount(
	struct xfs_mount	*mp)
{
	/* Empty the log and write the unmount record. */
	xfs_log_clean(mp);

	/*
	 * If shutdown has come from iclog IO context, the log
	 * cleaning will have been skipped and so we need to wait
	 * for the iclog to complete shutdown processing before we
	 * tear anything down.
	 */
	xlog_wait_iclog_completion(mp->m_log);

	xfs_buftarg_drain(mp->m_ddev_targp);

	/* Teardown order: AIL, then sysfs entry, then the log itself. */
	xfs_trans_ail_destroy(mp);

	xfs_sysfs_del(&mp->m_log->l_kobj);

	xlog_dealloc_log(mp->m_log);
}
111562306a36Sopenharmony_ci
111662306a36Sopenharmony_civoid
111762306a36Sopenharmony_cixfs_log_item_init(
111862306a36Sopenharmony_ci	struct xfs_mount	*mp,
111962306a36Sopenharmony_ci	struct xfs_log_item	*item,
112062306a36Sopenharmony_ci	int			type,
112162306a36Sopenharmony_ci	const struct xfs_item_ops *ops)
112262306a36Sopenharmony_ci{
112362306a36Sopenharmony_ci	item->li_log = mp->m_log;
112462306a36Sopenharmony_ci	item->li_ailp = mp->m_ail;
112562306a36Sopenharmony_ci	item->li_type = type;
112662306a36Sopenharmony_ci	item->li_ops = ops;
112762306a36Sopenharmony_ci	item->li_lv = NULL;
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	INIT_LIST_HEAD(&item->li_ail);
113062306a36Sopenharmony_ci	INIT_LIST_HEAD(&item->li_cil);
113162306a36Sopenharmony_ci	INIT_LIST_HEAD(&item->li_bio_list);
113262306a36Sopenharmony_ci	INIT_LIST_HEAD(&item->li_trans);
113362306a36Sopenharmony_ci}
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci/*
113662306a36Sopenharmony_ci * Wake up processes waiting for log space after we have moved the log tail.
113762306a36Sopenharmony_ci */
void
xfs_log_space_wake(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	int			free_bytes;

	if (xlog_is_shutdown(log))
		return;

	/* Wake waiters on the write grant head first... */
	if (!list_empty_careful(&log->l_write_head.waiters)) {
		ASSERT(!xlog_in_recovery(log));

		spin_lock(&log->l_write_head.lock);
		free_bytes = xlog_space_left(log, &log->l_write_head.grant);
		xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
		spin_unlock(&log->l_write_head.lock);
	}

	/* ...then waiters on the reserve grant head. */
	if (!list_empty_careful(&log->l_reserve_head.waiters)) {
		ASSERT(!xlog_in_recovery(log));

		spin_lock(&log->l_reserve_head.lock);
		free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
		xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
		spin_unlock(&log->l_reserve_head.lock);
	}
}
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci/*
116862306a36Sopenharmony_ci * Determine if we have a transaction that has gone to disk that needs to be
116962306a36Sopenharmony_ci * covered. To begin the transition to the idle state firstly the log needs to
117062306a36Sopenharmony_ci * be idle. That means the CIL, the AIL and the iclogs needs to be empty before
117162306a36Sopenharmony_ci * we start attempting to cover the log.
117262306a36Sopenharmony_ci *
117362306a36Sopenharmony_ci * Only if we are then in a state where covering is needed, the caller is
117462306a36Sopenharmony_ci * informed that dummy transactions are required to move the log into the idle
117562306a36Sopenharmony_ci * state.
117662306a36Sopenharmony_ci *
117762306a36Sopenharmony_ci * If there are any items in the AIl or CIL, then we do not want to attempt to
117862306a36Sopenharmony_ci * cover the log as we may be in a situation where there isn't log space
117962306a36Sopenharmony_ci * available to run a dummy transaction and this can lead to deadlocks when the
118062306a36Sopenharmony_ci * tail of the log is pinned by an item that is modified in the CIL.  Hence
118162306a36Sopenharmony_ci * there's no point in running a dummy transaction at this point because we
118262306a36Sopenharmony_ci * can't start trying to idle the log until both the CIL and AIL are empty.
118362306a36Sopenharmony_ci */
static bool
xfs_log_need_covered(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	bool			needed = false;

	/* A non-empty CIL means the log cannot be idle yet. */
	if (!xlog_cil_empty(log))
		return false;

	spin_lock(&log->l_icloglock);
	switch (log->l_covered_state) {
	case XLOG_STATE_COVER_DONE:
	case XLOG_STATE_COVER_DONE2:
	case XLOG_STATE_COVER_IDLE:
		/* Already covered (or covering in progress) - nothing to do. */
		break;
	case XLOG_STATE_COVER_NEED:
	case XLOG_STATE_COVER_NEED2:
		/* The AIL and all iclogs must be empty before covering. */
		if (xfs_ail_min_lsn(log->l_ailp))
			break;
		if (!xlog_iclogs_empty(log))
			break;

		/* Advance NEED -> DONE (and NEED2 -> DONE2) one step. */
		needed = true;
		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
			log->l_covered_state = XLOG_STATE_COVER_DONE;
		else
			log->l_covered_state = XLOG_STATE_COVER_DONE2;
		break;
	default:
		needed = true;
		break;
	}
	spin_unlock(&log->l_icloglock);
	return needed;
}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci/*
122262306a36Sopenharmony_ci * Explicitly cover the log. This is similar to background log covering but
122362306a36Sopenharmony_ci * intended for usage in quiesce codepaths. The caller is responsible to ensure
122462306a36Sopenharmony_ci * the log is idle and suitable for covering. The CIL, iclog buffers and AIL
122562306a36Sopenharmony_ci * must all be empty.
122662306a36Sopenharmony_ci */
122762306a36Sopenharmony_cistatic int
122862306a36Sopenharmony_cixfs_log_cover(
122962306a36Sopenharmony_ci	struct xfs_mount	*mp)
123062306a36Sopenharmony_ci{
123162306a36Sopenharmony_ci	int			error = 0;
123262306a36Sopenharmony_ci	bool			need_covered;
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci	ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) &&
123562306a36Sopenharmony_ci	        !xfs_ail_min_lsn(mp->m_log->l_ailp)) ||
123662306a36Sopenharmony_ci		xlog_is_shutdown(mp->m_log));
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci	if (!xfs_log_writable(mp))
123962306a36Sopenharmony_ci		return 0;
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ci	/*
124262306a36Sopenharmony_ci	 * xfs_log_need_covered() is not idempotent because it progresses the
124362306a36Sopenharmony_ci	 * state machine if the log requires covering. Therefore, we must call
124462306a36Sopenharmony_ci	 * this function once and use the result until we've issued an sb sync.
124562306a36Sopenharmony_ci	 * Do so first to make that abundantly clear.
124662306a36Sopenharmony_ci	 *
124762306a36Sopenharmony_ci	 * Fall into the covering sequence if the log needs covering or the
124862306a36Sopenharmony_ci	 * mount has lazy superblock accounting to sync to disk. The sb sync
124962306a36Sopenharmony_ci	 * used for covering accumulates the in-core counters, so covering
125062306a36Sopenharmony_ci	 * handles this for us.
125162306a36Sopenharmony_ci	 */
125262306a36Sopenharmony_ci	need_covered = xfs_log_need_covered(mp);
125362306a36Sopenharmony_ci	if (!need_covered && !xfs_has_lazysbcount(mp))
125462306a36Sopenharmony_ci		return 0;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	/*
125762306a36Sopenharmony_ci	 * To cover the log, commit the superblock twice (at most) in
125862306a36Sopenharmony_ci	 * independent checkpoints. The first serves as a reference for the
125962306a36Sopenharmony_ci	 * tail pointer. The sync transaction and AIL push empties the AIL and
126062306a36Sopenharmony_ci	 * updates the in-core tail to the LSN of the first checkpoint. The
126162306a36Sopenharmony_ci	 * second commit updates the on-disk tail with the in-core LSN,
126262306a36Sopenharmony_ci	 * covering the log. Push the AIL one more time to leave it empty, as
126362306a36Sopenharmony_ci	 * we found it.
126462306a36Sopenharmony_ci	 */
126562306a36Sopenharmony_ci	do {
126662306a36Sopenharmony_ci		error = xfs_sync_sb(mp, true);
126762306a36Sopenharmony_ci		if (error)
126862306a36Sopenharmony_ci			break;
126962306a36Sopenharmony_ci		xfs_ail_push_all_sync(mp->m_ail);
127062306a36Sopenharmony_ci	} while (xfs_log_need_covered(mp));
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci	return error;
127362306a36Sopenharmony_ci}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci/*
127662306a36Sopenharmony_ci * We may be holding the log iclog lock upon entering this routine.
127762306a36Sopenharmony_ci */
127862306a36Sopenharmony_cixfs_lsn_t
127962306a36Sopenharmony_cixlog_assign_tail_lsn_locked(
128062306a36Sopenharmony_ci	struct xfs_mount	*mp)
128162306a36Sopenharmony_ci{
128262306a36Sopenharmony_ci	struct xlog		*log = mp->m_log;
128362306a36Sopenharmony_ci	struct xfs_log_item	*lip;
128462306a36Sopenharmony_ci	xfs_lsn_t		tail_lsn;
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci	assert_spin_locked(&mp->m_ail->ail_lock);
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci	/*
128962306a36Sopenharmony_ci	 * To make sure we always have a valid LSN for the log tail we keep
129062306a36Sopenharmony_ci	 * track of the last LSN which was committed in log->l_last_sync_lsn,
129162306a36Sopenharmony_ci	 * and use that when the AIL was empty.
129262306a36Sopenharmony_ci	 */
129362306a36Sopenharmony_ci	lip = xfs_ail_min(mp->m_ail);
129462306a36Sopenharmony_ci	if (lip)
129562306a36Sopenharmony_ci		tail_lsn = lip->li_lsn;
129662306a36Sopenharmony_ci	else
129762306a36Sopenharmony_ci		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
129862306a36Sopenharmony_ci	trace_xfs_log_assign_tail_lsn(log, tail_lsn);
129962306a36Sopenharmony_ci	atomic64_set(&log->l_tail_lsn, tail_lsn);
130062306a36Sopenharmony_ci	return tail_lsn;
130162306a36Sopenharmony_ci}
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_cixfs_lsn_t
130462306a36Sopenharmony_cixlog_assign_tail_lsn(
130562306a36Sopenharmony_ci	struct xfs_mount	*mp)
130662306a36Sopenharmony_ci{
130762306a36Sopenharmony_ci	xfs_lsn_t		tail_lsn;
130862306a36Sopenharmony_ci
130962306a36Sopenharmony_ci	spin_lock(&mp->m_ail->ail_lock);
131062306a36Sopenharmony_ci	tail_lsn = xlog_assign_tail_lsn_locked(mp);
131162306a36Sopenharmony_ci	spin_unlock(&mp->m_ail->ail_lock);
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	return tail_lsn;
131462306a36Sopenharmony_ci}
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci/*
131762306a36Sopenharmony_ci * Return the space in the log between the tail and the head.  The head
131862306a36Sopenharmony_ci * is passed in the cycle/bytes formal parms.  In the special case where
131962306a36Sopenharmony_ci * the reserve head has wrapped passed the tail, this calculation is no
132062306a36Sopenharmony_ci * longer valid.  In this case, just return 0 which means there is no space
132162306a36Sopenharmony_ci * in the log.  This works for all places where this function is called
132262306a36Sopenharmony_ci * with the reserve head.  Of course, if the write head were to ever
132362306a36Sopenharmony_ci * wrap the tail, we should blow up.  Rather than catch this case here,
132462306a36Sopenharmony_ci * we depend on other ASSERTions in other parts of the code.   XXXmiken
132562306a36Sopenharmony_ci *
132662306a36Sopenharmony_ci * If reservation head is behind the tail, we have a problem. Warn about it,
132762306a36Sopenharmony_ci * but then treat it as if the log is empty.
132862306a36Sopenharmony_ci *
132962306a36Sopenharmony_ci * If the log is shut down, the head and tail may be invalid or out of whack, so
133062306a36Sopenharmony_ci * shortcut invalidity asserts in this case so that we don't trigger them
133162306a36Sopenharmony_ci * falsely.
133262306a36Sopenharmony_ci */
133362306a36Sopenharmony_ciSTATIC int
133462306a36Sopenharmony_cixlog_space_left(
133562306a36Sopenharmony_ci	struct xlog	*log,
133662306a36Sopenharmony_ci	atomic64_t	*head)
133762306a36Sopenharmony_ci{
133862306a36Sopenharmony_ci	int		tail_bytes;
133962306a36Sopenharmony_ci	int		tail_cycle;
134062306a36Sopenharmony_ci	int		head_cycle;
134162306a36Sopenharmony_ci	int		head_bytes;
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ci	xlog_crack_grant_head(head, &head_cycle, &head_bytes);
134462306a36Sopenharmony_ci	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
134562306a36Sopenharmony_ci	tail_bytes = BBTOB(tail_bytes);
134662306a36Sopenharmony_ci	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
134762306a36Sopenharmony_ci		return log->l_logsize - (head_bytes - tail_bytes);
134862306a36Sopenharmony_ci	if (tail_cycle + 1 < head_cycle)
134962306a36Sopenharmony_ci		return 0;
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	/* Ignore potential inconsistency when shutdown. */
135262306a36Sopenharmony_ci	if (xlog_is_shutdown(log))
135362306a36Sopenharmony_ci		return log->l_logsize;
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ci	if (tail_cycle < head_cycle) {
135662306a36Sopenharmony_ci		ASSERT(tail_cycle == (head_cycle - 1));
135762306a36Sopenharmony_ci		return tail_bytes - head_bytes;
135862306a36Sopenharmony_ci	}
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	/*
136162306a36Sopenharmony_ci	 * The reservation head is behind the tail. In this case we just want to
136262306a36Sopenharmony_ci	 * return the size of the log as the amount of space left.
136362306a36Sopenharmony_ci	 */
136462306a36Sopenharmony_ci	xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
136562306a36Sopenharmony_ci	xfs_alert(log->l_mp, "  tail_cycle = %d, tail_bytes = %d",
136662306a36Sopenharmony_ci		  tail_cycle, tail_bytes);
136762306a36Sopenharmony_ci	xfs_alert(log->l_mp, "  GH   cycle = %d, GH   bytes = %d",
136862306a36Sopenharmony_ci		  head_cycle, head_bytes);
136962306a36Sopenharmony_ci	ASSERT(0);
137062306a36Sopenharmony_ci	return log->l_logsize;
137162306a36Sopenharmony_ci}
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci
/*
 * I/O completion work for an iclog write.  Maps the bio status to an
 * errno, shuts the log down on error, marks the iclog as done syncing
 * and finally releases the iclog semaphore.  Runs from the per-log
 * l_ioend_workqueue.
 */
static void
xlog_ioend_work(
	struct work_struct	*work)
{
	struct xlog_in_core     *iclog =
		container_of(work, struct xlog_in_core, ic_end_io_work);
	struct xlog		*log = iclog->ic_log;
	int			error;

	error = blk_status_to_errno(iclog->ic_bio.bi_status);
#ifdef DEBUG
	/* treat writes with injected CRC errors as failed */
	if (iclog->ic_fail_crc)
		error = -EIO;
#endif

	/*
	 * Race to shutdown the filesystem if we see an error.
	 */
	if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
		xfs_alert(log->l_mp, "log I/O error %d", error);
		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
	}

	xlog_state_done_syncing(iclog);
	bio_uninit(&iclog->ic_bio);

	/*
	 * Drop the lock to signal that we are done. Nothing references the
	 * iclog after this, so an unmount waiting on this lock can now tear it
	 * down safely. As such, it is unsafe to reference the iclog after the
	 * unlock as we could race with it being freed.
	 */
	up(&iclog->ic_sema);
}
140962306a36Sopenharmony_ci
141062306a36Sopenharmony_ci/*
141162306a36Sopenharmony_ci * Return size of each in-core log record buffer.
141262306a36Sopenharmony_ci *
141362306a36Sopenharmony_ci * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
141462306a36Sopenharmony_ci *
141562306a36Sopenharmony_ci * If the filesystem blocksize is too large, we may need to choose a
141662306a36Sopenharmony_ci * larger size since the directory code currently logs entire blocks.
141762306a36Sopenharmony_ci */
141862306a36Sopenharmony_ciSTATIC void
141962306a36Sopenharmony_cixlog_get_iclog_buffer_size(
142062306a36Sopenharmony_ci	struct xfs_mount	*mp,
142162306a36Sopenharmony_ci	struct xlog		*log)
142262306a36Sopenharmony_ci{
142362306a36Sopenharmony_ci	if (mp->m_logbufs <= 0)
142462306a36Sopenharmony_ci		mp->m_logbufs = XLOG_MAX_ICLOGS;
142562306a36Sopenharmony_ci	if (mp->m_logbsize <= 0)
142662306a36Sopenharmony_ci		mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
142762306a36Sopenharmony_ci
142862306a36Sopenharmony_ci	log->l_iclog_bufs = mp->m_logbufs;
142962306a36Sopenharmony_ci	log->l_iclog_size = mp->m_logbsize;
143062306a36Sopenharmony_ci
143162306a36Sopenharmony_ci	/*
143262306a36Sopenharmony_ci	 * # headers = size / 32k - one header holds cycles from 32k of data.
143362306a36Sopenharmony_ci	 */
143462306a36Sopenharmony_ci	log->l_iclog_heads =
143562306a36Sopenharmony_ci		DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
143662306a36Sopenharmony_ci	log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
143762306a36Sopenharmony_ci}
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_civoid
144062306a36Sopenharmony_cixfs_log_work_queue(
144162306a36Sopenharmony_ci	struct xfs_mount        *mp)
144262306a36Sopenharmony_ci{
144362306a36Sopenharmony_ci	queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
144462306a36Sopenharmony_ci				msecs_to_jiffies(xfs_syncd_centisecs * 10));
144562306a36Sopenharmony_ci}
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci/*
144862306a36Sopenharmony_ci * Clear the log incompat flags if we have the opportunity.
144962306a36Sopenharmony_ci *
145062306a36Sopenharmony_ci * This only happens if we're about to log the second dummy transaction as part
145162306a36Sopenharmony_ci * of covering the log and we can get the log incompat feature usage lock.
145262306a36Sopenharmony_ci */
145362306a36Sopenharmony_cistatic inline void
145462306a36Sopenharmony_cixlog_clear_incompat(
145562306a36Sopenharmony_ci	struct xlog		*log)
145662306a36Sopenharmony_ci{
145762306a36Sopenharmony_ci	struct xfs_mount	*mp = log->l_mp;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	if (!xfs_sb_has_incompat_log_feature(&mp->m_sb,
146062306a36Sopenharmony_ci				XFS_SB_FEAT_INCOMPAT_LOG_ALL))
146162306a36Sopenharmony_ci		return;
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	if (log->l_covered_state != XLOG_STATE_COVER_DONE2)
146462306a36Sopenharmony_ci		return;
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci	if (!down_write_trylock(&log->l_incompat_users))
146762306a36Sopenharmony_ci		return;
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_ci	xfs_clear_incompat_log_features(mp);
147062306a36Sopenharmony_ci	up_write(&log->l_incompat_users);
147162306a36Sopenharmony_ci}
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci/*
147462306a36Sopenharmony_ci * Every sync period we need to unpin all items in the AIL and push them to
147562306a36Sopenharmony_ci * disk. If there is nothing dirty, then we might need to cover the log to
147662306a36Sopenharmony_ci * indicate that the filesystem is idle.
147762306a36Sopenharmony_ci */
147862306a36Sopenharmony_cistatic void
147962306a36Sopenharmony_cixfs_log_worker(
148062306a36Sopenharmony_ci	struct work_struct	*work)
148162306a36Sopenharmony_ci{
148262306a36Sopenharmony_ci	struct xlog		*log = container_of(to_delayed_work(work),
148362306a36Sopenharmony_ci						struct xlog, l_work);
148462306a36Sopenharmony_ci	struct xfs_mount	*mp = log->l_mp;
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci	/* dgc: errors ignored - not fatal and nowhere to report them */
148762306a36Sopenharmony_ci	if (xfs_fs_writable(mp, SB_FREEZE_WRITE) && xfs_log_need_covered(mp)) {
148862306a36Sopenharmony_ci		/*
148962306a36Sopenharmony_ci		 * Dump a transaction into the log that contains no real change.
149062306a36Sopenharmony_ci		 * This is needed to stamp the current tail LSN into the log
149162306a36Sopenharmony_ci		 * during the covering operation.
149262306a36Sopenharmony_ci		 *
149362306a36Sopenharmony_ci		 * We cannot use an inode here for this - that will push dirty
149462306a36Sopenharmony_ci		 * state back up into the VFS and then periodic inode flushing
149562306a36Sopenharmony_ci		 * will prevent log covering from making progress. Hence we
149662306a36Sopenharmony_ci		 * synchronously log the superblock instead to ensure the
149762306a36Sopenharmony_ci		 * superblock is immediately unpinned and can be written back.
149862306a36Sopenharmony_ci		 */
149962306a36Sopenharmony_ci		xlog_clear_incompat(log);
150062306a36Sopenharmony_ci		xfs_sync_sb(mp, true);
150162306a36Sopenharmony_ci	} else
150262306a36Sopenharmony_ci		xfs_log_force(mp, 0);
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	/* start pushing all the metadata that is currently dirty */
150562306a36Sopenharmony_ci	xfs_ail_push_all(mp->m_ail);
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	/* queue us up again */
150862306a36Sopenharmony_ci	xfs_log_work_queue(mp);
150962306a36Sopenharmony_ci}
151062306a36Sopenharmony_ci
/*
 * This routine initializes some of the log structure for a given mount point.
 * Its primary purpose is to fill in enough, so recovery can occur.  However,
 * some other stuff may be filled in too.
 *
 * Returns the new log on success, or an ERR_PTR-encoded errno on failure
 * (-ENOMEM for allocation failures, -EFSCORRUPTED for invalid log sector
 * geometry).  On failure all partially-built state is torn down.
 */
STATIC struct xlog *
xlog_alloc_log(
	struct xfs_mount	*mp,
	struct xfs_buftarg	*log_target,
	xfs_daddr_t		blk_offset,
	int			num_bblks)
{
	struct xlog		*log;
	xlog_rec_header_t	*head;
	xlog_in_core_t		**iclogp;
	xlog_in_core_t		*iclog, *prev_iclog=NULL;
	int			i;
	int			error = -ENOMEM;
	uint			log2_size = 0;

	log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
	if (!log) {
		xfs_warn(mp, "Log allocation failed: No memory!");
		goto out;
	}

	log->l_mp	   = mp;
	log->l_targ	   = log_target;
	log->l_logsize     = BBTOB(num_bblks);
	log->l_logBBstart  = blk_offset;
	log->l_logBBsize   = num_bblks;
	log->l_covered_state = XLOG_STATE_COVER_IDLE;
	set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
	INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);

	log->l_prev_block  = -1;
	/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
	xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
	xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
	log->l_curr_cycle  = 1;	    /* 0 is bad since this is initial value */

	/* v2 logs round iclog writes up to the log stripe unit. */
	if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
		log->l_iclog_roundoff = mp->m_sb.sb_logsunit;
	else
		log->l_iclog_roundoff = BBSIZE;

	xlog_grant_head_init(&log->l_reserve_head);
	xlog_grant_head_init(&log->l_write_head);

	/* Validate the on-disk log sector size against the mount geometry. */
	error = -EFSCORRUPTED;
	if (xfs_has_sector(mp)) {
	        log2_size = mp->m_sb.sb_logsectlog;
		if (log2_size < BBSHIFT) {
			xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
				log2_size, BBSHIFT);
			goto out_free_log;
		}

	        log2_size -= BBSHIFT;
		if (log2_size > mp->m_sectbb_log) {
			xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
				log2_size, mp->m_sectbb_log);
			goto out_free_log;
		}

		/* for larger sector sizes, must have v2 or external log */
		if (log2_size && log->l_logBBstart > 0 &&
			    !xfs_has_logv2(mp)) {
			xfs_warn(mp,
		"log sector size (0x%x) invalid for configuration.",
				log2_size);
			goto out_free_log;
		}
	}
	log->l_sectBBsize = 1 << log2_size;

	init_rwsem(&log->l_incompat_users);

	xlog_get_iclog_buffer_size(mp, log);

	spin_lock_init(&log->l_icloglock);
	init_waitqueue_head(&log->l_flush_wait);

	iclogp = &log->l_iclog;
	/*
	 * The amount of memory to allocate for the iclog structure is
	 * rather funky due to the way the structure is defined.  It is
	 * done this way so that we can use different sizes for machines
	 * with different amounts of memory.  See the definition of
	 * xlog_in_core_t in xfs_log_priv.h for details.
	 */
	ASSERT(log->l_iclog_size >= 4096);
	for (i = 0; i < log->l_iclog_bufs; i++) {
		/* Trailing bio_vec array sized to cover the iclog data. */
		size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
				sizeof(struct bio_vec);

		iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
		if (!iclog)
			goto out_free_iclog;

		/* Link the new iclog onto the singly-built ring. */
		*iclogp = iclog;
		iclog->ic_prev = prev_iclog;
		prev_iclog = iclog;

		iclog->ic_data = kvzalloc(log->l_iclog_size,
				GFP_KERNEL | __GFP_RETRY_MAYFAIL);
		if (!iclog->ic_data)
			goto out_free_iclog;
		head = &iclog->ic_header;
		memset(head, 0, sizeof(xlog_rec_header_t));
		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
		head->h_version = cpu_to_be32(
			xfs_has_logv2(log->l_mp) ? 2 : 1);
		head->h_size = cpu_to_be32(log->l_iclog_size);
		/* new fields */
		head->h_fmt = cpu_to_be32(XLOG_FMT);
		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));

		/* Usable payload is the buffer minus the header region. */
		iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
		iclog->ic_state = XLOG_STATE_ACTIVE;
		iclog->ic_log = log;
		atomic_set(&iclog->ic_refcnt, 0);
		INIT_LIST_HEAD(&iclog->ic_callbacks);
		iclog->ic_datap = (void *)iclog->ic_data + log->l_iclog_hsize;

		init_waitqueue_head(&iclog->ic_force_wait);
		init_waitqueue_head(&iclog->ic_write_wait);
		INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
		sema_init(&iclog->ic_sema, 1);

		iclogp = &iclog->ic_next;
	}
	*iclogp = log->l_iclog;			/* complete ring */
	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */

	log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
				    WQ_HIGHPRI),
			0, mp->m_super->s_id);
	if (!log->l_ioend_workqueue)
		goto out_free_iclog;

	error = xlog_cil_init(log);
	if (error)
		goto out_destroy_workqueue;
	return log;

out_destroy_workqueue:
	destroy_workqueue(log->l_ioend_workqueue);
out_free_iclog:
	/* Walk the (possibly partial) ring, freeing each iclog once. */
	for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
		prev_iclog = iclog->ic_next;
		kmem_free(iclog->ic_data);
		kmem_free(iclog);
		if (prev_iclog == log->l_iclog)
			break;
	}
out_free_log:
	kmem_free(log);
out:
	return ERR_PTR(error);
}	/* xlog_alloc_log */
167362306a36Sopenharmony_ci
167462306a36Sopenharmony_ci/*
167562306a36Sopenharmony_ci * Compute the LSN that we'd need to push the log tail towards in order to have
167662306a36Sopenharmony_ci * (a) enough on-disk log space to log the number of bytes specified, (b) at
167762306a36Sopenharmony_ci * least 25% of the log space free, and (c) at least 256 blocks free.  If the
167862306a36Sopenharmony_ci * log free space already meets all three thresholds, this function returns
167962306a36Sopenharmony_ci * NULLCOMMITLSN.
168062306a36Sopenharmony_ci */
168162306a36Sopenharmony_cixfs_lsn_t
168262306a36Sopenharmony_cixlog_grant_push_threshold(
168362306a36Sopenharmony_ci	struct xlog	*log,
168462306a36Sopenharmony_ci	int		need_bytes)
168562306a36Sopenharmony_ci{
168662306a36Sopenharmony_ci	xfs_lsn_t	threshold_lsn = 0;
168762306a36Sopenharmony_ci	xfs_lsn_t	last_sync_lsn;
168862306a36Sopenharmony_ci	int		free_blocks;
168962306a36Sopenharmony_ci	int		free_bytes;
169062306a36Sopenharmony_ci	int		threshold_block;
169162306a36Sopenharmony_ci	int		threshold_cycle;
169262306a36Sopenharmony_ci	int		free_threshold;
169362306a36Sopenharmony_ci
169462306a36Sopenharmony_ci	ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ci	free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
169762306a36Sopenharmony_ci	free_blocks = BTOBBT(free_bytes);
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci	/*
170062306a36Sopenharmony_ci	 * Set the threshold for the minimum number of free blocks in the
170162306a36Sopenharmony_ci	 * log to the maximum of what the caller needs, one quarter of the
170262306a36Sopenharmony_ci	 * log, and 256 blocks.
170362306a36Sopenharmony_ci	 */
170462306a36Sopenharmony_ci	free_threshold = BTOBB(need_bytes);
170562306a36Sopenharmony_ci	free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
170662306a36Sopenharmony_ci	free_threshold = max(free_threshold, 256);
170762306a36Sopenharmony_ci	if (free_blocks >= free_threshold)
170862306a36Sopenharmony_ci		return NULLCOMMITLSN;
170962306a36Sopenharmony_ci
171062306a36Sopenharmony_ci	xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
171162306a36Sopenharmony_ci						&threshold_block);
171262306a36Sopenharmony_ci	threshold_block += free_threshold;
171362306a36Sopenharmony_ci	if (threshold_block >= log->l_logBBsize) {
171462306a36Sopenharmony_ci		threshold_block -= log->l_logBBsize;
171562306a36Sopenharmony_ci		threshold_cycle += 1;
171662306a36Sopenharmony_ci	}
171762306a36Sopenharmony_ci	threshold_lsn = xlog_assign_lsn(threshold_cycle,
171862306a36Sopenharmony_ci					threshold_block);
171962306a36Sopenharmony_ci	/*
172062306a36Sopenharmony_ci	 * Don't pass in an lsn greater than the lsn of the last
172162306a36Sopenharmony_ci	 * log record known to be on disk. Use a snapshot of the last sync lsn
172262306a36Sopenharmony_ci	 * so that it doesn't change between the compare and the set.
172362306a36Sopenharmony_ci	 */
172462306a36Sopenharmony_ci	last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
172562306a36Sopenharmony_ci	if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
172662306a36Sopenharmony_ci		threshold_lsn = last_sync_lsn;
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	return threshold_lsn;
172962306a36Sopenharmony_ci}
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci/*
173262306a36Sopenharmony_ci * Push the tail of the log if we need to do so to maintain the free log space
173362306a36Sopenharmony_ci * thresholds set out by xlog_grant_push_threshold.  We may need to adopt a
173462306a36Sopenharmony_ci * policy which pushes on an lsn which is further along in the log once we
173562306a36Sopenharmony_ci * reach the high water mark.  In this manner, we would be creating a low water
173662306a36Sopenharmony_ci * mark.
173762306a36Sopenharmony_ci */
173862306a36Sopenharmony_ciSTATIC void
173962306a36Sopenharmony_cixlog_grant_push_ail(
174062306a36Sopenharmony_ci	struct xlog	*log,
174162306a36Sopenharmony_ci	int		need_bytes)
174262306a36Sopenharmony_ci{
174362306a36Sopenharmony_ci	xfs_lsn_t	threshold_lsn;
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_ci	threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
174662306a36Sopenharmony_ci	if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log))
174762306a36Sopenharmony_ci		return;
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	/*
175062306a36Sopenharmony_ci	 * Get the transaction layer to kick the dirty buffers out to
175162306a36Sopenharmony_ci	 * disk asynchronously. No point in trying to do this if
175262306a36Sopenharmony_ci	 * the filesystem is shutting down.
175362306a36Sopenharmony_ci	 */
175462306a36Sopenharmony_ci	xfs_ail_push(log->l_ailp, threshold_lsn);
175562306a36Sopenharmony_ci}
175662306a36Sopenharmony_ci
/*
 * Stamp cycle number in every block
 *
 * The first __be32 of every basic block in the iclog payload is saved
 * into the record header's cycle-data array and then overwritten with
 * the record's cycle number.  For v2 logs larger than one header's worth
 * of cycle data, the overflow goes into the extended headers, which also
 * get the cycle number stamped into them.
 */
STATIC void
xlog_pack_data(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			roundoff)
{
	int			i, j, k;
	int			size = iclog->ic_offset + roundoff;
	__be32			cycle_lsn;
	char			*dp;

	cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);

	dp = iclog->ic_datap;
	/* First header covers the first XLOG_HEADER_CYCLE_SIZE of data. */
	for (i = 0; i < BTOBB(size); i++) {
		if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
			break;
		iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
		*(__be32 *)dp = cycle_lsn;
		dp += BBSIZE;
	}

	if (xfs_has_logv2(log->l_mp)) {
		xlog_in_core_2_t *xhdr = iclog->ic_data;

		/* j selects the extended header, k the slot within it. */
		for ( ; i < BTOBB(size); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
			*(__be32 *)dp = cycle_lsn;
			dp += BBSIZE;
		}

		/* Stamp the cycle into every extended header as well. */
		for (i = 1; i < log->l_iclog_heads; i++)
			xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
	}
}
179762306a36Sopenharmony_ci
179862306a36Sopenharmony_ci/*
179962306a36Sopenharmony_ci * Calculate the checksum for a log buffer.
180062306a36Sopenharmony_ci *
180162306a36Sopenharmony_ci * This is a little more complicated than it should be because the various
180262306a36Sopenharmony_ci * headers and the actual data are non-contiguous.
180362306a36Sopenharmony_ci */
180462306a36Sopenharmony_ci__le32
180562306a36Sopenharmony_cixlog_cksum(
180662306a36Sopenharmony_ci	struct xlog		*log,
180762306a36Sopenharmony_ci	struct xlog_rec_header	*rhead,
180862306a36Sopenharmony_ci	char			*dp,
180962306a36Sopenharmony_ci	int			size)
181062306a36Sopenharmony_ci{
181162306a36Sopenharmony_ci	uint32_t		crc;
181262306a36Sopenharmony_ci
181362306a36Sopenharmony_ci	/* first generate the crc for the record header ... */
181462306a36Sopenharmony_ci	crc = xfs_start_cksum_update((char *)rhead,
181562306a36Sopenharmony_ci			      sizeof(struct xlog_rec_header),
181662306a36Sopenharmony_ci			      offsetof(struct xlog_rec_header, h_crc));
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci	/* ... then for additional cycle data for v2 logs ... */
181962306a36Sopenharmony_ci	if (xfs_has_logv2(log->l_mp)) {
182062306a36Sopenharmony_ci		union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
182162306a36Sopenharmony_ci		int		i;
182262306a36Sopenharmony_ci		int		xheads;
182362306a36Sopenharmony_ci
182462306a36Sopenharmony_ci		xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
182562306a36Sopenharmony_ci
182662306a36Sopenharmony_ci		for (i = 1; i < xheads; i++) {
182762306a36Sopenharmony_ci			crc = crc32c(crc, &xhdr[i].hic_xheader,
182862306a36Sopenharmony_ci				     sizeof(struct xlog_rec_ext_header));
182962306a36Sopenharmony_ci		}
183062306a36Sopenharmony_ci	}
183162306a36Sopenharmony_ci
183262306a36Sopenharmony_ci	/* ... and finally for the payload */
183362306a36Sopenharmony_ci	crc = crc32c(crc, dp, size);
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci	return xfs_end_cksum(crc);
183662306a36Sopenharmony_ci}
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_cistatic void
183962306a36Sopenharmony_cixlog_bio_end_io(
184062306a36Sopenharmony_ci	struct bio		*bio)
184162306a36Sopenharmony_ci{
184262306a36Sopenharmony_ci	struct xlog_in_core	*iclog = bio->bi_private;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci	queue_work(iclog->ic_log->l_ioend_workqueue,
184562306a36Sopenharmony_ci		   &iclog->ic_end_io_work);
184662306a36Sopenharmony_ci}
184762306a36Sopenharmony_ci
184862306a36Sopenharmony_cistatic int
184962306a36Sopenharmony_cixlog_map_iclog_data(
185062306a36Sopenharmony_ci	struct bio		*bio,
185162306a36Sopenharmony_ci	void			*data,
185262306a36Sopenharmony_ci	size_t			count)
185362306a36Sopenharmony_ci{
185462306a36Sopenharmony_ci	do {
185562306a36Sopenharmony_ci		struct page	*page = kmem_to_page(data);
185662306a36Sopenharmony_ci		unsigned int	off = offset_in_page(data);
185762306a36Sopenharmony_ci		size_t		len = min_t(size_t, count, PAGE_SIZE - off);
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_ci		if (bio_add_page(bio, page, len, off) != len)
186062306a36Sopenharmony_ci			return -EIO;
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_ci		data += len;
186362306a36Sopenharmony_ci		count -= len;
186462306a36Sopenharmony_ci	} while (count);
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci	return 0;
186762306a36Sopenharmony_ci}
186862306a36Sopenharmony_ci
/*
 * Issue the write of an iclog to disk at physical log block @bno, splitting
 * the I/O into two bios when it wraps past the physical end of the log.
 */
STATIC void
xlog_write_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	uint64_t		bno,
	unsigned int		count)
{
	ASSERT(bno < log->l_logBBsize);
	trace_xlog_iclog_write(iclog, _RET_IP_);

	/*
	 * We lock the iclogbufs here so that we can serialise against I/O
	 * completion during unmount.  We might be processing a shutdown
	 * triggered during unmount, and that can occur asynchronously to the
	 * unmount thread, and hence we need to ensure that completes before
	 * tearing down the iclogbufs.  Hence we need to hold the buffer lock
	 * across the log IO to achieve that.
	 */
	down(&iclog->ic_sema);
	if (xlog_is_shutdown(log)) {
		/*
		 * It would seem logical to return EIO here, but we rely on
		 * the log state machine to propagate I/O errors instead of
		 * doing it here.  We kick off the state machine and unlock
		 * the buffer manually, the code needs to be kept in sync
		 * with the I/O completion path.
		 */
		goto sync;
	}

	/*
	 * We use REQ_SYNC | REQ_IDLE here to tell the block layer there are
	 * more IOs coming immediately after this one. This prevents the block
	 * layer writeback throttle from throttling log writes behind
	 * background metadata writeback and causing priority inversions.
	 */
	bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec,
		 howmany(count, PAGE_SIZE),
		 REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE);
	iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
	iclog->ic_bio.bi_end_io = xlog_bio_end_io;
	iclog->ic_bio.bi_private = iclog;

	if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
		iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
		/*
		 * For external log devices, we also need to flush the data
		 * device cache first to ensure all metadata writeback covered
		 * by the LSN in this iclog is on stable storage. This is slow,
		 * but it *must* complete before we issue the external log IO.
		 *
		 * If the flush fails, we cannot conclude that past metadata
		 * writeback from the log succeeded.  Repeating the flush is
		 * not possible, hence we must shut down with log IO error to
		 * avoid shutdown re-entering this path and erroring out again.
		 */
		if (log->l_targ != log->l_mp->m_ddev_targp &&
		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
			goto shutdown;
	}
	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
		iclog->ic_bio.bi_opf |= REQ_FUA;

	/* Flush/FUA requirements are now encoded in the bio; clear them. */
	iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);

	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
		goto shutdown;

	if (is_vmalloc_addr(iclog->ic_data))
		flush_kernel_vmap_range(iclog->ic_data, count);

	/*
	 * If this log buffer would straddle the end of the log we will have
	 * to split it up into two bios, so that we can continue at the start.
	 */
	if (bno + BTOBB(count) > log->l_logBBsize) {
		struct bio *split;

		split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
				  GFP_NOIO, &fs_bio_set);
		bio_chain(split, &iclog->ic_bio);
		submit_bio(split);

		/* restart at logical offset zero for the remainder */
		iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
	}

	submit_bio(&iclog->ic_bio);
	return;
shutdown:
	xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
sync:
	xlog_state_done_syncing(iclog);
	up(&iclog->ic_sema);
}
196462306a36Sopenharmony_ci
196562306a36Sopenharmony_ci/*
196662306a36Sopenharmony_ci * We need to bump cycle number for the part of the iclog that is
196762306a36Sopenharmony_ci * written to the start of the log. Watch out for the header magic
196862306a36Sopenharmony_ci * number case, though.
196962306a36Sopenharmony_ci */
197062306a36Sopenharmony_cistatic void
197162306a36Sopenharmony_cixlog_split_iclog(
197262306a36Sopenharmony_ci	struct xlog		*log,
197362306a36Sopenharmony_ci	void			*data,
197462306a36Sopenharmony_ci	uint64_t		bno,
197562306a36Sopenharmony_ci	unsigned int		count)
197662306a36Sopenharmony_ci{
197762306a36Sopenharmony_ci	unsigned int		split_offset = BBTOB(log->l_logBBsize - bno);
197862306a36Sopenharmony_ci	unsigned int		i;
197962306a36Sopenharmony_ci
198062306a36Sopenharmony_ci	for (i = split_offset; i < count; i += BBSIZE) {
198162306a36Sopenharmony_ci		uint32_t cycle = get_unaligned_be32(data + i);
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ci		if (++cycle == XLOG_HEADER_MAGIC_NUM)
198462306a36Sopenharmony_ci			cycle++;
198562306a36Sopenharmony_ci		put_unaligned_be32(cycle, data + i);
198662306a36Sopenharmony_ci	}
198762306a36Sopenharmony_ci}
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_cistatic int
199062306a36Sopenharmony_cixlog_calc_iclog_size(
199162306a36Sopenharmony_ci	struct xlog		*log,
199262306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
199362306a36Sopenharmony_ci	uint32_t		*roundoff)
199462306a36Sopenharmony_ci{
199562306a36Sopenharmony_ci	uint32_t		count_init, count;
199662306a36Sopenharmony_ci
199762306a36Sopenharmony_ci	/* Add for LR header */
199862306a36Sopenharmony_ci	count_init = log->l_iclog_hsize + iclog->ic_offset;
199962306a36Sopenharmony_ci	count = roundup(count_init, log->l_iclog_roundoff);
200062306a36Sopenharmony_ci
200162306a36Sopenharmony_ci	*roundoff = count - count_init;
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci	ASSERT(count >= count_init);
200462306a36Sopenharmony_ci	ASSERT(*roundoff < log->l_iclog_roundoff);
200562306a36Sopenharmony_ci	return count;
200662306a36Sopenharmony_ci}
200762306a36Sopenharmony_ci
/*
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
 * fashion.  Previously, we should have moved the current iclog
 * ptr in the log to point to the next available iclog.  This allows further
 * write to continue while this code syncs out an iclog ready to go.
 * Before an in-core log can be written out, the data section must be scanned
 * to save away the 1st word of each BBSIZE block into the header.  We replace
 * it with the current cycle count.  Each BBSIZE block is tagged with the
 * cycle count because there is an implicit assumption that drives will
 * guarantee that entire 512 byte blocks get written at once.  In other words,
 * we can't have part of a 512 byte block written and part not written.  By
 * tagging each block, we will know which blocks are valid when recovering
 * after an unclean shutdown.
 *
 * This routine is single threaded on the iclog.  No other thread can be in
 * this routine with the same iclog.  Changing contents of iclog can there-
 * fore be done without grabbing the state machine lock.  Updating the global
 * log will require grabbing the lock though.
 *
 * The entire log manager uses a logical block numbering scheme.  Only
 * xlog_write_iclog knows about the fact that the log may not start with
 * block zero on a given device.
 */
STATIC void
xlog_sync(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	struct xlog_ticket	*ticket)
{
	unsigned int		count;		/* byte count of bwrite */
	unsigned int		roundoff;       /* roundoff to BB or stripe */
	uint64_t		bno;		/* logical start block of write */
	unsigned int		size;		/* payload length for the header */

	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
	trace_xlog_iclog_sync(iclog, _RET_IP_);

	count = xlog_calc_iclog_size(log, iclog, &roundoff);

	/*
	 * If we have a ticket, account for the roundoff via the ticket
	 * reservation to avoid touching the hot grant heads needlessly.
	 * Otherwise, we have to move grant heads directly.
	 */
	if (ticket) {
		ticket->t_curr_res -= roundoff;
	} else {
		xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
		xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
	}

	/* put cycle number in every block */
	xlog_pack_data(log, iclog, roundoff);

	/* real byte length */
	size = iclog->ic_offset;
	if (xfs_has_logv2(log->l_mp))
		size += roundoff;
	iclog->ic_header.h_len = cpu_to_be32(size);

	XFS_STATS_INC(log->l_mp, xs_log_writes);
	XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));

	bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));

	/* Do we need to split this write into 2 parts? */
	if (bno + BTOBB(count) > log->l_logBBsize)
		xlog_split_iclog(log, &iclog->ic_header, bno, count);

	/* calculate the checksum */
	iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
					    iclog->ic_datap, size);
	/*
	 * Intentionally corrupt the log record CRC based on the error injection
	 * frequency, if defined. This facilitates testing log recovery in the
	 * event of torn writes. Hence, set the IOABORT state to abort the log
	 * write on I/O completion and shutdown the fs. The subsequent mount
	 * detects the bad CRC and attempts to recover.
	 */
#ifdef DEBUG
	if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
		iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
		iclog->ic_fail_crc = true;
		xfs_warn(log->l_mp,
	"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
			 be64_to_cpu(iclog->ic_header.h_lsn));
	}
#endif
	xlog_verify_iclog(log, iclog, count);
	xlog_write_iclog(log, iclog, bno, count);
}
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci/*
210162306a36Sopenharmony_ci * Deallocate a log structure
210262306a36Sopenharmony_ci */
210362306a36Sopenharmony_ciSTATIC void
210462306a36Sopenharmony_cixlog_dealloc_log(
210562306a36Sopenharmony_ci	struct xlog	*log)
210662306a36Sopenharmony_ci{
210762306a36Sopenharmony_ci	xlog_in_core_t	*iclog, *next_iclog;
210862306a36Sopenharmony_ci	int		i;
210962306a36Sopenharmony_ci
211062306a36Sopenharmony_ci	/*
211162306a36Sopenharmony_ci	 * Destroy the CIL after waiting for iclog IO completion because an
211262306a36Sopenharmony_ci	 * iclog EIO error will try to shut down the log, which accesses the
211362306a36Sopenharmony_ci	 * CIL to wake up the waiters.
211462306a36Sopenharmony_ci	 */
211562306a36Sopenharmony_ci	xlog_cil_destroy(log);
211662306a36Sopenharmony_ci
211762306a36Sopenharmony_ci	iclog = log->l_iclog;
211862306a36Sopenharmony_ci	for (i = 0; i < log->l_iclog_bufs; i++) {
211962306a36Sopenharmony_ci		next_iclog = iclog->ic_next;
212062306a36Sopenharmony_ci		kmem_free(iclog->ic_data);
212162306a36Sopenharmony_ci		kmem_free(iclog);
212262306a36Sopenharmony_ci		iclog = next_iclog;
212362306a36Sopenharmony_ci	}
212462306a36Sopenharmony_ci
212562306a36Sopenharmony_ci	log->l_mp->m_log = NULL;
212662306a36Sopenharmony_ci	destroy_workqueue(log->l_ioend_workqueue);
212762306a36Sopenharmony_ci	kmem_free(log);
212862306a36Sopenharmony_ci}
212962306a36Sopenharmony_ci
213062306a36Sopenharmony_ci/*
213162306a36Sopenharmony_ci * Update counters atomically now that memcpy is done.
213262306a36Sopenharmony_ci */
213362306a36Sopenharmony_cistatic inline void
213462306a36Sopenharmony_cixlog_state_finish_copy(
213562306a36Sopenharmony_ci	struct xlog		*log,
213662306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
213762306a36Sopenharmony_ci	int			record_cnt,
213862306a36Sopenharmony_ci	int			copy_bytes)
213962306a36Sopenharmony_ci{
214062306a36Sopenharmony_ci	lockdep_assert_held(&log->l_icloglock);
214162306a36Sopenharmony_ci
214262306a36Sopenharmony_ci	be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
214362306a36Sopenharmony_ci	iclog->ic_offset += copy_bytes;
214462306a36Sopenharmony_ci}
214562306a36Sopenharmony_ci
/*
 * Print a summary of a log ticket's reservation accounting: the unit and
 * current reservations in bytes, plus the original and remaining permanent
 * transaction counts.
 */
void
xlog_print_tic_res(
	struct xfs_mount	*mp,
	struct xlog_ticket	*ticket)
{
	xfs_warn(mp, "ticket reservation summary:");
	xfs_warn(mp, "  unit res    = %d bytes", ticket->t_unit_res);
	xfs_warn(mp, "  current res = %d bytes", ticket->t_curr_res);
	xfs_warn(mp, "  original count  = %d", ticket->t_ocnt);
	xfs_warn(mp, "  remaining count = %d", ticket->t_cnt);
}
216162306a36Sopenharmony_ci
/*
 * Print a summary of the transaction: its reservation, ticket, and every
 * log item with the first few bytes of each of its formatted iovecs.
 */
void
xlog_print_trans(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_log_item	*lip;

	/* dump core transaction and ticket info */
	xfs_warn(mp, "transaction summary:");
	xfs_warn(mp, "  log res   = %d", tp->t_log_res);
	xfs_warn(mp, "  log count = %d", tp->t_log_count);
	xfs_warn(mp, "  flags     = 0x%x", tp->t_flags);

	xlog_print_tic_res(mp, tp->t_ticket);

	/* dump each log item */
	list_for_each_entry(lip, &tp->t_items, li_trans) {
		struct xfs_log_vec	*lv = lip->li_lv;
		struct xfs_log_iovec	*vec;
		int			i;

		xfs_warn(mp, "log item: ");
		xfs_warn(mp, "  type	= 0x%x", lip->li_type);
		xfs_warn(mp, "  flags	= 0x%lx", lip->li_flags);
		/* Items that have not been formatted yet have no log vector. */
		if (!lv)
			continue;
		xfs_warn(mp, "  niovecs	= %d", lv->lv_niovecs);
		xfs_warn(mp, "  size	= %d", lv->lv_size);
		xfs_warn(mp, "  bytes	= %d", lv->lv_bytes);
		xfs_warn(mp, "  buf len	= %d", lv->lv_buf_len);

		/* dump each iovec for the log item, capped at 32 bytes each */
		vec = lv->lv_iovecp;
		for (i = 0; i < lv->lv_niovecs; i++) {
			int dumplen = min(vec->i_len, 32);

			xfs_warn(mp, "  iovec[%d]", i);
			xfs_warn(mp, "    type	= 0x%x", vec->i_type);
			xfs_warn(mp, "    len	= %d", vec->i_len);
			xfs_warn(mp, "    first %d bytes of iovec[%d]:", dumplen, i);
			xfs_hex_dump(vec->i_addr, dumplen);

			vec++;
		}
	}
}
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_cistatic inline void
221362306a36Sopenharmony_cixlog_write_iovec(
221462306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
221562306a36Sopenharmony_ci	uint32_t		*log_offset,
221662306a36Sopenharmony_ci	void			*data,
221762306a36Sopenharmony_ci	uint32_t		write_len,
221862306a36Sopenharmony_ci	int			*bytes_left,
221962306a36Sopenharmony_ci	uint32_t		*record_cnt,
222062306a36Sopenharmony_ci	uint32_t		*data_cnt)
222162306a36Sopenharmony_ci{
222262306a36Sopenharmony_ci	ASSERT(*log_offset < iclog->ic_log->l_iclog_size);
222362306a36Sopenharmony_ci	ASSERT(*log_offset % sizeof(int32_t) == 0);
222462306a36Sopenharmony_ci	ASSERT(write_len % sizeof(int32_t) == 0);
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci	memcpy(iclog->ic_datap + *log_offset, data, write_len);
222762306a36Sopenharmony_ci	*log_offset += write_len;
222862306a36Sopenharmony_ci	*bytes_left -= write_len;
222962306a36Sopenharmony_ci	(*record_cnt)++;
223062306a36Sopenharmony_ci	*data_cnt += write_len;
223162306a36Sopenharmony_ci}
223262306a36Sopenharmony_ci
223362306a36Sopenharmony_ci/*
223462306a36Sopenharmony_ci * Write log vectors into a single iclog which is guaranteed by the caller
223562306a36Sopenharmony_ci * to have enough space to write the entire log vector into.
223662306a36Sopenharmony_ci */
223762306a36Sopenharmony_cistatic void
223862306a36Sopenharmony_cixlog_write_full(
223962306a36Sopenharmony_ci	struct xfs_log_vec	*lv,
224062306a36Sopenharmony_ci	struct xlog_ticket	*ticket,
224162306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
224262306a36Sopenharmony_ci	uint32_t		*log_offset,
224362306a36Sopenharmony_ci	uint32_t		*len,
224462306a36Sopenharmony_ci	uint32_t		*record_cnt,
224562306a36Sopenharmony_ci	uint32_t		*data_cnt)
224662306a36Sopenharmony_ci{
224762306a36Sopenharmony_ci	int			index;
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ci	ASSERT(*log_offset + *len <= iclog->ic_size ||
225062306a36Sopenharmony_ci		iclog->ic_state == XLOG_STATE_WANT_SYNC);
225162306a36Sopenharmony_ci
225262306a36Sopenharmony_ci	/*
225362306a36Sopenharmony_ci	 * Ordered log vectors have no regions to write so this
225462306a36Sopenharmony_ci	 * loop will naturally skip them.
225562306a36Sopenharmony_ci	 */
225662306a36Sopenharmony_ci	for (index = 0; index < lv->lv_niovecs; index++) {
225762306a36Sopenharmony_ci		struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
225862306a36Sopenharmony_ci		struct xlog_op_header	*ophdr = reg->i_addr;
225962306a36Sopenharmony_ci
226062306a36Sopenharmony_ci		ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
226162306a36Sopenharmony_ci		xlog_write_iovec(iclog, log_offset, reg->i_addr,
226262306a36Sopenharmony_ci				reg->i_len, len, record_cnt, data_cnt);
226362306a36Sopenharmony_ci	}
226462306a36Sopenharmony_ci}
226562306a36Sopenharmony_ci
/*
 * Finish off and release the current iclog (accounting for everything
 * copied into it so far), then grab the next iclog with at least @len
 * bytes of space.  On success, *iclogp points at the new iclog and the
 * caller's record/data accounting is reset for it.
 */
static int
xlog_write_get_more_iclog_space(
	struct xlog_ticket	*ticket,
	struct xlog_in_core	**iclogp,
	uint32_t		*log_offset,
	uint32_t		len,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	struct xlog_in_core	*iclog = *iclogp;
	struct xlog		*log = iclog->ic_log;
	int			error;

	/* Account the copied data and release the full iclog for syncing. */
	spin_lock(&log->l_icloglock);
	ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC);
	xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
	error = xlog_state_release_iclog(log, iclog, ticket);
	spin_unlock(&log->l_icloglock);
	if (error)
		return error;

	error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
					log_offset);
	if (error)
		return error;
	/* Fresh iclog: restart the copy accounting. */
	*record_cnt = 0;
	*data_cnt = 0;
	*iclogp = iclog;
	return 0;
}
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci/*
229862306a36Sopenharmony_ci * Write log vectors into a single iclog which is smaller than the current chain
229962306a36Sopenharmony_ci * length. We write until we cannot fit a full record into the remaining space
230062306a36Sopenharmony_ci * and then stop. We return the log vector that is to be written that cannot
230162306a36Sopenharmony_ci * wholly fit in the iclog.
230262306a36Sopenharmony_ci */
230362306a36Sopenharmony_cistatic int
230462306a36Sopenharmony_cixlog_write_partial(
230562306a36Sopenharmony_ci	struct xfs_log_vec	*lv,
230662306a36Sopenharmony_ci	struct xlog_ticket	*ticket,
230762306a36Sopenharmony_ci	struct xlog_in_core	**iclogp,
230862306a36Sopenharmony_ci	uint32_t		*log_offset,
230962306a36Sopenharmony_ci	uint32_t		*len,
231062306a36Sopenharmony_ci	uint32_t		*record_cnt,
231162306a36Sopenharmony_ci	uint32_t		*data_cnt)
231262306a36Sopenharmony_ci{
231362306a36Sopenharmony_ci	struct xlog_in_core	*iclog = *iclogp;
231462306a36Sopenharmony_ci	struct xlog_op_header	*ophdr;
231562306a36Sopenharmony_ci	int			index = 0;
231662306a36Sopenharmony_ci	uint32_t		rlen;
231762306a36Sopenharmony_ci	int			error;
231862306a36Sopenharmony_ci
231962306a36Sopenharmony_ci	/* walk the logvec, copying until we run out of space in the iclog */
232062306a36Sopenharmony_ci	for (index = 0; index < lv->lv_niovecs; index++) {
232162306a36Sopenharmony_ci		struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
232262306a36Sopenharmony_ci		uint32_t		reg_offset = 0;
232362306a36Sopenharmony_ci
232462306a36Sopenharmony_ci		/*
232562306a36Sopenharmony_ci		 * The first region of a continuation must have a non-zero
232662306a36Sopenharmony_ci		 * length otherwise log recovery will just skip over it and
232762306a36Sopenharmony_ci		 * start recovering from the next opheader it finds. Because we
232862306a36Sopenharmony_ci		 * mark the next opheader as a continuation, recovery will then
232962306a36Sopenharmony_ci		 * incorrectly add the continuation to the previous region and
233062306a36Sopenharmony_ci		 * that breaks stuff.
233162306a36Sopenharmony_ci		 *
233262306a36Sopenharmony_ci		 * Hence if there isn't space for region data after the
233362306a36Sopenharmony_ci		 * opheader, then we need to start afresh with a new iclog.
233462306a36Sopenharmony_ci		 */
233562306a36Sopenharmony_ci		if (iclog->ic_size - *log_offset <=
233662306a36Sopenharmony_ci					sizeof(struct xlog_op_header)) {
233762306a36Sopenharmony_ci			error = xlog_write_get_more_iclog_space(ticket,
233862306a36Sopenharmony_ci					&iclog, log_offset, *len, record_cnt,
233962306a36Sopenharmony_ci					data_cnt);
234062306a36Sopenharmony_ci			if (error)
234162306a36Sopenharmony_ci				return error;
234262306a36Sopenharmony_ci		}
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci		ophdr = reg->i_addr;
234562306a36Sopenharmony_ci		rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);
234662306a36Sopenharmony_ci
234762306a36Sopenharmony_ci		ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
234862306a36Sopenharmony_ci		ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header));
234962306a36Sopenharmony_ci		if (rlen != reg->i_len)
235062306a36Sopenharmony_ci			ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
235162306a36Sopenharmony_ci
235262306a36Sopenharmony_ci		xlog_write_iovec(iclog, log_offset, reg->i_addr,
235362306a36Sopenharmony_ci				rlen, len, record_cnt, data_cnt);
235462306a36Sopenharmony_ci
235562306a36Sopenharmony_ci		/* If we wrote the whole region, move to the next. */
235662306a36Sopenharmony_ci		if (rlen == reg->i_len)
235762306a36Sopenharmony_ci			continue;
235862306a36Sopenharmony_ci
235962306a36Sopenharmony_ci		/*
236062306a36Sopenharmony_ci		 * We now have a partially written iovec, but it can span
236162306a36Sopenharmony_ci		 * multiple iclogs so we loop here. First we release the iclog
236262306a36Sopenharmony_ci		 * we currently have, then we get a new iclog and add a new
236362306a36Sopenharmony_ci		 * opheader. Then we continue copying from where we were until
236462306a36Sopenharmony_ci		 * we either complete the iovec or fill the iclog. If we
236562306a36Sopenharmony_ci		 * complete the iovec, then we increment the index and go right
236662306a36Sopenharmony_ci		 * back to the top of the outer loop. if we fill the iclog, we
236762306a36Sopenharmony_ci		 * run the inner loop again.
236862306a36Sopenharmony_ci		 *
236962306a36Sopenharmony_ci		 * This is complicated by the tail of a region using all the
237062306a36Sopenharmony_ci		 * space in an iclog and hence requiring us to release the iclog
237162306a36Sopenharmony_ci		 * and get a new one before returning to the outer loop. We must
237262306a36Sopenharmony_ci		 * always guarantee that we exit this inner loop with at least
237362306a36Sopenharmony_ci		 * space for log transaction opheaders left in the current
237462306a36Sopenharmony_ci		 * iclog, hence we cannot just terminate the loop at the end
		 * of the continuation. So we loop while there is no
237662306a36Sopenharmony_ci		 * space left in the current iclog, and check for the end of the
237762306a36Sopenharmony_ci		 * continuation after getting a new iclog.
237862306a36Sopenharmony_ci		 */
237962306a36Sopenharmony_ci		do {
238062306a36Sopenharmony_ci			/*
238162306a36Sopenharmony_ci			 * Ensure we include the continuation opheader in the
238262306a36Sopenharmony_ci			 * space we need in the new iclog by adding that size
238362306a36Sopenharmony_ci			 * to the length we require. This continuation opheader
238462306a36Sopenharmony_ci			 * needs to be accounted to the ticket as the space it
238562306a36Sopenharmony_ci			 * consumes hasn't been accounted to the lv we are
238662306a36Sopenharmony_ci			 * writing.
238762306a36Sopenharmony_ci			 */
238862306a36Sopenharmony_ci			error = xlog_write_get_more_iclog_space(ticket,
238962306a36Sopenharmony_ci					&iclog, log_offset,
239062306a36Sopenharmony_ci					*len + sizeof(struct xlog_op_header),
239162306a36Sopenharmony_ci					record_cnt, data_cnt);
239262306a36Sopenharmony_ci			if (error)
239362306a36Sopenharmony_ci				return error;
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ci			ophdr = iclog->ic_datap + *log_offset;
239662306a36Sopenharmony_ci			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
239762306a36Sopenharmony_ci			ophdr->oh_clientid = XFS_TRANSACTION;
239862306a36Sopenharmony_ci			ophdr->oh_res2 = 0;
239962306a36Sopenharmony_ci			ophdr->oh_flags = XLOG_WAS_CONT_TRANS;
240062306a36Sopenharmony_ci
240162306a36Sopenharmony_ci			ticket->t_curr_res -= sizeof(struct xlog_op_header);
240262306a36Sopenharmony_ci			*log_offset += sizeof(struct xlog_op_header);
240362306a36Sopenharmony_ci			*data_cnt += sizeof(struct xlog_op_header);
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci			/*
240662306a36Sopenharmony_ci			 * If rlen fits in the iclog, then end the region
240762306a36Sopenharmony_ci			 * continuation. Otherwise we're going around again.
240862306a36Sopenharmony_ci			 */
240962306a36Sopenharmony_ci			reg_offset += rlen;
241062306a36Sopenharmony_ci			rlen = reg->i_len - reg_offset;
241162306a36Sopenharmony_ci			if (rlen <= iclog->ic_size - *log_offset)
241262306a36Sopenharmony_ci				ophdr->oh_flags |= XLOG_END_TRANS;
241362306a36Sopenharmony_ci			else
241462306a36Sopenharmony_ci				ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
241562306a36Sopenharmony_ci
241662306a36Sopenharmony_ci			rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset);
241762306a36Sopenharmony_ci			ophdr->oh_len = cpu_to_be32(rlen);
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci			xlog_write_iovec(iclog, log_offset,
242062306a36Sopenharmony_ci					reg->i_addr + reg_offset,
242162306a36Sopenharmony_ci					rlen, len, record_cnt, data_cnt);
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci		} while (ophdr->oh_flags & XLOG_CONTINUE_TRANS);
242462306a36Sopenharmony_ci	}
242562306a36Sopenharmony_ci
242662306a36Sopenharmony_ci	/*
242762306a36Sopenharmony_ci	 * No more iovecs remain in this logvec so return the next log vec to
242862306a36Sopenharmony_ci	 * the caller so it can go back to fast path copying.
242962306a36Sopenharmony_ci	 */
243062306a36Sopenharmony_ci	*iclogp = iclog;
243162306a36Sopenharmony_ci	return 0;
243262306a36Sopenharmony_ci}
243362306a36Sopenharmony_ci
243462306a36Sopenharmony_ci/*
243562306a36Sopenharmony_ci * Write some region out to in-core log
243662306a36Sopenharmony_ci *
243762306a36Sopenharmony_ci * This will be called when writing externally provided regions or when
243862306a36Sopenharmony_ci * writing out a commit record for a given transaction.
243962306a36Sopenharmony_ci *
244062306a36Sopenharmony_ci * General algorithm:
244162306a36Sopenharmony_ci *	1. Find total length of this write.  This may include adding to the
244262306a36Sopenharmony_ci *		lengths passed in.
244362306a36Sopenharmony_ci *	2. Check whether we violate the tickets reservation.
244462306a36Sopenharmony_ci *	3. While writing to this iclog
244562306a36Sopenharmony_ci *	    A. Reserve as much space in this iclog as can get
244662306a36Sopenharmony_ci *	    B. If this is first write, save away start lsn
244762306a36Sopenharmony_ci *	    C. While writing this region:
244862306a36Sopenharmony_ci *		1. If first write of transaction, write start record
244962306a36Sopenharmony_ci *		2. Write log operation header (header per region)
245062306a36Sopenharmony_ci *		3. Find out if we can fit entire region into this iclog
245162306a36Sopenharmony_ci *		4. Potentially, verify destination memcpy ptr
245262306a36Sopenharmony_ci *		5. Memcpy (partial) region
245362306a36Sopenharmony_ci *		6. If partial copy, release iclog; otherwise, continue
245462306a36Sopenharmony_ci *			copying more regions into current iclog
245562306a36Sopenharmony_ci *	4. Mark want sync bit (in simulation mode)
245662306a36Sopenharmony_ci *	5. Release iclog for potential flush to on-disk log.
245762306a36Sopenharmony_ci *
245862306a36Sopenharmony_ci * ERRORS:
245962306a36Sopenharmony_ci * 1.	Panic if reservation is overrun.  This should never happen since
246062306a36Sopenharmony_ci *	reservation amounts are generated internal to the filesystem.
246162306a36Sopenharmony_ci * NOTES:
246262306a36Sopenharmony_ci * 1. Tickets are single threaded data structures.
246362306a36Sopenharmony_ci * 2. The XLOG_END_TRANS & XLOG_CONTINUE_TRANS flags are passed down to the
246462306a36Sopenharmony_ci *	syncing routine.  When a single log_write region needs to span
246562306a36Sopenharmony_ci *	multiple in-core logs, the XLOG_CONTINUE_TRANS bit should be set
246662306a36Sopenharmony_ci *	on all log operation writes which don't contain the end of the
246762306a36Sopenharmony_ci *	region.  The XLOG_END_TRANS bit is used for the in-core log
246862306a36Sopenharmony_ci *	operation which contains the end of the continued log_write region.
246962306a36Sopenharmony_ci * 3. When xlog_state_get_iclog_space() grabs the rest of the current iclog,
247062306a36Sopenharmony_ci *	we don't really know exactly how much space will be used.  As a result,
247162306a36Sopenharmony_ci *	we don't update ic_offset until the end when we know exactly how many
247262306a36Sopenharmony_ci *	bytes have been written out.
247362306a36Sopenharmony_ci */
int
xlog_write(
	struct xlog		*log,
	struct xfs_cil_ctx	*ctx,
	struct list_head	*lv_chain,
	struct xlog_ticket	*ticket,
	uint32_t		len)

{
	struct xlog_in_core	*iclog = NULL;
	struct xfs_log_vec	*lv;
	uint32_t		record_cnt = 0;
	uint32_t		data_cnt = 0;
	int			error = 0;
	int			log_offset;

	/*
	 * A negative current reservation means the caller under-reserved
	 * space for this write. Shut the log down rather than overrun the
	 * ticket; the xlog_state_get_iclog_space() call below will then
	 * fail with -EIO instead of writing to the log.
	 */
	if (ticket->t_curr_res < 0) {
		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
		     "ctx ticket reservation ran out. Need to up reservation");
		xlog_print_tic_res(log->l_mp, ticket);
		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
	}

	error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
					   &log_offset);
	if (error)
		return error;

	/* We must have been handed an iclog with space left to write into. */
	ASSERT(log_offset <= iclog->ic_size - 1);

	/*
	 * If we have a context pointer, pass it the first iclog we are
	 * writing to so it can record state needed for iclog write
	 * ordering.
	 */
	if (ctx)
		xlog_cil_set_ctx_write_state(ctx, iclog);

	list_for_each_entry(lv, lv_chain, lv_list) {
		/*
		 * If the entire log vec does not fit in the iclog, punt it to
		 * the partial copy loop which can handle this case.
		 */
		if (lv->lv_niovecs &&
		    lv->lv_bytes > iclog->ic_size - log_offset) {
			error = xlog_write_partial(lv, ticket, &iclog,
					&log_offset, &len, &record_cnt,
					&data_cnt);
			if (error) {
				/*
				 * We have no iclog to release, so just return
				 * the error immediately.
				 */
				return error;
			}
		} else {
			xlog_write_full(lv, ticket, iclog, &log_offset,
					 &len, &record_cnt, &data_cnt);
		}
	}
	/* The whole requested length must have been consumed by the copies. */
	ASSERT(len == 0);

	/*
	 * We've already been guaranteed that the last writes will fit inside
	 * the current iclog, and hence it will already have the space used by
	 * those writes accounted to it. Hence we do not need to update the
	 * iclog with the number of bytes written here.
	 */
	spin_lock(&log->l_icloglock);
	xlog_state_finish_copy(log, iclog, record_cnt, 0);
	error = xlog_state_release_iclog(log, iclog, ticket);
	spin_unlock(&log->l_icloglock);

	return error;
}
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_cistatic void
255162306a36Sopenharmony_cixlog_state_activate_iclog(
255262306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
255362306a36Sopenharmony_ci	int			*iclogs_changed)
255462306a36Sopenharmony_ci{
255562306a36Sopenharmony_ci	ASSERT(list_empty_careful(&iclog->ic_callbacks));
255662306a36Sopenharmony_ci	trace_xlog_iclog_activate(iclog, _RET_IP_);
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci	/*
255962306a36Sopenharmony_ci	 * If the number of ops in this iclog indicate it just contains the
256062306a36Sopenharmony_ci	 * dummy transaction, we can change state into IDLE (the second time
256162306a36Sopenharmony_ci	 * around). Otherwise we should change the state into NEED a dummy.
256262306a36Sopenharmony_ci	 * We don't need to cover the dummy.
256362306a36Sopenharmony_ci	 */
256462306a36Sopenharmony_ci	if (*iclogs_changed == 0 &&
256562306a36Sopenharmony_ci	    iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) {
256662306a36Sopenharmony_ci		*iclogs_changed = 1;
256762306a36Sopenharmony_ci	} else {
256862306a36Sopenharmony_ci		/*
256962306a36Sopenharmony_ci		 * We have two dirty iclogs so start over.  This could also be
257062306a36Sopenharmony_ci		 * num of ops indicating this is not the dummy going out.
257162306a36Sopenharmony_ci		 */
257262306a36Sopenharmony_ci		*iclogs_changed = 2;
257362306a36Sopenharmony_ci	}
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci	iclog->ic_state	= XLOG_STATE_ACTIVE;
257662306a36Sopenharmony_ci	iclog->ic_offset = 0;
257762306a36Sopenharmony_ci	iclog->ic_header.h_num_logops = 0;
257862306a36Sopenharmony_ci	memset(iclog->ic_header.h_cycle_data, 0,
257962306a36Sopenharmony_ci		sizeof(iclog->ic_header.h_cycle_data));
258062306a36Sopenharmony_ci	iclog->ic_header.h_lsn = 0;
258162306a36Sopenharmony_ci	iclog->ic_header.h_tail_lsn = 0;
258262306a36Sopenharmony_ci}
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci/*
258562306a36Sopenharmony_ci * Loop through all iclogs and mark all iclogs currently marked DIRTY as
258662306a36Sopenharmony_ci * ACTIVE after iclog I/O has completed.
258762306a36Sopenharmony_ci */
258862306a36Sopenharmony_cistatic void
258962306a36Sopenharmony_cixlog_state_activate_iclogs(
259062306a36Sopenharmony_ci	struct xlog		*log,
259162306a36Sopenharmony_ci	int			*iclogs_changed)
259262306a36Sopenharmony_ci{
259362306a36Sopenharmony_ci	struct xlog_in_core	*iclog = log->l_iclog;
259462306a36Sopenharmony_ci
259562306a36Sopenharmony_ci	do {
259662306a36Sopenharmony_ci		if (iclog->ic_state == XLOG_STATE_DIRTY)
259762306a36Sopenharmony_ci			xlog_state_activate_iclog(iclog, iclogs_changed);
259862306a36Sopenharmony_ci		/*
259962306a36Sopenharmony_ci		 * The ordering of marking iclogs ACTIVE must be maintained, so
260062306a36Sopenharmony_ci		 * an iclog doesn't become ACTIVE beyond one that is SYNCING.
260162306a36Sopenharmony_ci		 */
260262306a36Sopenharmony_ci		else if (iclog->ic_state != XLOG_STATE_ACTIVE)
260362306a36Sopenharmony_ci			break;
260462306a36Sopenharmony_ci	} while ((iclog = iclog->ic_next) != log->l_iclog);
260562306a36Sopenharmony_ci}
260662306a36Sopenharmony_ci
260762306a36Sopenharmony_cistatic int
260862306a36Sopenharmony_cixlog_covered_state(
260962306a36Sopenharmony_ci	int			prev_state,
261062306a36Sopenharmony_ci	int			iclogs_changed)
261162306a36Sopenharmony_ci{
261262306a36Sopenharmony_ci	/*
261362306a36Sopenharmony_ci	 * We go to NEED for any non-covering writes. We go to NEED2 if we just
261462306a36Sopenharmony_ci	 * wrote the first covering record (DONE). We go to IDLE if we just
261562306a36Sopenharmony_ci	 * wrote the second covering record (DONE2) and remain in IDLE until a
261662306a36Sopenharmony_ci	 * non-covering write occurs.
261762306a36Sopenharmony_ci	 */
261862306a36Sopenharmony_ci	switch (prev_state) {
261962306a36Sopenharmony_ci	case XLOG_STATE_COVER_IDLE:
262062306a36Sopenharmony_ci		if (iclogs_changed == 1)
262162306a36Sopenharmony_ci			return XLOG_STATE_COVER_IDLE;
262262306a36Sopenharmony_ci		fallthrough;
262362306a36Sopenharmony_ci	case XLOG_STATE_COVER_NEED:
262462306a36Sopenharmony_ci	case XLOG_STATE_COVER_NEED2:
262562306a36Sopenharmony_ci		break;
262662306a36Sopenharmony_ci	case XLOG_STATE_COVER_DONE:
262762306a36Sopenharmony_ci		if (iclogs_changed == 1)
262862306a36Sopenharmony_ci			return XLOG_STATE_COVER_NEED2;
262962306a36Sopenharmony_ci		break;
263062306a36Sopenharmony_ci	case XLOG_STATE_COVER_DONE2:
263162306a36Sopenharmony_ci		if (iclogs_changed == 1)
263262306a36Sopenharmony_ci			return XLOG_STATE_COVER_IDLE;
263362306a36Sopenharmony_ci		break;
263462306a36Sopenharmony_ci	default:
263562306a36Sopenharmony_ci		ASSERT(0);
263662306a36Sopenharmony_ci	}
263762306a36Sopenharmony_ci
263862306a36Sopenharmony_ci	return XLOG_STATE_COVER_NEED;
263962306a36Sopenharmony_ci}
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ciSTATIC void
264262306a36Sopenharmony_cixlog_state_clean_iclog(
264362306a36Sopenharmony_ci	struct xlog		*log,
264462306a36Sopenharmony_ci	struct xlog_in_core	*dirty_iclog)
264562306a36Sopenharmony_ci{
264662306a36Sopenharmony_ci	int			iclogs_changed = 0;
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_ci	trace_xlog_iclog_clean(dirty_iclog, _RET_IP_);
264962306a36Sopenharmony_ci
265062306a36Sopenharmony_ci	dirty_iclog->ic_state = XLOG_STATE_DIRTY;
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	xlog_state_activate_iclogs(log, &iclogs_changed);
265362306a36Sopenharmony_ci	wake_up_all(&dirty_iclog->ic_force_wait);
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_ci	if (iclogs_changed) {
265662306a36Sopenharmony_ci		log->l_covered_state = xlog_covered_state(log->l_covered_state,
265762306a36Sopenharmony_ci				iclogs_changed);
265862306a36Sopenharmony_ci	}
265962306a36Sopenharmony_ci}
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ciSTATIC xfs_lsn_t
266262306a36Sopenharmony_cixlog_get_lowest_lsn(
266362306a36Sopenharmony_ci	struct xlog		*log)
266462306a36Sopenharmony_ci{
266562306a36Sopenharmony_ci	struct xlog_in_core	*iclog = log->l_iclog;
266662306a36Sopenharmony_ci	xfs_lsn_t		lowest_lsn = 0, lsn;
266762306a36Sopenharmony_ci
266862306a36Sopenharmony_ci	do {
266962306a36Sopenharmony_ci		if (iclog->ic_state == XLOG_STATE_ACTIVE ||
267062306a36Sopenharmony_ci		    iclog->ic_state == XLOG_STATE_DIRTY)
267162306a36Sopenharmony_ci			continue;
267262306a36Sopenharmony_ci
267362306a36Sopenharmony_ci		lsn = be64_to_cpu(iclog->ic_header.h_lsn);
267462306a36Sopenharmony_ci		if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
267562306a36Sopenharmony_ci			lowest_lsn = lsn;
267662306a36Sopenharmony_ci	} while ((iclog = iclog->ic_next) != log->l_iclog);
267762306a36Sopenharmony_ci
267862306a36Sopenharmony_ci	return lowest_lsn;
267962306a36Sopenharmony_ci}
268062306a36Sopenharmony_ci
268162306a36Sopenharmony_ci/*
268262306a36Sopenharmony_ci * Completion of a iclog IO does not imply that a transaction has completed, as
268362306a36Sopenharmony_ci * transactions can be large enough to span many iclogs. We cannot change the
268462306a36Sopenharmony_ci * tail of the log half way through a transaction as this may be the only
268562306a36Sopenharmony_ci * transaction in the log and moving the tail to point to the middle of it
268662306a36Sopenharmony_ci * will prevent recovery from finding the start of the transaction. Hence we
268762306a36Sopenharmony_ci * should only update the last_sync_lsn if this iclog contains transaction
268862306a36Sopenharmony_ci * completion callbacks on it.
268962306a36Sopenharmony_ci *
269062306a36Sopenharmony_ci * We have to do this before we drop the icloglock to ensure we are the only one
269162306a36Sopenharmony_ci * that can update it.
269262306a36Sopenharmony_ci *
269362306a36Sopenharmony_ci * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
269462306a36Sopenharmony_ci * the reservation grant head pushing. This is due to the fact that the push
269562306a36Sopenharmony_ci * target is bound by the current last_sync_lsn value. Hence if we have a large
269662306a36Sopenharmony_ci * amount of log space bound up in this committing transaction then the
269762306a36Sopenharmony_ci * last_sync_lsn value may be the limiting factor preventing tail pushing from
269862306a36Sopenharmony_ci * freeing space in the log. Hence once we've updated the last_sync_lsn we
269962306a36Sopenharmony_ci * should push the AIL to ensure the push target (and hence the grant head) is
270062306a36Sopenharmony_ci * no longer bound by the old log head location and can move forwards and make
270162306a36Sopenharmony_ci * progress again.
270262306a36Sopenharmony_ci */
static void
xlog_state_set_callback(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	xfs_lsn_t		header_lsn)
{
	/* NOTE(review): per the comment above, the caller appears to hold
	 * l_icloglock here - confirm before relying on it. */
	trace_xlog_iclog_callback(iclog, _RET_IP_);
	/* Mark the iclog ready for its completion callbacks to be run. */
	iclog->ic_state = XLOG_STATE_CALLBACK;

	/* last_sync_lsn must never move backwards past this iclog's lsn. */
	ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
			   header_lsn) <= 0);

	/*
	 * Only update last_sync_lsn when this iclog carries transaction
	 * completion callbacks - see the comment above this function.
	 */
	if (list_empty_careful(&iclog->ic_callbacks))
		return;

	atomic64_set(&log->l_last_sync_lsn, header_lsn);
	/* Kick the AIL so the grant head can move past the old log head. */
	xlog_grant_push_ail(log, 0);
}
272162306a36Sopenharmony_ci
272262306a36Sopenharmony_ci/*
272362306a36Sopenharmony_ci * Return true if we need to stop processing, false to continue to the next
272462306a36Sopenharmony_ci * iclog. The caller will need to run callbacks if the iclog is returned in the
272562306a36Sopenharmony_ci * XLOG_STATE_CALLBACK state.
272662306a36Sopenharmony_ci */
272762306a36Sopenharmony_cistatic bool
272862306a36Sopenharmony_cixlog_state_iodone_process_iclog(
272962306a36Sopenharmony_ci	struct xlog		*log,
273062306a36Sopenharmony_ci	struct xlog_in_core	*iclog)
273162306a36Sopenharmony_ci{
273262306a36Sopenharmony_ci	xfs_lsn_t		lowest_lsn;
273362306a36Sopenharmony_ci	xfs_lsn_t		header_lsn;
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci	switch (iclog->ic_state) {
273662306a36Sopenharmony_ci	case XLOG_STATE_ACTIVE:
273762306a36Sopenharmony_ci	case XLOG_STATE_DIRTY:
273862306a36Sopenharmony_ci		/*
273962306a36Sopenharmony_ci		 * Skip all iclogs in the ACTIVE & DIRTY states:
274062306a36Sopenharmony_ci		 */
274162306a36Sopenharmony_ci		return false;
274262306a36Sopenharmony_ci	case XLOG_STATE_DONE_SYNC:
274362306a36Sopenharmony_ci		/*
274462306a36Sopenharmony_ci		 * Now that we have an iclog that is in the DONE_SYNC state, do
274562306a36Sopenharmony_ci		 * one more check here to see if we have chased our tail around.
274662306a36Sopenharmony_ci		 * If this is not the lowest lsn iclog, then we will leave it
274762306a36Sopenharmony_ci		 * for another completion to process.
274862306a36Sopenharmony_ci		 */
274962306a36Sopenharmony_ci		header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
275062306a36Sopenharmony_ci		lowest_lsn = xlog_get_lowest_lsn(log);
275162306a36Sopenharmony_ci		if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
275262306a36Sopenharmony_ci			return false;
275362306a36Sopenharmony_ci		xlog_state_set_callback(log, iclog, header_lsn);
275462306a36Sopenharmony_ci		return false;
275562306a36Sopenharmony_ci	default:
275662306a36Sopenharmony_ci		/*
275762306a36Sopenharmony_ci		 * Can only perform callbacks in order.  Since this iclog is not
275862306a36Sopenharmony_ci		 * in the DONE_SYNC state, we skip the rest and just try to
275962306a36Sopenharmony_ci		 * clean up.
276062306a36Sopenharmony_ci		 */
276162306a36Sopenharmony_ci		return true;
276262306a36Sopenharmony_ci	}
276362306a36Sopenharmony_ci}
276462306a36Sopenharmony_ci
/*
 * Loop over all the iclogs, running attached callbacks on them. Return true if
 * we ran any callbacks, indicating that we dropped the icloglock. We don't need
 * to handle transient shutdown state here at all because
 * xlog_state_shutdown_callbacks() will be run to do the necessary shutdown
 * cleanup of the callbacks.
 *
 * Called with l_icloglock held; the lock is dropped while the callbacks run
 * and reacquired before returning (see the sparse annotations below).
 */
static bool
xlog_state_do_iclog_callbacks(
	struct xlog		*log)
		__releases(&log->l_icloglock)
		__acquires(&log->l_icloglock)
{
	struct xlog_in_core	*first_iclog = log->l_iclog;
	struct xlog_in_core	*iclog = first_iclog;
	bool			ran_callback = false;

	do {
		LIST_HEAD(cb_list);

		/* Stop at the first iclog that cannot run callbacks yet. */
		if (xlog_state_iodone_process_iclog(log, iclog))
			break;
		if (iclog->ic_state != XLOG_STATE_CALLBACK) {
			iclog = iclog->ic_next;
			continue;
		}
		/*
		 * Detach the callback list while still under the lock, then
		 * drop the lock to run the (potentially slow) callbacks.
		 */
		list_splice_init(&iclog->ic_callbacks, &cb_list);
		spin_unlock(&log->l_icloglock);

		trace_xlog_iclog_callbacks_start(iclog, _RET_IP_);
		xlog_cil_process_committed(&cb_list);
		trace_xlog_iclog_callbacks_done(iclog, _RET_IP_);
		ran_callback = true;

		/* Retake the lock before transitioning the iclog to DIRTY. */
		spin_lock(&log->l_icloglock);
		xlog_state_clean_iclog(log, iclog);
		iclog = iclog->ic_next;
	} while (iclog != first_iclog);

	return ran_callback;
}
280662306a36Sopenharmony_ci
280762306a36Sopenharmony_ci
280862306a36Sopenharmony_ci/*
280962306a36Sopenharmony_ci * Loop running iclog completion callbacks until there are no more iclogs in a
281062306a36Sopenharmony_ci * state that can run callbacks.
281162306a36Sopenharmony_ci */
281262306a36Sopenharmony_ciSTATIC void
281362306a36Sopenharmony_cixlog_state_do_callback(
281462306a36Sopenharmony_ci	struct xlog		*log)
281562306a36Sopenharmony_ci{
281662306a36Sopenharmony_ci	int			flushcnt = 0;
281762306a36Sopenharmony_ci	int			repeats = 0;
281862306a36Sopenharmony_ci
281962306a36Sopenharmony_ci	spin_lock(&log->l_icloglock);
282062306a36Sopenharmony_ci	while (xlog_state_do_iclog_callbacks(log)) {
282162306a36Sopenharmony_ci		if (xlog_is_shutdown(log))
282262306a36Sopenharmony_ci			break;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci		if (++repeats > 5000) {
282562306a36Sopenharmony_ci			flushcnt += repeats;
282662306a36Sopenharmony_ci			repeats = 0;
282762306a36Sopenharmony_ci			xfs_warn(log->l_mp,
282862306a36Sopenharmony_ci				"%s: possible infinite loop (%d iterations)",
282962306a36Sopenharmony_ci				__func__, flushcnt);
283062306a36Sopenharmony_ci		}
283162306a36Sopenharmony_ci	}
283262306a36Sopenharmony_ci
283362306a36Sopenharmony_ci	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE)
283462306a36Sopenharmony_ci		wake_up_all(&log->l_flush_wait);
283562306a36Sopenharmony_ci
283662306a36Sopenharmony_ci	spin_unlock(&log->l_icloglock);
283762306a36Sopenharmony_ci}
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_ci
284062306a36Sopenharmony_ci/*
284162306a36Sopenharmony_ci * Finish transitioning this iclog to the dirty state.
284262306a36Sopenharmony_ci *
284362306a36Sopenharmony_ci * Callbacks could take time, so they are done outside the scope of the
284462306a36Sopenharmony_ci * global state machine log lock.
284562306a36Sopenharmony_ci */
STATIC void
xlog_state_done_syncing(
	struct xlog_in_core	*iclog)
{
	struct xlog		*log = iclog->ic_log;

	spin_lock(&log->l_icloglock);
	/* I/O completion must have dropped the last active reference. */
	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
	trace_xlog_iclog_sync_done(iclog, _RET_IP_);

	/*
	 * If we got an error, either on the first buffer, or in the case of
	 * split log writes, on the second, we shut down the file system and
	 * no iclogs should ever be attempted to be written to disk again.
	 */
	if (!xlog_is_shutdown(log)) {
		ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
		iclog->ic_state = XLOG_STATE_DONE_SYNC;
	}

	/*
	 * Someone could be sleeping prior to writing out the next
	 * iclog buffer, we wake them all, one will get to do the
	 * I/O, the others get to wait for the result.
	 */
	wake_up_all(&iclog->ic_write_wait);
	spin_unlock(&log->l_icloglock);
	/* Completion callbacks must run outside the icloglock. */
	xlog_state_do_callback(log);
}
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci/*
287762306a36Sopenharmony_ci * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
287862306a36Sopenharmony_ci * sleep.  We wait on the flush queue on the head iclog as that should be
287962306a36Sopenharmony_ci * the first iclog to complete flushing. Hence if all iclogs are syncing,
288062306a36Sopenharmony_ci * we will wait here and all new writes will sleep until a sync completes.
288162306a36Sopenharmony_ci *
288262306a36Sopenharmony_ci * The in-core logs are used in a circular fashion. They are not used
288362306a36Sopenharmony_ci * out-of-order even when an iclog past the head is free.
288462306a36Sopenharmony_ci *
288562306a36Sopenharmony_ci * return:
288662306a36Sopenharmony_ci *	* log_offset where xlog_write() can start writing into the in-core
288762306a36Sopenharmony_ci *		log's data space.
288862306a36Sopenharmony_ci *	* in-core log pointer to which xlog_write() should write.
288962306a36Sopenharmony_ci *	* boolean indicating this is a continued write to an in-core log.
289062306a36Sopenharmony_ci *		If this is the last write, then the in-core log's offset field
289162306a36Sopenharmony_ci *		needs to be incremented, depending on the amount of data which
289262306a36Sopenharmony_ci *		is copied.
289362306a36Sopenharmony_ci */
STATIC int
xlog_state_get_iclog_space(
	struct xlog		*log,
	int			len,
	struct xlog_in_core	**iclogp,
	struct xlog_ticket	*ticket,
	int			*logoffsetp)
{
	int		  log_offset;
	xlog_rec_header_t *head;
	xlog_in_core_t	  *iclog;

restart:
	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log)) {
		spin_unlock(&log->l_icloglock);
		return -EIO;
	}

	iclog = log->l_iclog;
	if (iclog->ic_state != XLOG_STATE_ACTIVE) {
		XFS_STATS_INC(log->l_mp, xs_log_noiclogs);

		/*
		 * Wait for log writes to have flushed.  xlog_wait() drops
		 * l_icloglock, so on wakeup we must retake the lock and
		 * re-evaluate everything from the top.
		 */
		xlog_wait(&log->l_flush_wait, &log->l_icloglock);
		goto restart;
	}

	head = &iclog->ic_header;

	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
	log_offset = iclog->ic_offset;

	trace_xlog_iclog_get_space(iclog, _RET_IP_);

	/* On the 1st write to an iclog, figure out lsn.  This works
	 * if iclogs marked XLOG_STATE_WANT_SYNC always write out what they are
	 * committing to.  If the offset is set, that's how many blocks
	 * must be written.
	 */
	if (log_offset == 0) {
		/* First writer also pays for the iclog header out of its
		 * ticket reservation. */
		ticket->t_curr_res -= log->l_iclog_hsize;
		head->h_cycle = cpu_to_be32(log->l_curr_cycle);
		head->h_lsn = cpu_to_be64(
			xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
		ASSERT(log->l_curr_block >= 0);
	}

	/* If there is enough room to write everything, then do it.  Otherwise,
	 * claim the rest of the region and make sure the XLOG_STATE_WANT_SYNC
	 * bit is on, so this will get flushed out.  Don't update ic_offset
	 * until you know exactly how many bytes get copied.  Therefore, wait
	 * until later to update ic_offset.
	 *
	 * xlog_write() algorithm assumes that at least 2 xlog_op_header_t's
	 * can fit into remaining data section.
	 */
	if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
		int		error = 0;

		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);

		/*
		 * If we are the only one writing to this iclog, sync it to
		 * disk.  We need to do an atomic compare and decrement here to
		 * avoid racing with concurrent atomic_dec_and_lock() calls in
		 * xlog_state_release_iclog() when there is more than one
		 * reference to the iclog.
		 */
		if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
			error = xlog_state_release_iclog(log, iclog, ticket);
		spin_unlock(&log->l_icloglock);
		if (error)
			return error;
		/* The iclog we grabbed was too full; go pick up the next one. */
		goto restart;
	}

	/* Do we have enough room to write the full amount in the remainder
	 * of this iclog?  Or must we continue a write on the next iclog and
	 * mark this iclog as completely taken?  In the case where we switch
	 * iclogs (to mark it taken), this particular iclog will release/sync
	 * to disk in xlog_write().
	 */
	if (len <= iclog->ic_size - iclog->ic_offset)
		iclog->ic_offset += len;
	else
		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
	*iclogp = iclog;

	ASSERT(iclog->ic_offset <= iclog->ic_size);
	spin_unlock(&log->l_icloglock);

	*logoffsetp = log_offset;
	return 0;
}
298962306a36Sopenharmony_ci
/*
 * The first cnt-1 times a ticket goes through here we don't need to move the
 * grant write head because the permanent reservation has reserved cnt times the
 * unit amount.  Release part of current permanent unit reservation and reset
 * current reservation to be one units worth.  Also move grant reservation head
 * forward.
 */
void
xfs_log_ticket_regrant(
	struct xlog		*log,
	struct xlog_ticket	*ticket)
{
	trace_xfs_log_ticket_regrant(log, ticket);

	/* Consume one of the pre-reserved counts, if any remain. */
	if (ticket->t_cnt > 0)
		ticket->t_cnt--;

	/* Give back the unused part of the current reservation. */
	xlog_grant_sub_space(log, &log->l_reserve_head.grant,
					ticket->t_curr_res);
	xlog_grant_sub_space(log, &log->l_write_head.grant,
					ticket->t_curr_res);
	ticket->t_curr_res = ticket->t_unit_res;

	trace_xfs_log_ticket_regrant_sub(log, ticket);

	/*
	 * If some pre-reserved space is left (t_cnt > 0) it already covers
	 * the next use, so there is nothing to regrant.  Only when the count
	 * has run out do we have to take a fresh unit from the reserve grant
	 * head.
	 */
	if (!ticket->t_cnt) {
		xlog_grant_add_space(log, &log->l_reserve_head.grant,
				     ticket->t_unit_res);
		trace_xfs_log_ticket_regrant_exit(log, ticket);

		ticket->t_curr_res = ticket->t_unit_res;
	}

	xfs_log_ticket_put(ticket);
}
302662306a36Sopenharmony_ci
/*
 * Give back the space left from a reservation.
 *
 * All the information we need to make a correct determination of space left
 * is present.  For non-permanent reservations, things are quite easy.  The
 * count should have been decremented to zero.  We only need to deal with the
 * space remaining in the current reservation part of the ticket.  If the
 * ticket contains a permanent reservation, there may be left over space which
 * needs to be released.  A count of N means that N-1 refills of the current
 * reservation can be done before we need to ask for more space.  The first
 * one goes to fill up the first current reservation.  Once we run out of
 * space, the count will stay at zero and the only space remaining will be
 * in the current reservation field.
 */
void
xfs_log_ticket_ungrant(
	struct xlog		*log,
	struct xlog_ticket	*ticket)
{
	int			bytes;

	trace_xfs_log_ticket_ungrant(log, ticket);

	/* Drop the count for the use that is being retired right now. */
	if (ticket->t_cnt > 0)
		ticket->t_cnt--;

	trace_xfs_log_ticket_ungrant_sub(log, ticket);

	/*
	 * If this is a permanent reservation ticket, we may be able to free
	 * up more space based on the remaining count.
	 */
	bytes = ticket->t_curr_res;
	if (ticket->t_cnt > 0) {
		ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
		bytes += ticket->t_unit_res*ticket->t_cnt;
	}

	xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
	xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);

	trace_xfs_log_ticket_ungrant_exit(log, ticket);

	/* Space was returned to the grant heads; wake anybody waiting on it. */
	xfs_log_space_wake(log->l_mp);
	xfs_log_ticket_put(ticket);
}
307362306a36Sopenharmony_ci
/*
 * This routine will mark the current iclog in the ring as WANT_SYNC and move
 * the current iclog pointer to the next iclog in the ring.
 *
 * Caller must hold l_icloglock (asserted below), and the iclog must still be
 * in the ACTIVE state.  @eventual_size is the number of data bytes the iclog
 * will eventually contain; 0 means "whatever has been written so far"
 * (ic_offset).
 */
void
xlog_state_switch_iclogs(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			eventual_size)
{
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
	assert_spin_locked(&log->l_icloglock);
	trace_xlog_iclog_switch(iclog, _RET_IP_);

	if (!eventual_size)
		eventual_size = iclog->ic_offset;
	iclog->ic_state = XLOG_STATE_WANT_SYNC;
	iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block);
	log->l_prev_block = log->l_curr_block;
	log->l_prev_cycle = log->l_curr_cycle;

	/* roll log?: ic_offset changed later */
	log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);

	/* Round up to next log-sunit */
	if (log->l_iclog_roundoff > BBSIZE) {
		uint32_t sunit_bb = BTOBB(log->l_iclog_roundoff);
		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
	}

	if (log->l_curr_block >= log->l_logBBsize) {
		/*
		 * Rewind the current block before the cycle is bumped to make
		 * sure that the combined LSN never transiently moves forward
		 * when the log wraps to the next cycle. This is to support the
		 * unlocked sample of these fields from xlog_valid_lsn(). Most
		 * other cases should acquire l_icloglock.
		 */
		log->l_curr_block -= log->l_logBBsize;
		ASSERT(log->l_curr_block >= 0);
		smp_wmb();
		log->l_curr_cycle++;
		/* Never hand out a cycle number equal to the header magic. */
		if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
			log->l_curr_cycle++;
	}
	ASSERT(iclog == log->l_iclog);
	log->l_iclog = iclog->ic_next;
}
312262306a36Sopenharmony_ci
/*
 * Force the iclog to disk and check if the iclog has been completed before
 * xlog_force_iclog() returns. This can happen on synchronous (e.g.
 * pmem) or fast async storage because we drop the icloglock to issue the IO.
 * If completion has already occurred, tell the caller so that it can avoid an
 * unnecessary wait on the iclog.
 *
 * Returns 0 on success (with *completed set accordingly) or the error from
 * xlog_force_iclog().
 */
static int
xlog_force_and_check_iclog(
	struct xlog_in_core	*iclog,
	bool			*completed)
{
	/* Sample the LSN before the force so we can detect reuse afterwards. */
	xfs_lsn_t		lsn = be64_to_cpu(iclog->ic_header.h_lsn);
	int			error;

	*completed = false;
	error = xlog_force_iclog(iclog);
	if (error)
		return error;

	/*
	 * If the iclog has already been completed and reused the header LSN
	 * will have been rewritten by completion
	 */
	if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
		*completed = true;
	return 0;
}
315162306a36Sopenharmony_ci
/*
 * Write out all data in the in-core log as of this exact moment in time.
 *
 * Data may be written to the in-core log during this call.  However,
 * we don't guarantee this data will be written out.  A change from past
 * implementation means this routine will *not* write out zero length LRs.
 *
 * Basically, we try and perform an intelligent scan of the in-core logs.
 * If we determine there is no flushable data, we just return.  There is no
 * flushable data if:
 *
 *	1. the current iclog is active and has no data; the previous iclog
 *		is in the active or dirty state.
 *	2. the current iclog is dirty, and the previous iclog is in the
 *		active or dirty state.
 *
 * We may sleep if:
 *
 *	1. the current iclog is not in the active nor dirty state.
 *	2. the current iclog is dirty, and the previous iclog is not in the
 *		active nor dirty state.
 *	3. the current iclog is active, and there is another thread writing
 *		to this particular iclog.
 *	4. a) the current iclog is active and has no other writers
 *	   b) when we return from flushing out this iclog, it is still
 *		not in the active nor dirty state.
 */
int
xfs_log_force(
	struct xfs_mount	*mp,
	uint			flags)
{
	struct xlog		*log = mp->m_log;
	struct xlog_in_core	*iclog;

	XFS_STATS_INC(mp, xs_log_force);
	trace_xfs_log_force(mp, 0, _RET_IP_);

	/* Push any pending CIL changes into the iclogs first. */
	xlog_cil_force(log);

	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log))
		goto out_error;

	iclog = log->l_iclog;
	trace_xlog_iclog_force(iclog, _RET_IP_);

	if (iclog->ic_state == XLOG_STATE_DIRTY ||
	    (iclog->ic_state == XLOG_STATE_ACTIVE &&
	     atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
		/*
		 * If the head is dirty or (active and empty), then we need to
		 * look at the previous iclog.
		 *
		 * If the previous iclog is active or dirty we are done.  There
		 * is nothing to sync out. Otherwise, we attach ourselves to the
		 * previous iclog and go to sleep.
		 */
		iclog = iclog->ic_prev;
	} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
		if (atomic_read(&iclog->ic_refcnt) == 0) {
			/* We have exclusive access to this iclog. */
			bool	completed;

			if (xlog_force_and_check_iclog(iclog, &completed))
				goto out_error;

			if (completed)
				goto out_unlock;
		} else {
			/*
			 * Someone else is still writing to this iclog, so we
			 * need to ensure that when they release the iclog it
			 * gets synced immediately as we may be waiting on it.
			 */
			xlog_state_switch_iclogs(log, iclog, 0);
		}
	}

	/*
	 * The iclog we are about to wait on may contain the checkpoint pushed
	 * by the above xlog_cil_force() call, but it may not have been pushed
	 * to disk yet. Like the ACTIVE case above, we need to make sure caches
	 * are flushed when this iclog is written.
	 */
	if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;

	if (flags & XFS_LOG_SYNC)
		return xlog_wait_on_iclog(iclog);
out_unlock:
	spin_unlock(&log->l_icloglock);
	return 0;
out_error:
	spin_unlock(&log->l_icloglock);
	return -EIO;
}
324962306a36Sopenharmony_ci
/*
 * Force the log to a specific LSN.
 *
 * If an iclog with that lsn can be found:
 *	If it is in the DIRTY state, just return.
 *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
 *		state and go to sleep or return.
 *	If it is in any other state, go to sleep or return.
 *
 * Synchronous forces are implemented with a wait queue.  All callers trying
 * to force a given lsn to disk must wait on the queue attached to the
 * specific in-core log.  When given in-core log finally completes its write
 * to disk, that thread will wake up all threads waiting on the queue.
 *
 * Returns -EAGAIN if @already_slept was false and we went to sleep waiting
 * on the previous iclog; the caller is expected to retry with
 * already_slept = true.
 */
static int
xlog_force_lsn(
	struct xlog		*log,
	xfs_lsn_t		lsn,
	uint			flags,
	int			*log_flushed,
	bool			already_slept)
{
	struct xlog_in_core	*iclog;
	bool			completed;

	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log))
		goto out_error;

	/* Walk the ring looking for the iclog carrying @lsn. */
	iclog = log->l_iclog;
	while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
		trace_xlog_iclog_force_lsn(iclog, _RET_IP_);
		iclog = iclog->ic_next;
		if (iclog == log->l_iclog)
			goto out_unlock;
	}

	switch (iclog->ic_state) {
	case XLOG_STATE_ACTIVE:
		/*
		 * We sleep here if we haven't already slept (e.g. this is the
		 * first time we've looked at the correct iclog buf) and the
		 * buffer before us is going to be sync'ed.  The reason for this
		 * is that if we are doing sync transactions here, by waiting
		 * for the previous I/O to complete, we can allow a few more
		 * transactions into this iclog before we close it down.
		 *
		 * Otherwise, we mark the buffer WANT_SYNC, and bump up the
		 * refcnt so we can release the log (which drops the ref count).
		 * The state switch keeps new transaction commits from using
		 * this buffer.  When the current commits finish writing into
		 * the buffer, the refcount will drop to zero and the buffer
		 * will go out then.
		 */
		if (!already_slept &&
		    (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
		     iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
			/* xlog_wait() drops l_icloglock for us. */
			xlog_wait(&iclog->ic_prev->ic_write_wait,
					&log->l_icloglock);
			return -EAGAIN;
		}
		if (xlog_force_and_check_iclog(iclog, &completed))
			goto out_error;
		if (log_flushed)
			*log_flushed = 1;
		if (completed)
			goto out_unlock;
		break;
	case XLOG_STATE_WANT_SYNC:
		/*
		 * This iclog may contain the checkpoint pushed by the
		 * xlog_cil_force_seq() call, but there are other writers still
		 * accessing it so it hasn't been pushed to disk yet. Like the
		 * ACTIVE case above, we need to make sure caches are flushed
		 * when this iclog is written.
		 */
		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
		break;
	default:
		/*
		 * The entire checkpoint was written by the CIL force and is on
		 * its way to disk already. It will be stable when it
		 * completes, so we don't need to manipulate caches here at all.
		 * We just need to wait for completion if necessary.
		 */
		break;
	}

	if (flags & XFS_LOG_SYNC)
		return xlog_wait_on_iclog(iclog);
out_unlock:
	spin_unlock(&log->l_icloglock);
	return 0;
out_error:
	spin_unlock(&log->l_icloglock);
	return -EIO;
}
334762306a36Sopenharmony_ci
334862306a36Sopenharmony_ci/*
334962306a36Sopenharmony_ci * Force the log to a specific checkpoint sequence.
335062306a36Sopenharmony_ci *
335162306a36Sopenharmony_ci * First force the CIL so that all the required changes have been flushed to the
335262306a36Sopenharmony_ci * iclogs. If the CIL force completed it will return a commit LSN that indicates
335362306a36Sopenharmony_ci * the iclog that needs to be flushed to stable storage. If the caller needs
335462306a36Sopenharmony_ci * a synchronous log force, we will wait on the iclog with the LSN returned by
335562306a36Sopenharmony_ci * xlog_cil_force_seq() to be completed.
335662306a36Sopenharmony_ci */
335762306a36Sopenharmony_ciint
335862306a36Sopenharmony_cixfs_log_force_seq(
335962306a36Sopenharmony_ci	struct xfs_mount	*mp,
336062306a36Sopenharmony_ci	xfs_csn_t		seq,
336162306a36Sopenharmony_ci	uint			flags,
336262306a36Sopenharmony_ci	int			*log_flushed)
336362306a36Sopenharmony_ci{
336462306a36Sopenharmony_ci	struct xlog		*log = mp->m_log;
336562306a36Sopenharmony_ci	xfs_lsn_t		lsn;
336662306a36Sopenharmony_ci	int			ret;
336762306a36Sopenharmony_ci	ASSERT(seq != 0);
336862306a36Sopenharmony_ci
336962306a36Sopenharmony_ci	XFS_STATS_INC(mp, xs_log_force);
337062306a36Sopenharmony_ci	trace_xfs_log_force(mp, seq, _RET_IP_);
337162306a36Sopenharmony_ci
337262306a36Sopenharmony_ci	lsn = xlog_cil_force_seq(log, seq);
337362306a36Sopenharmony_ci	if (lsn == NULLCOMMITLSN)
337462306a36Sopenharmony_ci		return 0;
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_ci	ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
337762306a36Sopenharmony_ci	if (ret == -EAGAIN) {
337862306a36Sopenharmony_ci		XFS_STATS_INC(mp, xs_log_force_sleep);
337962306a36Sopenharmony_ci		ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
338062306a36Sopenharmony_ci	}
338162306a36Sopenharmony_ci	return ret;
338262306a36Sopenharmony_ci}
338362306a36Sopenharmony_ci
338462306a36Sopenharmony_ci/*
338562306a36Sopenharmony_ci * Free a used ticket when its refcount falls to zero.
338662306a36Sopenharmony_ci */
338762306a36Sopenharmony_civoid
338862306a36Sopenharmony_cixfs_log_ticket_put(
338962306a36Sopenharmony_ci	xlog_ticket_t	*ticket)
339062306a36Sopenharmony_ci{
339162306a36Sopenharmony_ci	ASSERT(atomic_read(&ticket->t_ref) > 0);
339262306a36Sopenharmony_ci	if (atomic_dec_and_test(&ticket->t_ref))
339362306a36Sopenharmony_ci		kmem_cache_free(xfs_log_ticket_cache, ticket);
339462306a36Sopenharmony_ci}
339562306a36Sopenharmony_ci
/*
 * Take an additional reference to a log ticket and return it for caller
 * convenience.  The caller must already hold a reference (asserted), so the
 * ticket cannot be freed out from under us.
 */
xlog_ticket_t *
xfs_log_ticket_get(
	xlog_ticket_t	*ticket)
{
	ASSERT(atomic_read(&ticket->t_ref) > 0);
	atomic_inc(&ticket->t_ref);
	return ticket;
}
340462306a36Sopenharmony_ci
/*
 * Figure out the total log space unit (in bytes) that would be
 * required for a log ticket.
 *
 * @unit_bytes is the transaction's own reservation; we add all the log
 * metadata overhead (op headers, record headers, roundoff) on top of it.
 * If @niclogs is non-NULL, it returns the worst-case number of log record
 * (iclog) headers accounted for in the result.
 */
static int
xlog_calc_unit_res(
	struct xlog		*log,
	int			unit_bytes,
	int			*niclogs)
{
	int			iclog_space;
	uint			num_headers;

	/*
	 * Permanent reservations have up to 'cnt'-1 active log operations
	 * in the log.  A unit in this case is the amount of space for one
	 * of these log operations.  Normal reservations have a cnt of 1
	 * and their unit amount is the total amount of space required.
	 *
	 * The following lines of code account for non-transaction data
	 * which occupy space in the on-disk log.
	 *
	 * Normal form of a transaction is:
	 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
	 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
	 *
	 * We need to account for all the leadup data and trailer data
	 * around the transaction data.
	 * And then we need to account for the worst case in terms of using
	 * more space.
	 * The worst case will happen if:
	 * - the placement of the transaction happens to be such that the
	 *   roundoff is at its maximum
	 * - the transaction data is synced before the commit record is synced
	 *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
	 *   Therefore the commit record is in its own Log Record.
	 *   This can happen as the commit record is called with its
	 *   own region to xlog_write().
	 *   This then means that in the worst case, roundoff can happen for
	 *   the commit-rec as well.
	 *   The commit-rec is smaller than padding in this scenario and so it is
	 *   not added separately.
	 */

	/* for trans header */
	unit_bytes += sizeof(xlog_op_header_t);
	unit_bytes += sizeof(xfs_trans_header_t);

	/* for start-rec */
	unit_bytes += sizeof(xlog_op_header_t);

	/*
	 * for LR headers - the space for data in an iclog is the size minus
	 * the space used for the headers. If we use the iclog size, then we
	 * undercalculate the number of headers required.
	 *
	 * Furthermore - the addition of op headers for split-recs might
	 * increase the space required enough to require more log and op
	 * headers, so take that into account too.
	 *
	 * IMPORTANT: This reservation makes the assumption that if this
	 * transaction is the first in an iclog and hence has the LR headers
	 * accounted to it, then the remaining space in the iclog is
	 * exclusively for this transaction.  i.e. if the transaction is larger
	 * than the iclog, it will be the only thing in that iclog.
	 * Fundamentally, this means we must pass the entire log vector to
	 * xlog_write to guarantee this.
	 */
	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
	num_headers = howmany(unit_bytes, iclog_space);

	/* for split-recs - ophdrs added when data split over LRs */
	unit_bytes += sizeof(xlog_op_header_t) * num_headers;

	/* add extra header reservations if we overrun */
	while (!num_headers ||
	       howmany(unit_bytes, iclog_space) > num_headers) {
		unit_bytes += sizeof(xlog_op_header_t);
		num_headers++;
	}
	unit_bytes += log->l_iclog_hsize * num_headers;

	/* for commit-rec LR header - note: padding will subsume the ophdr */
	unit_bytes += log->l_iclog_hsize;

	/* roundoff padding for transaction data and one for commit record */
	unit_bytes += 2 * log->l_iclog_roundoff;

	if (niclogs)
		*niclogs = num_headers;
	return unit_bytes;
}
349762306a36Sopenharmony_ci
349862306a36Sopenharmony_ciint
349962306a36Sopenharmony_cixfs_log_calc_unit_res(
350062306a36Sopenharmony_ci	struct xfs_mount	*mp,
350162306a36Sopenharmony_ci	int			unit_bytes)
350262306a36Sopenharmony_ci{
350362306a36Sopenharmony_ci	return xlog_calc_unit_res(mp->m_log, unit_bytes, NULL);
350462306a36Sopenharmony_ci}
350562306a36Sopenharmony_ci
350662306a36Sopenharmony_ci/*
350762306a36Sopenharmony_ci * Allocate and initialise a new log ticket.
350862306a36Sopenharmony_ci */
350962306a36Sopenharmony_cistruct xlog_ticket *
351062306a36Sopenharmony_cixlog_ticket_alloc(
351162306a36Sopenharmony_ci	struct xlog		*log,
351262306a36Sopenharmony_ci	int			unit_bytes,
351362306a36Sopenharmony_ci	int			cnt,
351462306a36Sopenharmony_ci	bool			permanent)
351562306a36Sopenharmony_ci{
351662306a36Sopenharmony_ci	struct xlog_ticket	*tic;
351762306a36Sopenharmony_ci	int			unit_res;
351862306a36Sopenharmony_ci
351962306a36Sopenharmony_ci	tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL);
352062306a36Sopenharmony_ci
352162306a36Sopenharmony_ci	unit_res = xlog_calc_unit_res(log, unit_bytes, &tic->t_iclog_hdrs);
352262306a36Sopenharmony_ci
352362306a36Sopenharmony_ci	atomic_set(&tic->t_ref, 1);
352462306a36Sopenharmony_ci	tic->t_task		= current;
352562306a36Sopenharmony_ci	INIT_LIST_HEAD(&tic->t_queue);
352662306a36Sopenharmony_ci	tic->t_unit_res		= unit_res;
352762306a36Sopenharmony_ci	tic->t_curr_res		= unit_res;
352862306a36Sopenharmony_ci	tic->t_cnt		= cnt;
352962306a36Sopenharmony_ci	tic->t_ocnt		= cnt;
353062306a36Sopenharmony_ci	tic->t_tid		= get_random_u32();
353162306a36Sopenharmony_ci	if (permanent)
353262306a36Sopenharmony_ci		tic->t_flags |= XLOG_TIC_PERM_RESERV;
353362306a36Sopenharmony_ci
353462306a36Sopenharmony_ci	return tic;
353562306a36Sopenharmony_ci}
353662306a36Sopenharmony_ci
353762306a36Sopenharmony_ci#if defined(DEBUG)
353862306a36Sopenharmony_ci/*
353962306a36Sopenharmony_ci * Check to make sure the grant write head didn't just over lap the tail.  If
354062306a36Sopenharmony_ci * the cycles are the same, we can't be overlapping.  Otherwise, make sure that
354162306a36Sopenharmony_ci * the cycles differ by exactly one and check the byte count.
354262306a36Sopenharmony_ci *
354362306a36Sopenharmony_ci * This check is run unlocked, so can give false positives. Rather than assert
354462306a36Sopenharmony_ci * on failures, use a warn-once flag and a panic tag to allow the admin to
354562306a36Sopenharmony_ci * determine if they want to panic the machine when such an error occurs. For
354662306a36Sopenharmony_ci * debug kernels this will have the same effect as using an assert but, unlinke
354762306a36Sopenharmony_ci * an assert, it can be turned off at runtime.
354862306a36Sopenharmony_ci */
354962306a36Sopenharmony_ciSTATIC void
355062306a36Sopenharmony_cixlog_verify_grant_tail(
355162306a36Sopenharmony_ci	struct xlog	*log)
355262306a36Sopenharmony_ci{
355362306a36Sopenharmony_ci	int		tail_cycle, tail_blocks;
355462306a36Sopenharmony_ci	int		cycle, space;
355562306a36Sopenharmony_ci
355662306a36Sopenharmony_ci	xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
355762306a36Sopenharmony_ci	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
355862306a36Sopenharmony_ci	if (tail_cycle != cycle) {
355962306a36Sopenharmony_ci		if (cycle - 1 != tail_cycle &&
356062306a36Sopenharmony_ci		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
356162306a36Sopenharmony_ci			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
356262306a36Sopenharmony_ci				"%s: cycle - 1 != tail_cycle", __func__);
356362306a36Sopenharmony_ci		}
356462306a36Sopenharmony_ci
356562306a36Sopenharmony_ci		if (space > BBTOB(tail_blocks) &&
356662306a36Sopenharmony_ci		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
356762306a36Sopenharmony_ci			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
356862306a36Sopenharmony_ci				"%s: space > BBTOB(tail_blocks)", __func__);
356962306a36Sopenharmony_ci		}
357062306a36Sopenharmony_ci	}
357162306a36Sopenharmony_ci}
357262306a36Sopenharmony_ci
357362306a36Sopenharmony_ci/* check if it will fit */
357462306a36Sopenharmony_ciSTATIC void
357562306a36Sopenharmony_cixlog_verify_tail_lsn(
357662306a36Sopenharmony_ci	struct xlog		*log,
357762306a36Sopenharmony_ci	struct xlog_in_core	*iclog)
357862306a36Sopenharmony_ci{
357962306a36Sopenharmony_ci	xfs_lsn_t	tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
358062306a36Sopenharmony_ci	int		blocks;
358162306a36Sopenharmony_ci
358262306a36Sopenharmony_ci    if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
358362306a36Sopenharmony_ci	blocks =
358462306a36Sopenharmony_ci	    log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
358562306a36Sopenharmony_ci	if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
358662306a36Sopenharmony_ci		xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
358762306a36Sopenharmony_ci    } else {
358862306a36Sopenharmony_ci	ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
358962306a36Sopenharmony_ci
359062306a36Sopenharmony_ci	if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
359162306a36Sopenharmony_ci		xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
359262306a36Sopenharmony_ci
359362306a36Sopenharmony_ci	blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
359462306a36Sopenharmony_ci	if (blocks < BTOBB(iclog->ic_offset) + 1)
359562306a36Sopenharmony_ci		xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
359662306a36Sopenharmony_ci    }
359762306a36Sopenharmony_ci}
359862306a36Sopenharmony_ci
359962306a36Sopenharmony_ci/*
360062306a36Sopenharmony_ci * Perform a number of checks on the iclog before writing to disk.
360162306a36Sopenharmony_ci *
360262306a36Sopenharmony_ci * 1. Make sure the iclogs are still circular
360362306a36Sopenharmony_ci * 2. Make sure we have a good magic number
360462306a36Sopenharmony_ci * 3. Make sure we don't have magic numbers in the data
360562306a36Sopenharmony_ci * 4. Check fields of each log operation header for:
360662306a36Sopenharmony_ci *	A. Valid client identifier
360762306a36Sopenharmony_ci *	B. tid ptr value falls in valid ptr space (user space code)
360862306a36Sopenharmony_ci *	C. Length in log record header is correct according to the
360962306a36Sopenharmony_ci *		individual operation headers within record.
361062306a36Sopenharmony_ci * 5. When a bwrite will occur within 5 blocks of the front of the physical
361162306a36Sopenharmony_ci *	log, check the preceding blocks of the physical log to make sure all
361262306a36Sopenharmony_ci *	the cycle numbers agree with the current cycle number.
361362306a36Sopenharmony_ci */
361462306a36Sopenharmony_ciSTATIC void
361562306a36Sopenharmony_cixlog_verify_iclog(
361662306a36Sopenharmony_ci	struct xlog		*log,
361762306a36Sopenharmony_ci	struct xlog_in_core	*iclog,
361862306a36Sopenharmony_ci	int			count)
361962306a36Sopenharmony_ci{
362062306a36Sopenharmony_ci	xlog_op_header_t	*ophead;
362162306a36Sopenharmony_ci	xlog_in_core_t		*icptr;
362262306a36Sopenharmony_ci	xlog_in_core_2_t	*xhdr;
362362306a36Sopenharmony_ci	void			*base_ptr, *ptr, *p;
362462306a36Sopenharmony_ci	ptrdiff_t		field_offset;
362562306a36Sopenharmony_ci	uint8_t			clientid;
362662306a36Sopenharmony_ci	int			len, i, j, k, op_len;
362762306a36Sopenharmony_ci	int			idx;
362862306a36Sopenharmony_ci
362962306a36Sopenharmony_ci	/* check validity of iclog pointers */
363062306a36Sopenharmony_ci	spin_lock(&log->l_icloglock);
363162306a36Sopenharmony_ci	icptr = log->l_iclog;
363262306a36Sopenharmony_ci	for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
363362306a36Sopenharmony_ci		ASSERT(icptr);
363462306a36Sopenharmony_ci
363562306a36Sopenharmony_ci	if (icptr != log->l_iclog)
363662306a36Sopenharmony_ci		xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
363762306a36Sopenharmony_ci	spin_unlock(&log->l_icloglock);
363862306a36Sopenharmony_ci
363962306a36Sopenharmony_ci	/* check log magic numbers */
364062306a36Sopenharmony_ci	if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
364162306a36Sopenharmony_ci		xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
364262306a36Sopenharmony_ci
364362306a36Sopenharmony_ci	base_ptr = ptr = &iclog->ic_header;
364462306a36Sopenharmony_ci	p = &iclog->ic_header;
364562306a36Sopenharmony_ci	for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
364662306a36Sopenharmony_ci		if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
364762306a36Sopenharmony_ci			xfs_emerg(log->l_mp, "%s: unexpected magic num",
364862306a36Sopenharmony_ci				__func__);
364962306a36Sopenharmony_ci	}
365062306a36Sopenharmony_ci
365162306a36Sopenharmony_ci	/* check fields */
365262306a36Sopenharmony_ci	len = be32_to_cpu(iclog->ic_header.h_num_logops);
365362306a36Sopenharmony_ci	base_ptr = ptr = iclog->ic_datap;
365462306a36Sopenharmony_ci	ophead = ptr;
365562306a36Sopenharmony_ci	xhdr = iclog->ic_data;
365662306a36Sopenharmony_ci	for (i = 0; i < len; i++) {
365762306a36Sopenharmony_ci		ophead = ptr;
365862306a36Sopenharmony_ci
365962306a36Sopenharmony_ci		/* clientid is only 1 byte */
366062306a36Sopenharmony_ci		p = &ophead->oh_clientid;
366162306a36Sopenharmony_ci		field_offset = p - base_ptr;
366262306a36Sopenharmony_ci		if (field_offset & 0x1ff) {
366362306a36Sopenharmony_ci			clientid = ophead->oh_clientid;
366462306a36Sopenharmony_ci		} else {
366562306a36Sopenharmony_ci			idx = BTOBBT((void *)&ophead->oh_clientid - iclog->ic_datap);
366662306a36Sopenharmony_ci			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
366762306a36Sopenharmony_ci				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
366862306a36Sopenharmony_ci				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
366962306a36Sopenharmony_ci				clientid = xlog_get_client_id(
367062306a36Sopenharmony_ci					xhdr[j].hic_xheader.xh_cycle_data[k]);
367162306a36Sopenharmony_ci			} else {
367262306a36Sopenharmony_ci				clientid = xlog_get_client_id(
367362306a36Sopenharmony_ci					iclog->ic_header.h_cycle_data[idx]);
367462306a36Sopenharmony_ci			}
367562306a36Sopenharmony_ci		}
367662306a36Sopenharmony_ci		if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) {
367762306a36Sopenharmony_ci			xfs_warn(log->l_mp,
367862306a36Sopenharmony_ci				"%s: op %d invalid clientid %d op "PTR_FMT" offset 0x%lx",
367962306a36Sopenharmony_ci				__func__, i, clientid, ophead,
368062306a36Sopenharmony_ci				(unsigned long)field_offset);
368162306a36Sopenharmony_ci		}
368262306a36Sopenharmony_ci
368362306a36Sopenharmony_ci		/* check length */
368462306a36Sopenharmony_ci		p = &ophead->oh_len;
368562306a36Sopenharmony_ci		field_offset = p - base_ptr;
368662306a36Sopenharmony_ci		if (field_offset & 0x1ff) {
368762306a36Sopenharmony_ci			op_len = be32_to_cpu(ophead->oh_len);
368862306a36Sopenharmony_ci		} else {
368962306a36Sopenharmony_ci			idx = BTOBBT((void *)&ophead->oh_len - iclog->ic_datap);
369062306a36Sopenharmony_ci			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
369162306a36Sopenharmony_ci				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
369262306a36Sopenharmony_ci				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
369362306a36Sopenharmony_ci				op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]);
369462306a36Sopenharmony_ci			} else {
369562306a36Sopenharmony_ci				op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
369662306a36Sopenharmony_ci			}
369762306a36Sopenharmony_ci		}
369862306a36Sopenharmony_ci		ptr += sizeof(xlog_op_header_t) + op_len;
369962306a36Sopenharmony_ci	}
370062306a36Sopenharmony_ci}
370162306a36Sopenharmony_ci#endif
370262306a36Sopenharmony_ci
370362306a36Sopenharmony_ci/*
370462306a36Sopenharmony_ci * Perform a forced shutdown on the log.
370562306a36Sopenharmony_ci *
370662306a36Sopenharmony_ci * This can be called from low level log code to trigger a shutdown, or from the
370762306a36Sopenharmony_ci * high level mount shutdown code when the mount shuts down.
370862306a36Sopenharmony_ci *
370962306a36Sopenharmony_ci * Our main objectives here are to make sure that:
371062306a36Sopenharmony_ci *	a. if the shutdown was not due to a log IO error, flush the logs to
371162306a36Sopenharmony_ci *	   disk. Anything modified after this is ignored.
371262306a36Sopenharmony_ci *	b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested
371362306a36Sopenharmony_ci *	   parties to find out. Nothing new gets queued after this is done.
371462306a36Sopenharmony_ci *	c. Tasks sleeping on log reservations, pinned objects and
371562306a36Sopenharmony_ci *	   other resources get woken up.
371662306a36Sopenharmony_ci *	d. The mount is also marked as shut down so that log triggered shutdowns
371762306a36Sopenharmony_ci *	   still behave the same as if they called xfs_forced_shutdown().
371862306a36Sopenharmony_ci *
371962306a36Sopenharmony_ci * Return true if the shutdown cause was a log IO error and we actually shut the
372062306a36Sopenharmony_ci * log down.
372162306a36Sopenharmony_ci */
372262306a36Sopenharmony_cibool
372362306a36Sopenharmony_cixlog_force_shutdown(
372462306a36Sopenharmony_ci	struct xlog	*log,
372562306a36Sopenharmony_ci	uint32_t	shutdown_flags)
372662306a36Sopenharmony_ci{
372762306a36Sopenharmony_ci	bool		log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR);
372862306a36Sopenharmony_ci
372962306a36Sopenharmony_ci	if (!log)
373062306a36Sopenharmony_ci		return false;
373162306a36Sopenharmony_ci
373262306a36Sopenharmony_ci	/*
373362306a36Sopenharmony_ci	 * Flush all the completed transactions to disk before marking the log
373462306a36Sopenharmony_ci	 * being shut down. We need to do this first as shutting down the log
373562306a36Sopenharmony_ci	 * before the force will prevent the log force from flushing the iclogs
373662306a36Sopenharmony_ci	 * to disk.
373762306a36Sopenharmony_ci	 *
373862306a36Sopenharmony_ci	 * When we are in recovery, there are no transactions to flush, and
373962306a36Sopenharmony_ci	 * we don't want to touch the log because we don't want to perturb the
374062306a36Sopenharmony_ci	 * current head/tail for future recovery attempts. Hence we need to
374162306a36Sopenharmony_ci	 * avoid a log force in this case.
374262306a36Sopenharmony_ci	 *
374362306a36Sopenharmony_ci	 * If we are shutting down due to a log IO error, then we must avoid
374462306a36Sopenharmony_ci	 * trying to write the log as that may just result in more IO errors and
374562306a36Sopenharmony_ci	 * an endless shutdown/force loop.
374662306a36Sopenharmony_ci	 */
374762306a36Sopenharmony_ci	if (!log_error && !xlog_in_recovery(log))
374862306a36Sopenharmony_ci		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
374962306a36Sopenharmony_ci
375062306a36Sopenharmony_ci	/*
375162306a36Sopenharmony_ci	 * Atomically set the shutdown state. If the shutdown state is already
375262306a36Sopenharmony_ci	 * set, there someone else is performing the shutdown and so we are done
375362306a36Sopenharmony_ci	 * here. This should never happen because we should only ever get called
375462306a36Sopenharmony_ci	 * once by the first shutdown caller.
375562306a36Sopenharmony_ci	 *
375662306a36Sopenharmony_ci	 * Much of the log state machine transitions assume that shutdown state
375762306a36Sopenharmony_ci	 * cannot change once they hold the log->l_icloglock. Hence we need to
375862306a36Sopenharmony_ci	 * hold that lock here, even though we use the atomic test_and_set_bit()
375962306a36Sopenharmony_ci	 * operation to set the shutdown state.
376062306a36Sopenharmony_ci	 */
376162306a36Sopenharmony_ci	spin_lock(&log->l_icloglock);
376262306a36Sopenharmony_ci	if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) {
376362306a36Sopenharmony_ci		spin_unlock(&log->l_icloglock);
376462306a36Sopenharmony_ci		return false;
376562306a36Sopenharmony_ci	}
376662306a36Sopenharmony_ci	spin_unlock(&log->l_icloglock);
376762306a36Sopenharmony_ci
376862306a36Sopenharmony_ci	/*
376962306a36Sopenharmony_ci	 * If this log shutdown also sets the mount shutdown state, issue a
377062306a36Sopenharmony_ci	 * shutdown warning message.
377162306a36Sopenharmony_ci	 */
377262306a36Sopenharmony_ci	if (!test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &log->l_mp->m_opstate)) {
377362306a36Sopenharmony_ci		xfs_alert_tag(log->l_mp, XFS_PTAG_SHUTDOWN_LOGERROR,
377462306a36Sopenharmony_ci"Filesystem has been shut down due to log error (0x%x).",
377562306a36Sopenharmony_ci				shutdown_flags);
377662306a36Sopenharmony_ci		xfs_alert(log->l_mp,
377762306a36Sopenharmony_ci"Please unmount the filesystem and rectify the problem(s).");
377862306a36Sopenharmony_ci		if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
377962306a36Sopenharmony_ci			xfs_stack_trace();
378062306a36Sopenharmony_ci	}
378162306a36Sopenharmony_ci
378262306a36Sopenharmony_ci	/*
378362306a36Sopenharmony_ci	 * We don't want anybody waiting for log reservations after this. That
378462306a36Sopenharmony_ci	 * means we have to wake up everybody queued up on reserveq as well as
378562306a36Sopenharmony_ci	 * writeq.  In addition, we make sure in xlog_{re}grant_log_space that
378662306a36Sopenharmony_ci	 * we don't enqueue anything once the SHUTDOWN flag is set, and this
378762306a36Sopenharmony_ci	 * action is protected by the grant locks.
378862306a36Sopenharmony_ci	 */
378962306a36Sopenharmony_ci	xlog_grant_head_wake_all(&log->l_reserve_head);
379062306a36Sopenharmony_ci	xlog_grant_head_wake_all(&log->l_write_head);
379162306a36Sopenharmony_ci
379262306a36Sopenharmony_ci	/*
379362306a36Sopenharmony_ci	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
379462306a36Sopenharmony_ci	 * as if the log writes were completed. The abort handling in the log
379562306a36Sopenharmony_ci	 * item committed callback functions will do this again under lock to
379662306a36Sopenharmony_ci	 * avoid races.
379762306a36Sopenharmony_ci	 */
379862306a36Sopenharmony_ci	spin_lock(&log->l_cilp->xc_push_lock);
379962306a36Sopenharmony_ci	wake_up_all(&log->l_cilp->xc_start_wait);
380062306a36Sopenharmony_ci	wake_up_all(&log->l_cilp->xc_commit_wait);
380162306a36Sopenharmony_ci	spin_unlock(&log->l_cilp->xc_push_lock);
380262306a36Sopenharmony_ci
380362306a36Sopenharmony_ci	spin_lock(&log->l_icloglock);
380462306a36Sopenharmony_ci	xlog_state_shutdown_callbacks(log);
380562306a36Sopenharmony_ci	spin_unlock(&log->l_icloglock);
380662306a36Sopenharmony_ci
380762306a36Sopenharmony_ci	wake_up_var(&log->l_opstate);
380862306a36Sopenharmony_ci	return log_error;
380962306a36Sopenharmony_ci}
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_ciSTATIC int
381262306a36Sopenharmony_cixlog_iclogs_empty(
381362306a36Sopenharmony_ci	struct xlog	*log)
381462306a36Sopenharmony_ci{
381562306a36Sopenharmony_ci	xlog_in_core_t	*iclog;
381662306a36Sopenharmony_ci
381762306a36Sopenharmony_ci	iclog = log->l_iclog;
381862306a36Sopenharmony_ci	do {
381962306a36Sopenharmony_ci		/* endianness does not matter here, zero is zero in
382062306a36Sopenharmony_ci		 * any language.
382162306a36Sopenharmony_ci		 */
382262306a36Sopenharmony_ci		if (iclog->ic_header.h_num_logops)
382362306a36Sopenharmony_ci			return 0;
382462306a36Sopenharmony_ci		iclog = iclog->ic_next;
382562306a36Sopenharmony_ci	} while (iclog != log->l_iclog);
382662306a36Sopenharmony_ci	return 1;
382762306a36Sopenharmony_ci}
382862306a36Sopenharmony_ci
382962306a36Sopenharmony_ci/*
383062306a36Sopenharmony_ci * Verify that an LSN stamped into a piece of metadata is valid. This is
383162306a36Sopenharmony_ci * intended for use in read verifiers on v5 superblocks.
383262306a36Sopenharmony_ci */
383362306a36Sopenharmony_cibool
383462306a36Sopenharmony_cixfs_log_check_lsn(
383562306a36Sopenharmony_ci	struct xfs_mount	*mp,
383662306a36Sopenharmony_ci	xfs_lsn_t		lsn)
383762306a36Sopenharmony_ci{
383862306a36Sopenharmony_ci	struct xlog		*log = mp->m_log;
383962306a36Sopenharmony_ci	bool			valid;
384062306a36Sopenharmony_ci
384162306a36Sopenharmony_ci	/*
384262306a36Sopenharmony_ci	 * norecovery mode skips mount-time log processing and unconditionally
384362306a36Sopenharmony_ci	 * resets the in-core LSN. We can't validate in this mode, but
384462306a36Sopenharmony_ci	 * modifications are not allowed anyways so just return true.
384562306a36Sopenharmony_ci	 */
384662306a36Sopenharmony_ci	if (xfs_has_norecovery(mp))
384762306a36Sopenharmony_ci		return true;
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_ci	/*
385062306a36Sopenharmony_ci	 * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
385162306a36Sopenharmony_ci	 * handled by recovery and thus safe to ignore here.
385262306a36Sopenharmony_ci	 */
385362306a36Sopenharmony_ci	if (lsn == NULLCOMMITLSN)
385462306a36Sopenharmony_ci		return true;
385562306a36Sopenharmony_ci
385662306a36Sopenharmony_ci	valid = xlog_valid_lsn(mp->m_log, lsn);
385762306a36Sopenharmony_ci
385862306a36Sopenharmony_ci	/* warn the user about what's gone wrong before verifier failure */
385962306a36Sopenharmony_ci	if (!valid) {
386062306a36Sopenharmony_ci		spin_lock(&log->l_icloglock);
386162306a36Sopenharmony_ci		xfs_warn(mp,
386262306a36Sopenharmony_ci"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
386362306a36Sopenharmony_ci"Please unmount and run xfs_repair (>= v4.3) to resolve.",
386462306a36Sopenharmony_ci			 CYCLE_LSN(lsn), BLOCK_LSN(lsn),
386562306a36Sopenharmony_ci			 log->l_curr_cycle, log->l_curr_block);
386662306a36Sopenharmony_ci		spin_unlock(&log->l_icloglock);
386762306a36Sopenharmony_ci	}
386862306a36Sopenharmony_ci
386962306a36Sopenharmony_ci	return valid;
387062306a36Sopenharmony_ci}
387162306a36Sopenharmony_ci
387262306a36Sopenharmony_ci/*
387362306a36Sopenharmony_ci * Notify the log that we're about to start using a feature that is protected
387462306a36Sopenharmony_ci * by a log incompat feature flag.  This will prevent log covering from
387562306a36Sopenharmony_ci * clearing those flags.
387662306a36Sopenharmony_ci */
387762306a36Sopenharmony_civoid
387862306a36Sopenharmony_cixlog_use_incompat_feat(
387962306a36Sopenharmony_ci	struct xlog		*log)
388062306a36Sopenharmony_ci{
388162306a36Sopenharmony_ci	down_read(&log->l_incompat_users);
388262306a36Sopenharmony_ci}
388362306a36Sopenharmony_ci
388462306a36Sopenharmony_ci/* Notify the log that we've finished using log incompat features. */
388562306a36Sopenharmony_civoid
388662306a36Sopenharmony_cixlog_drop_incompat_feat(
388762306a36Sopenharmony_ci	struct xlog		*log)
388862306a36Sopenharmony_ci{
388962306a36Sopenharmony_ci	up_read(&log->l_incompat_users);
389062306a36Sopenharmony_ci}
3891