162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_log_format.h" 1162306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1262306a36Sopenharmony_ci#include "xfs_mount.h" 1362306a36Sopenharmony_ci#include "xfs_errortag.h" 1462306a36Sopenharmony_ci#include "xfs_error.h" 1562306a36Sopenharmony_ci#include "xfs_trans.h" 1662306a36Sopenharmony_ci#include "xfs_trans_priv.h" 1762306a36Sopenharmony_ci#include "xfs_log.h" 1862306a36Sopenharmony_ci#include "xfs_log_priv.h" 1962306a36Sopenharmony_ci#include "xfs_trace.h" 2062306a36Sopenharmony_ci#include "xfs_sysfs.h" 2162306a36Sopenharmony_ci#include "xfs_sb.h" 2262306a36Sopenharmony_ci#include "xfs_health.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistruct kmem_cache *xfs_log_ticket_cache; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/* Local miscellaneous function prototypes */ 2762306a36Sopenharmony_ciSTATIC struct xlog * 2862306a36Sopenharmony_cixlog_alloc_log( 2962306a36Sopenharmony_ci struct xfs_mount *mp, 3062306a36Sopenharmony_ci struct xfs_buftarg *log_target, 3162306a36Sopenharmony_ci xfs_daddr_t blk_offset, 3262306a36Sopenharmony_ci int num_bblks); 3362306a36Sopenharmony_ciSTATIC int 3462306a36Sopenharmony_cixlog_space_left( 3562306a36Sopenharmony_ci struct xlog *log, 3662306a36Sopenharmony_ci atomic64_t *head); 3762306a36Sopenharmony_ciSTATIC void 3862306a36Sopenharmony_cixlog_dealloc_log( 3962306a36Sopenharmony_ci struct xlog *log); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci/* local state machine functions */ 4262306a36Sopenharmony_ciSTATIC void xlog_state_done_syncing( 4362306a36Sopenharmony_ci 
struct xlog_in_core *iclog); 4462306a36Sopenharmony_ciSTATIC void xlog_state_do_callback( 4562306a36Sopenharmony_ci struct xlog *log); 4662306a36Sopenharmony_ciSTATIC int 4762306a36Sopenharmony_cixlog_state_get_iclog_space( 4862306a36Sopenharmony_ci struct xlog *log, 4962306a36Sopenharmony_ci int len, 5062306a36Sopenharmony_ci struct xlog_in_core **iclog, 5162306a36Sopenharmony_ci struct xlog_ticket *ticket, 5262306a36Sopenharmony_ci int *logoffsetp); 5362306a36Sopenharmony_ciSTATIC void 5462306a36Sopenharmony_cixlog_grant_push_ail( 5562306a36Sopenharmony_ci struct xlog *log, 5662306a36Sopenharmony_ci int need_bytes); 5762306a36Sopenharmony_ciSTATIC void 5862306a36Sopenharmony_cixlog_sync( 5962306a36Sopenharmony_ci struct xlog *log, 6062306a36Sopenharmony_ci struct xlog_in_core *iclog, 6162306a36Sopenharmony_ci struct xlog_ticket *ticket); 6262306a36Sopenharmony_ci#if defined(DEBUG) 6362306a36Sopenharmony_ciSTATIC void 6462306a36Sopenharmony_cixlog_verify_grant_tail( 6562306a36Sopenharmony_ci struct xlog *log); 6662306a36Sopenharmony_ciSTATIC void 6762306a36Sopenharmony_cixlog_verify_iclog( 6862306a36Sopenharmony_ci struct xlog *log, 6962306a36Sopenharmony_ci struct xlog_in_core *iclog, 7062306a36Sopenharmony_ci int count); 7162306a36Sopenharmony_ciSTATIC void 7262306a36Sopenharmony_cixlog_verify_tail_lsn( 7362306a36Sopenharmony_ci struct xlog *log, 7462306a36Sopenharmony_ci struct xlog_in_core *iclog); 7562306a36Sopenharmony_ci#else 7662306a36Sopenharmony_ci#define xlog_verify_grant_tail(a) 7762306a36Sopenharmony_ci#define xlog_verify_iclog(a,b,c) 7862306a36Sopenharmony_ci#define xlog_verify_tail_lsn(a,b) 7962306a36Sopenharmony_ci#endif 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ciSTATIC int 8262306a36Sopenharmony_cixlog_iclogs_empty( 8362306a36Sopenharmony_ci struct xlog *log); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic int 8662306a36Sopenharmony_cixfs_log_cover(struct xfs_mount *); 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci/* 
8962306a36Sopenharmony_ci * We need to make sure the buffer pointer returned is naturally aligned for the 9062306a36Sopenharmony_ci * biggest basic data type we put into it. We have already accounted for this 9162306a36Sopenharmony_ci * padding when sizing the buffer. 9262306a36Sopenharmony_ci * 9362306a36Sopenharmony_ci * However, this padding does not get written into the log, and hence we have to 9462306a36Sopenharmony_ci * track the space used by the log vectors separately to prevent log space hangs 9562306a36Sopenharmony_ci * due to inaccurate accounting (i.e. a leak) of the used log space through the 9662306a36Sopenharmony_ci * CIL context ticket. 9762306a36Sopenharmony_ci * 9862306a36Sopenharmony_ci * We also add space for the xlog_op_header that describes this region in the 9962306a36Sopenharmony_ci * log. This prepends the data region we return to the caller to copy their data 10062306a36Sopenharmony_ci * into, so do all the static initialisation of the ophdr now. Because the ophdr 10162306a36Sopenharmony_ci * is not 8 byte aligned, we have to be careful to ensure that we align the 10262306a36Sopenharmony_ci * start of the buffer such that the region we return to the call is 8 byte 10362306a36Sopenharmony_ci * aligned and packed against the tail of the ophdr. 
10462306a36Sopenharmony_ci */ 10562306a36Sopenharmony_civoid * 10662306a36Sopenharmony_cixlog_prepare_iovec( 10762306a36Sopenharmony_ci struct xfs_log_vec *lv, 10862306a36Sopenharmony_ci struct xfs_log_iovec **vecp, 10962306a36Sopenharmony_ci uint type) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci struct xfs_log_iovec *vec = *vecp; 11262306a36Sopenharmony_ci struct xlog_op_header *oph; 11362306a36Sopenharmony_ci uint32_t len; 11462306a36Sopenharmony_ci void *buf; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci if (vec) { 11762306a36Sopenharmony_ci ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); 11862306a36Sopenharmony_ci vec++; 11962306a36Sopenharmony_ci } else { 12062306a36Sopenharmony_ci vec = &lv->lv_iovecp[0]; 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci len = lv->lv_buf_len + sizeof(struct xlog_op_header); 12462306a36Sopenharmony_ci if (!IS_ALIGNED(len, sizeof(uint64_t))) { 12562306a36Sopenharmony_ci lv->lv_buf_len = round_up(len, sizeof(uint64_t)) - 12662306a36Sopenharmony_ci sizeof(struct xlog_op_header); 12762306a36Sopenharmony_ci } 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci vec->i_type = type; 13062306a36Sopenharmony_ci vec->i_addr = lv->lv_buf + lv->lv_buf_len; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci oph = vec->i_addr; 13362306a36Sopenharmony_ci oph->oh_clientid = XFS_TRANSACTION; 13462306a36Sopenharmony_ci oph->oh_res2 = 0; 13562306a36Sopenharmony_ci oph->oh_flags = 0; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci buf = vec->i_addr + sizeof(struct xlog_op_header); 13862306a36Sopenharmony_ci ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci *vecp = vec; 14162306a36Sopenharmony_ci return buf; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistatic void 14562306a36Sopenharmony_cixlog_grant_sub_space( 14662306a36Sopenharmony_ci struct xlog *log, 14762306a36Sopenharmony_ci atomic64_t 
*head, 14862306a36Sopenharmony_ci int bytes) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci int64_t head_val = atomic64_read(head); 15162306a36Sopenharmony_ci int64_t new, old; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci do { 15462306a36Sopenharmony_ci int cycle, space; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci xlog_crack_grant_head_val(head_val, &cycle, &space); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci space -= bytes; 15962306a36Sopenharmony_ci if (space < 0) { 16062306a36Sopenharmony_ci space += log->l_logsize; 16162306a36Sopenharmony_ci cycle--; 16262306a36Sopenharmony_ci } 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci old = head_val; 16562306a36Sopenharmony_ci new = xlog_assign_grant_head_val(cycle, space); 16662306a36Sopenharmony_ci head_val = atomic64_cmpxchg(head, old, new); 16762306a36Sopenharmony_ci } while (head_val != old); 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_cistatic void 17162306a36Sopenharmony_cixlog_grant_add_space( 17262306a36Sopenharmony_ci struct xlog *log, 17362306a36Sopenharmony_ci atomic64_t *head, 17462306a36Sopenharmony_ci int bytes) 17562306a36Sopenharmony_ci{ 17662306a36Sopenharmony_ci int64_t head_val = atomic64_read(head); 17762306a36Sopenharmony_ci int64_t new, old; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci do { 18062306a36Sopenharmony_ci int tmp; 18162306a36Sopenharmony_ci int cycle, space; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci xlog_crack_grant_head_val(head_val, &cycle, &space); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci tmp = log->l_logsize - space; 18662306a36Sopenharmony_ci if (tmp > bytes) 18762306a36Sopenharmony_ci space += bytes; 18862306a36Sopenharmony_ci else { 18962306a36Sopenharmony_ci space = bytes - tmp; 19062306a36Sopenharmony_ci cycle++; 19162306a36Sopenharmony_ci } 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci old = head_val; 19462306a36Sopenharmony_ci new = 
xlog_assign_grant_head_val(cycle, space); 19562306a36Sopenharmony_ci head_val = atomic64_cmpxchg(head, old, new); 19662306a36Sopenharmony_ci } while (head_val != old); 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ciSTATIC void 20062306a36Sopenharmony_cixlog_grant_head_init( 20162306a36Sopenharmony_ci struct xlog_grant_head *head) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci xlog_assign_grant_head(&head->grant, 1, 0); 20462306a36Sopenharmony_ci INIT_LIST_HEAD(&head->waiters); 20562306a36Sopenharmony_ci spin_lock_init(&head->lock); 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ciSTATIC void 20962306a36Sopenharmony_cixlog_grant_head_wake_all( 21062306a36Sopenharmony_ci struct xlog_grant_head *head) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci struct xlog_ticket *tic; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci spin_lock(&head->lock); 21562306a36Sopenharmony_ci list_for_each_entry(tic, &head->waiters, t_queue) 21662306a36Sopenharmony_ci wake_up_process(tic->t_task); 21762306a36Sopenharmony_ci spin_unlock(&head->lock); 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cistatic inline int 22162306a36Sopenharmony_cixlog_ticket_reservation( 22262306a36Sopenharmony_ci struct xlog *log, 22362306a36Sopenharmony_ci struct xlog_grant_head *head, 22462306a36Sopenharmony_ci struct xlog_ticket *tic) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci if (head == &log->l_write_head) { 22762306a36Sopenharmony_ci ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 22862306a36Sopenharmony_ci return tic->t_unit_res; 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci if (tic->t_flags & XLOG_TIC_PERM_RESERV) 23262306a36Sopenharmony_ci return tic->t_unit_res * tic->t_cnt; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci return tic->t_unit_res; 23562306a36Sopenharmony_ci} 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ciSTATIC 
bool 23862306a36Sopenharmony_cixlog_grant_head_wake( 23962306a36Sopenharmony_ci struct xlog *log, 24062306a36Sopenharmony_ci struct xlog_grant_head *head, 24162306a36Sopenharmony_ci int *free_bytes) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci struct xlog_ticket *tic; 24462306a36Sopenharmony_ci int need_bytes; 24562306a36Sopenharmony_ci bool woken_task = false; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci list_for_each_entry(tic, &head->waiters, t_queue) { 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci /* 25062306a36Sopenharmony_ci * There is a chance that the size of the CIL checkpoints in 25162306a36Sopenharmony_ci * progress at the last AIL push target calculation resulted in 25262306a36Sopenharmony_ci * limiting the target to the log head (l_last_sync_lsn) at the 25362306a36Sopenharmony_ci * time. This may not reflect where the log head is now as the 25462306a36Sopenharmony_ci * CIL checkpoints may have completed. 25562306a36Sopenharmony_ci * 25662306a36Sopenharmony_ci * Hence when we are woken here, it may be that the head of the 25762306a36Sopenharmony_ci * log that has moved rather than the tail. As the tail didn't 25862306a36Sopenharmony_ci * move, there still won't be space available for the 25962306a36Sopenharmony_ci * reservation we require. However, if the AIL has already 26062306a36Sopenharmony_ci * pushed to the target defined by the old log head location, we 26162306a36Sopenharmony_ci * will hang here waiting for something else to update the AIL 26262306a36Sopenharmony_ci * push target. 26362306a36Sopenharmony_ci * 26462306a36Sopenharmony_ci * Therefore, if there isn't space to wake the first waiter on 26562306a36Sopenharmony_ci * the grant head, we need to push the AIL again to ensure the 26662306a36Sopenharmony_ci * target reflects both the current log tail and log head 26762306a36Sopenharmony_ci * position before we wait for the tail to move again. 
26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci need_bytes = xlog_ticket_reservation(log, head, tic); 27162306a36Sopenharmony_ci if (*free_bytes < need_bytes) { 27262306a36Sopenharmony_ci if (!woken_task) 27362306a36Sopenharmony_ci xlog_grant_push_ail(log, need_bytes); 27462306a36Sopenharmony_ci return false; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci *free_bytes -= need_bytes; 27862306a36Sopenharmony_ci trace_xfs_log_grant_wake_up(log, tic); 27962306a36Sopenharmony_ci wake_up_process(tic->t_task); 28062306a36Sopenharmony_ci woken_task = true; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci return true; 28462306a36Sopenharmony_ci} 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ciSTATIC int 28762306a36Sopenharmony_cixlog_grant_head_wait( 28862306a36Sopenharmony_ci struct xlog *log, 28962306a36Sopenharmony_ci struct xlog_grant_head *head, 29062306a36Sopenharmony_ci struct xlog_ticket *tic, 29162306a36Sopenharmony_ci int need_bytes) __releases(&head->lock) 29262306a36Sopenharmony_ci __acquires(&head->lock) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci list_add_tail(&tic->t_queue, &head->waiters); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci do { 29762306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 29862306a36Sopenharmony_ci goto shutdown; 29962306a36Sopenharmony_ci xlog_grant_push_ail(log, need_bytes); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci __set_current_state(TASK_UNINTERRUPTIBLE); 30262306a36Sopenharmony_ci spin_unlock(&head->lock); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci XFS_STATS_INC(log->l_mp, xs_sleep_logspace); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci trace_xfs_log_grant_sleep(log, tic); 30762306a36Sopenharmony_ci schedule(); 30862306a36Sopenharmony_ci trace_xfs_log_grant_wake(log, tic); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci spin_lock(&head->lock); 
31162306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 31262306a36Sopenharmony_ci goto shutdown; 31362306a36Sopenharmony_ci } while (xlog_space_left(log, &head->grant) < need_bytes); 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci list_del_init(&tic->t_queue); 31662306a36Sopenharmony_ci return 0; 31762306a36Sopenharmony_cishutdown: 31862306a36Sopenharmony_ci list_del_init(&tic->t_queue); 31962306a36Sopenharmony_ci return -EIO; 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci/* 32362306a36Sopenharmony_ci * Atomically get the log space required for a log ticket. 32462306a36Sopenharmony_ci * 32562306a36Sopenharmony_ci * Once a ticket gets put onto head->waiters, it will only return after the 32662306a36Sopenharmony_ci * needed reservation is satisfied. 32762306a36Sopenharmony_ci * 32862306a36Sopenharmony_ci * This function is structured so that it has a lock free fast path. This is 32962306a36Sopenharmony_ci * necessary because every new transaction reservation will come through this 33062306a36Sopenharmony_ci * path. Hence any lock will be globally hot if we take it unconditionally on 33162306a36Sopenharmony_ci * every pass. 33262306a36Sopenharmony_ci * 33362306a36Sopenharmony_ci * As tickets are only ever moved on and off head->waiters under head->lock, we 33462306a36Sopenharmony_ci * only need to take that lock if we are going to add the ticket to the queue 33562306a36Sopenharmony_ci * and sleep. We can avoid taking the lock if the ticket was never added to 33662306a36Sopenharmony_ci * head->waiters because the t_queue list head will be empty and we hold the 33762306a36Sopenharmony_ci * only reference to it so it can safely be checked unlocked. 
33862306a36Sopenharmony_ci */ 33962306a36Sopenharmony_ciSTATIC int 34062306a36Sopenharmony_cixlog_grant_head_check( 34162306a36Sopenharmony_ci struct xlog *log, 34262306a36Sopenharmony_ci struct xlog_grant_head *head, 34362306a36Sopenharmony_ci struct xlog_ticket *tic, 34462306a36Sopenharmony_ci int *need_bytes) 34562306a36Sopenharmony_ci{ 34662306a36Sopenharmony_ci int free_bytes; 34762306a36Sopenharmony_ci int error = 0; 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci ASSERT(!xlog_in_recovery(log)); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci /* 35262306a36Sopenharmony_ci * If there are other waiters on the queue then give them a chance at 35362306a36Sopenharmony_ci * logspace before us. Wake up the first waiters, if we do not wake 35462306a36Sopenharmony_ci * up all the waiters then go to sleep waiting for more free space, 35562306a36Sopenharmony_ci * otherwise try to get some space for this transaction. 35662306a36Sopenharmony_ci */ 35762306a36Sopenharmony_ci *need_bytes = xlog_ticket_reservation(log, head, tic); 35862306a36Sopenharmony_ci free_bytes = xlog_space_left(log, &head->grant); 35962306a36Sopenharmony_ci if (!list_empty_careful(&head->waiters)) { 36062306a36Sopenharmony_ci spin_lock(&head->lock); 36162306a36Sopenharmony_ci if (!xlog_grant_head_wake(log, head, &free_bytes) || 36262306a36Sopenharmony_ci free_bytes < *need_bytes) { 36362306a36Sopenharmony_ci error = xlog_grant_head_wait(log, head, tic, 36462306a36Sopenharmony_ci *need_bytes); 36562306a36Sopenharmony_ci } 36662306a36Sopenharmony_ci spin_unlock(&head->lock); 36762306a36Sopenharmony_ci } else if (free_bytes < *need_bytes) { 36862306a36Sopenharmony_ci spin_lock(&head->lock); 36962306a36Sopenharmony_ci error = xlog_grant_head_wait(log, head, tic, *need_bytes); 37062306a36Sopenharmony_ci spin_unlock(&head->lock); 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci return error; 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 
37662306a36Sopenharmony_cibool 37762306a36Sopenharmony_cixfs_log_writable( 37862306a36Sopenharmony_ci struct xfs_mount *mp) 37962306a36Sopenharmony_ci{ 38062306a36Sopenharmony_ci /* 38162306a36Sopenharmony_ci * Do not write to the log on norecovery mounts, if the data or log 38262306a36Sopenharmony_ci * devices are read-only, or if the filesystem is shutdown. Read-only 38362306a36Sopenharmony_ci * mounts allow internal writes for log recovery and unmount purposes, 38462306a36Sopenharmony_ci * so don't restrict that case. 38562306a36Sopenharmony_ci */ 38662306a36Sopenharmony_ci if (xfs_has_norecovery(mp)) 38762306a36Sopenharmony_ci return false; 38862306a36Sopenharmony_ci if (xfs_readonly_buftarg(mp->m_ddev_targp)) 38962306a36Sopenharmony_ci return false; 39062306a36Sopenharmony_ci if (xfs_readonly_buftarg(mp->m_log->l_targ)) 39162306a36Sopenharmony_ci return false; 39262306a36Sopenharmony_ci if (xlog_is_shutdown(mp->m_log)) 39362306a36Sopenharmony_ci return false; 39462306a36Sopenharmony_ci return true; 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci/* 39862306a36Sopenharmony_ci * Replenish the byte reservation required by moving the grant write head. 
39962306a36Sopenharmony_ci */ 40062306a36Sopenharmony_ciint 40162306a36Sopenharmony_cixfs_log_regrant( 40262306a36Sopenharmony_ci struct xfs_mount *mp, 40362306a36Sopenharmony_ci struct xlog_ticket *tic) 40462306a36Sopenharmony_ci{ 40562306a36Sopenharmony_ci struct xlog *log = mp->m_log; 40662306a36Sopenharmony_ci int need_bytes; 40762306a36Sopenharmony_ci int error = 0; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 41062306a36Sopenharmony_ci return -EIO; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_try_logspace); 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci /* 41562306a36Sopenharmony_ci * This is a new transaction on the ticket, so we need to change the 41662306a36Sopenharmony_ci * transaction ID so that the next transaction has a different TID in 41762306a36Sopenharmony_ci * the log. Just add one to the existing tid so that we can see chains 41862306a36Sopenharmony_ci * of rolling transactions in the log easily. 
41962306a36Sopenharmony_ci */ 42062306a36Sopenharmony_ci tic->t_tid++; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci xlog_grant_push_ail(log, tic->t_unit_res); 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci tic->t_curr_res = tic->t_unit_res; 42562306a36Sopenharmony_ci if (tic->t_cnt > 0) 42662306a36Sopenharmony_ci return 0; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci trace_xfs_log_regrant(log, tic); 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci error = xlog_grant_head_check(log, &log->l_write_head, tic, 43162306a36Sopenharmony_ci &need_bytes); 43262306a36Sopenharmony_ci if (error) 43362306a36Sopenharmony_ci goto out_error; 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); 43662306a36Sopenharmony_ci trace_xfs_log_regrant_exit(log, tic); 43762306a36Sopenharmony_ci xlog_verify_grant_tail(log); 43862306a36Sopenharmony_ci return 0; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ciout_error: 44162306a36Sopenharmony_ci /* 44262306a36Sopenharmony_ci * If we are failing, make sure the ticket doesn't have any current 44362306a36Sopenharmony_ci * reservations. We don't want to add this back when the ticket/ 44462306a36Sopenharmony_ci * transaction gets cancelled. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci tic->t_curr_res = 0; 44762306a36Sopenharmony_ci tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 44862306a36Sopenharmony_ci return error; 44962306a36Sopenharmony_ci} 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci/* 45262306a36Sopenharmony_ci * Reserve log space and return a ticket corresponding to the reservation. 45362306a36Sopenharmony_ci * 45462306a36Sopenharmony_ci * Each reservation is going to reserve extra space for a log record header. 45562306a36Sopenharmony_ci * When writes happen to the on-disk log, we don't subtract the length of the 45662306a36Sopenharmony_ci * log record header from any reservation. 
By wasting space in each 45762306a36Sopenharmony_ci * reservation, we prevent over allocation problems. 45862306a36Sopenharmony_ci */ 45962306a36Sopenharmony_ciint 46062306a36Sopenharmony_cixfs_log_reserve( 46162306a36Sopenharmony_ci struct xfs_mount *mp, 46262306a36Sopenharmony_ci int unit_bytes, 46362306a36Sopenharmony_ci int cnt, 46462306a36Sopenharmony_ci struct xlog_ticket **ticp, 46562306a36Sopenharmony_ci bool permanent) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci struct xlog *log = mp->m_log; 46862306a36Sopenharmony_ci struct xlog_ticket *tic; 46962306a36Sopenharmony_ci int need_bytes; 47062306a36Sopenharmony_ci int error = 0; 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 47362306a36Sopenharmony_ci return -EIO; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_try_logspace); 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci ASSERT(*ticp == NULL); 47862306a36Sopenharmony_ci tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent); 47962306a36Sopenharmony_ci *ticp = tic; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci xlog_grant_push_ail(log, tic->t_cnt ? 
tic->t_unit_res * tic->t_cnt 48262306a36Sopenharmony_ci : tic->t_unit_res); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci trace_xfs_log_reserve(log, tic); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci error = xlog_grant_head_check(log, &log->l_reserve_head, tic, 48762306a36Sopenharmony_ci &need_bytes); 48862306a36Sopenharmony_ci if (error) 48962306a36Sopenharmony_ci goto out_error; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes); 49262306a36Sopenharmony_ci xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); 49362306a36Sopenharmony_ci trace_xfs_log_reserve_exit(log, tic); 49462306a36Sopenharmony_ci xlog_verify_grant_tail(log); 49562306a36Sopenharmony_ci return 0; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ciout_error: 49862306a36Sopenharmony_ci /* 49962306a36Sopenharmony_ci * If we are failing, make sure the ticket doesn't have any current 50062306a36Sopenharmony_ci * reservations. We don't want to add this back when the ticket/ 50162306a36Sopenharmony_ci * transaction gets cancelled. 50262306a36Sopenharmony_ci */ 50362306a36Sopenharmony_ci tic->t_curr_res = 0; 50462306a36Sopenharmony_ci tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 50562306a36Sopenharmony_ci return error; 50662306a36Sopenharmony_ci} 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci/* 50962306a36Sopenharmony_ci * Run all the pending iclog callbacks and wake log force waiters and iclog 51062306a36Sopenharmony_ci * space waiters so they can process the newly set shutdown state. We really 51162306a36Sopenharmony_ci * don't care what order we process callbacks here because the log is shut down 51262306a36Sopenharmony_ci * and so state cannot change on disk anymore. 
However, we cannot wake waiters 51362306a36Sopenharmony_ci * until the callbacks have been processed because we may be in unmount and 51462306a36Sopenharmony_ci * we must ensure that all AIL operations the callbacks perform have completed 51562306a36Sopenharmony_ci * before we tear down the AIL. 51662306a36Sopenharmony_ci * 51762306a36Sopenharmony_ci * We avoid processing actively referenced iclogs so that we don't run callbacks 51862306a36Sopenharmony_ci * while the iclog owner might still be preparing the iclog for IO submssion. 51962306a36Sopenharmony_ci * These will be caught by xlog_state_iclog_release() and call this function 52062306a36Sopenharmony_ci * again to process any callbacks that may have been added to that iclog. 52162306a36Sopenharmony_ci */ 52262306a36Sopenharmony_cistatic void 52362306a36Sopenharmony_cixlog_state_shutdown_callbacks( 52462306a36Sopenharmony_ci struct xlog *log) 52562306a36Sopenharmony_ci{ 52662306a36Sopenharmony_ci struct xlog_in_core *iclog; 52762306a36Sopenharmony_ci LIST_HEAD(cb_list); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci iclog = log->l_iclog; 53062306a36Sopenharmony_ci do { 53162306a36Sopenharmony_ci if (atomic_read(&iclog->ic_refcnt)) { 53262306a36Sopenharmony_ci /* Reference holder will re-run iclog callbacks. 
*/ 53362306a36Sopenharmony_ci continue; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci list_splice_init(&iclog->ic_callbacks, &cb_list); 53662306a36Sopenharmony_ci spin_unlock(&log->l_icloglock); 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci xlog_cil_process_committed(&cb_list); 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci spin_lock(&log->l_icloglock); 54162306a36Sopenharmony_ci wake_up_all(&iclog->ic_write_wait); 54262306a36Sopenharmony_ci wake_up_all(&iclog->ic_force_wait); 54362306a36Sopenharmony_ci } while ((iclog = iclog->ic_next) != log->l_iclog); 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci wake_up_all(&log->l_flush_wait); 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci/* 54962306a36Sopenharmony_ci * Flush iclog to disk if this is the last reference to the given iclog and the 55062306a36Sopenharmony_ci * it is in the WANT_SYNC state. 55162306a36Sopenharmony_ci * 55262306a36Sopenharmony_ci * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the 55362306a36Sopenharmony_ci * log tail is updated correctly. NEED_FUA indicates that the iclog will be 55462306a36Sopenharmony_ci * written to stable storage, and implies that a commit record is contained 55562306a36Sopenharmony_ci * within the iclog. We need to ensure that the log tail does not move beyond 55662306a36Sopenharmony_ci * the tail that the first commit record in the iclog ordered against, otherwise 55762306a36Sopenharmony_ci * correct recovery of that checkpoint becomes dependent on future operations 55862306a36Sopenharmony_ci * performed on this iclog. 55962306a36Sopenharmony_ci * 56062306a36Sopenharmony_ci * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the 56162306a36Sopenharmony_ci * current tail into iclog. 
Once the iclog tail is set, future operations must 56262306a36Sopenharmony_ci * not modify it, otherwise they potentially violate ordering constraints for 56362306a36Sopenharmony_ci * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in 56462306a36Sopenharmony_ci * the iclog will get zeroed on activation of the iclog after sync, so we 56562306a36Sopenharmony_ci * always capture the tail lsn on the iclog on the first NEED_FUA release 56662306a36Sopenharmony_ci * regardless of the number of active reference counts on this iclog. 56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_ciint 56962306a36Sopenharmony_cixlog_state_release_iclog( 57062306a36Sopenharmony_ci struct xlog *log, 57162306a36Sopenharmony_ci struct xlog_in_core *iclog, 57262306a36Sopenharmony_ci struct xlog_ticket *ticket) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci xfs_lsn_t tail_lsn; 57562306a36Sopenharmony_ci bool last_ref; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci lockdep_assert_held(&log->l_icloglock); 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci trace_xlog_iclog_release(iclog, _RET_IP_); 58062306a36Sopenharmony_ci /* 58162306a36Sopenharmony_ci * Grabbing the current log tail needs to be atomic w.r.t. the writing 58262306a36Sopenharmony_ci * of the tail LSN into the iclog so we guarantee that the log tail does 58362306a36Sopenharmony_ci * not move between the first time we know that the iclog needs to be 58462306a36Sopenharmony_ci * made stable and when we eventually submit it. 
58562306a36Sopenharmony_ci */ 58662306a36Sopenharmony_ci if ((iclog->ic_state == XLOG_STATE_WANT_SYNC || 58762306a36Sopenharmony_ci (iclog->ic_flags & XLOG_ICL_NEED_FUA)) && 58862306a36Sopenharmony_ci !iclog->ic_header.h_tail_lsn) { 58962306a36Sopenharmony_ci tail_lsn = xlog_assign_tail_lsn(log->l_mp); 59062306a36Sopenharmony_ci iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci last_ref = atomic_dec_and_test(&iclog->ic_refcnt); 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci if (xlog_is_shutdown(log)) { 59662306a36Sopenharmony_ci /* 59762306a36Sopenharmony_ci * If there are no more references to this iclog, process the 59862306a36Sopenharmony_ci * pending iclog callbacks that were waiting on the release of 59962306a36Sopenharmony_ci * this iclog. 60062306a36Sopenharmony_ci */ 60162306a36Sopenharmony_ci if (last_ref) 60262306a36Sopenharmony_ci xlog_state_shutdown_callbacks(log); 60362306a36Sopenharmony_ci return -EIO; 60462306a36Sopenharmony_ci } 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci if (!last_ref) 60762306a36Sopenharmony_ci return 0; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { 61062306a36Sopenharmony_ci ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 61162306a36Sopenharmony_ci return 0; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci iclog->ic_state = XLOG_STATE_SYNCING; 61562306a36Sopenharmony_ci xlog_verify_tail_lsn(log, iclog); 61662306a36Sopenharmony_ci trace_xlog_iclog_syncing(iclog, _RET_IP_); 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci spin_unlock(&log->l_icloglock); 61962306a36Sopenharmony_ci xlog_sync(log, iclog, ticket); 62062306a36Sopenharmony_ci spin_lock(&log->l_icloglock); 62162306a36Sopenharmony_ci return 0; 62262306a36Sopenharmony_ci} 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci/* 62562306a36Sopenharmony_ci * Mount a log filesystem 
 *
 * mp		- ubiquitous xfs mount point structure
 * log_target	- buftarg of on-disk log device
 * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
 * num_bblocks	- Number of BBSIZE blocks in on-disk log
 *
 * Return error or zero.
 */
int
xfs_log_mount(
	xfs_mount_t	*mp,
	xfs_buftarg_t	*log_target,
	xfs_daddr_t	blk_offset,
	int		num_bblks)
{
	struct xlog	*log;
	int		error = 0;
	int		min_logfsbs;

	if (!xfs_has_norecovery(mp)) {
		xfs_notice(mp, "Mounting V%d Filesystem %pU",
			   XFS_SB_VERSION_NUM(&mp->m_sb),
			   &mp->m_sb.sb_uuid);
	} else {
		xfs_notice(mp,
"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.",
			   XFS_SB_VERSION_NUM(&mp->m_sb),
			   &mp->m_sb.sb_uuid);
		/* norecovery mounts must be read-only */
		ASSERT(xfs_is_readonly(mp));
	}

	log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
	if (IS_ERR(log)) {
		error = PTR_ERR(log);
		goto out;
	}
	mp->m_log = log;

	/*
	 * Now that we have set up the log and its internal geometry
	 * parameters, we can validate the given log space and drop a critical
	 * message via syslog if the log size is too small. A log that is too
	 * small can lead to unexpected situations in transaction log space
	 * reservation stage. The superblock verifier has already validated all
	 * the other log geometry constraints, so we don't have to check those
	 * here.
	 *
	 * Note: For v4 filesystems, we can't just reject the mount if the
	 * validation fails.  This would mean that people would have to
	 * downgrade their kernel just to remedy the situation as there is no
	 * way to grow the log (short of black magic surgery with xfs_db).
	 *
	 * We can, however, reject mounts for V5 format filesystems, as the
	 * mkfs binary being used to make the filesystem should never create a
	 * filesystem with a log that is too small.
	 */
	min_logfsbs = xfs_log_calc_minimum_size(mp);
	if (mp->m_sb.sb_logblocks < min_logfsbs) {
		xfs_warn(mp,
	"Log size %d blocks too small, minimum size is %d blocks",
			 mp->m_sb.sb_logblocks, min_logfsbs);

		/*
		 * Log check errors are always fatal on v5; or whenever bad
		 * metadata leads to a crash.
		 */
		if (xfs_has_crc(mp)) {
			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
			ASSERT(0);
			error = -EINVAL;
			goto out_free_log;
		}
		/* v4: warn loudly but allow the mount to proceed */
		xfs_crit(mp, "Log size out of supported range.");
		xfs_crit(mp,
"Continuing onwards, but if log hangs are experienced then please report this message in the bug report.");
	}

	/*
	 * Initialize the AIL now we have a log.
	 */
	error = xfs_trans_ail_init(mp);
	if (error) {
		xfs_warn(mp, "AIL initialisation failed: error %d", error);
		goto out_free_log;
	}
	log->l_ailp = mp->m_ail;

	/*
	 * skip log recovery on a norecovery mount.  pretend it all
	 * just worked.
	 */
	if (!xfs_has_norecovery(mp)) {
		error = xlog_recover(log);
		if (error) {
			xfs_warn(mp, "log mount/recovery failed: error %d",
				error);
			xlog_recover_cancel(log);
			goto out_destroy_ail;
		}
	}

	error = xfs_sysfs_init(&log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
			       "log");
	if (error)
		goto out_destroy_ail;

	/* Normal transactions can now occur */
	clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);

	/*
	 * Now the log has been fully initialised and we know where our
	 * space grant counters are, we can initialise the permanent ticket
	 * needed for delayed logging to work.
	 */
	xlog_cil_init_post_recovery(log);

	return 0;

out_destroy_ail:
	xfs_trans_ail_destroy(mp);
out_free_log:
	xlog_dealloc_log(log);
out:
	return error;
}

/*
 * Finish the recovery of the file system.
This is separate from the
 * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read
 * in the root and real-time bitmap inodes between calling xfs_log_mount() and
 * here.
 *
 * If we finish recovery successfully, start the background log work. If we are
 * not doing recovery, then we have a RO filesystem and we don't need to start
 * it.
 */
int
xfs_log_mount_finish(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	int			error = 0;

	/* Nothing to replay on a norecovery (read-only) mount. */
	if (xfs_has_norecovery(mp)) {
		ASSERT(xfs_is_readonly(mp));
		return 0;
	}

	/*
	 * During the second phase of log recovery, we need iget and
	 * iput to behave like they do for an active filesystem.
	 * xfs_fs_drop_inode needs to be able to prevent the deletion
	 * of inodes before we're done replaying log items on those
	 * inodes.  Turn it off immediately after recovery finishes
	 * so that we don't leak the quota inodes if subsequent mount
	 * activities fail.
	 *
	 * We let all inodes involved in redo item processing end up on
	 * the LRU instead of being evicted immediately so that if we do
	 * something to an unlinked inode, the irele won't cause
	 * premature truncation and freeing of the inode, which results
	 * in log recovery failure.  We have to evict the unreferenced
	 * lru inodes after clearing SB_ACTIVE because we don't
	 * otherwise clean up the lru if there's a subsequent failure in
	 * xfs_mountfs, which leads to us leaking the inodes if nothing
	 * else (e.g. quotacheck) references the inodes before the
	 * mount failure occurs.
	 */
	mp->m_super->s_flags |= SB_ACTIVE;
	xfs_log_work_queue(mp);
	if (xlog_recovery_needed(log))
		error = xlog_recover_finish(log);
	mp->m_super->s_flags &= ~SB_ACTIVE;
	evict_inodes(mp->m_super);

	/*
	 * Drain the buffer LRU after log recovery. This is required for v4
	 * filesystems to avoid leaving around buffers with NULL verifier ops,
	 * but we do it unconditionally to make sure we're always in a clean
	 * cache state after mount.
	 *
	 * Don't push in the error case because the AIL may have pending intents
	 * that aren't removed until recovery is cancelled.
	 */
	if (xlog_recovery_needed(log)) {
		if (!error) {
			xfs_log_force(mp, XFS_LOG_SYNC);
			xfs_ail_push_all_sync(mp->m_ail);
		}
		xfs_notice(mp, "Ending recovery (logdev: %s)",
				mp->m_logname ? mp->m_logname : "internal");
	} else {
		xfs_info(mp, "Ending clean mount");
	}
	xfs_buftarg_drain(mp->m_ddev_targp);

	clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);

	/* Make sure the log is dead if we're returning failure. */
	ASSERT(!error || xlog_is_shutdown(log));

	return error;
}

/*
 * The mount has failed. Cancel the recovery if it hasn't completed and destroy
 * the log.
 */
void
xfs_log_mount_cancel(
	struct xfs_mount	*mp)
{
	xlog_recover_cancel(mp->m_log);
	xfs_log_unmount(mp);
}

/*
 * Flush out the iclog to disk ensuring that device caches are flushed and
 * the iclog hits stable storage before any completion waiters are woken.
84562306a36Sopenharmony_ci */ 84662306a36Sopenharmony_cistatic inline int 84762306a36Sopenharmony_cixlog_force_iclog( 84862306a36Sopenharmony_ci struct xlog_in_core *iclog) 84962306a36Sopenharmony_ci{ 85062306a36Sopenharmony_ci atomic_inc(&iclog->ic_refcnt); 85162306a36Sopenharmony_ci iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; 85262306a36Sopenharmony_ci if (iclog->ic_state == XLOG_STATE_ACTIVE) 85362306a36Sopenharmony_ci xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); 85462306a36Sopenharmony_ci return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); 85562306a36Sopenharmony_ci} 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci/* 85862306a36Sopenharmony_ci * Cycle all the iclogbuf locks to make sure all log IO completion 85962306a36Sopenharmony_ci * is done before we tear down these buffers. 86062306a36Sopenharmony_ci */ 86162306a36Sopenharmony_cistatic void 86262306a36Sopenharmony_cixlog_wait_iclog_completion(struct xlog *log) 86362306a36Sopenharmony_ci{ 86462306a36Sopenharmony_ci int i; 86562306a36Sopenharmony_ci struct xlog_in_core *iclog = log->l_iclog; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci for (i = 0; i < log->l_iclog_bufs; i++) { 86862306a36Sopenharmony_ci down(&iclog->ic_sema); 86962306a36Sopenharmony_ci up(&iclog->ic_sema); 87062306a36Sopenharmony_ci iclog = iclog->ic_next; 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci} 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci/* 87562306a36Sopenharmony_ci * Wait for the iclog and all prior iclogs to be written disk as required by the 87662306a36Sopenharmony_ci * log force state machine. Waiting on ic_force_wait ensures iclog completions 87762306a36Sopenharmony_ci * have been ordered and callbacks run before we are woken here, hence 87862306a36Sopenharmony_ci * guaranteeing that all the iclogs up to this one are on stable storage. 
87962306a36Sopenharmony_ci */ 88062306a36Sopenharmony_ciint 88162306a36Sopenharmony_cixlog_wait_on_iclog( 88262306a36Sopenharmony_ci struct xlog_in_core *iclog) 88362306a36Sopenharmony_ci __releases(iclog->ic_log->l_icloglock) 88462306a36Sopenharmony_ci{ 88562306a36Sopenharmony_ci struct xlog *log = iclog->ic_log; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci trace_xlog_iclog_wait_on(iclog, _RET_IP_); 88862306a36Sopenharmony_ci if (!xlog_is_shutdown(log) && 88962306a36Sopenharmony_ci iclog->ic_state != XLOG_STATE_ACTIVE && 89062306a36Sopenharmony_ci iclog->ic_state != XLOG_STATE_DIRTY) { 89162306a36Sopenharmony_ci XFS_STATS_INC(log->l_mp, xs_log_force_sleep); 89262306a36Sopenharmony_ci xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 89362306a36Sopenharmony_ci } else { 89462306a36Sopenharmony_ci spin_unlock(&log->l_icloglock); 89562306a36Sopenharmony_ci } 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 89862306a36Sopenharmony_ci return -EIO; 89962306a36Sopenharmony_ci return 0; 90062306a36Sopenharmony_ci} 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci/* 90362306a36Sopenharmony_ci * Write out an unmount record using the ticket provided. We have to account for 90462306a36Sopenharmony_ci * the data space used in the unmount ticket as this write is not done from a 90562306a36Sopenharmony_ci * transaction context that has already done the accounting for us. 
90662306a36Sopenharmony_ci */ 90762306a36Sopenharmony_cistatic int 90862306a36Sopenharmony_cixlog_write_unmount_record( 90962306a36Sopenharmony_ci struct xlog *log, 91062306a36Sopenharmony_ci struct xlog_ticket *ticket) 91162306a36Sopenharmony_ci{ 91262306a36Sopenharmony_ci struct { 91362306a36Sopenharmony_ci struct xlog_op_header ophdr; 91462306a36Sopenharmony_ci struct xfs_unmount_log_format ulf; 91562306a36Sopenharmony_ci } unmount_rec = { 91662306a36Sopenharmony_ci .ophdr = { 91762306a36Sopenharmony_ci .oh_clientid = XFS_LOG, 91862306a36Sopenharmony_ci .oh_tid = cpu_to_be32(ticket->t_tid), 91962306a36Sopenharmony_ci .oh_flags = XLOG_UNMOUNT_TRANS, 92062306a36Sopenharmony_ci }, 92162306a36Sopenharmony_ci .ulf = { 92262306a36Sopenharmony_ci .magic = XLOG_UNMOUNT_TYPE, 92362306a36Sopenharmony_ci }, 92462306a36Sopenharmony_ci }; 92562306a36Sopenharmony_ci struct xfs_log_iovec reg = { 92662306a36Sopenharmony_ci .i_addr = &unmount_rec, 92762306a36Sopenharmony_ci .i_len = sizeof(unmount_rec), 92862306a36Sopenharmony_ci .i_type = XLOG_REG_TYPE_UNMOUNT, 92962306a36Sopenharmony_ci }; 93062306a36Sopenharmony_ci struct xfs_log_vec vec = { 93162306a36Sopenharmony_ci .lv_niovecs = 1, 93262306a36Sopenharmony_ci .lv_iovecp = ®, 93362306a36Sopenharmony_ci }; 93462306a36Sopenharmony_ci LIST_HEAD(lv_chain); 93562306a36Sopenharmony_ci list_add(&vec.lv_list, &lv_chain); 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci BUILD_BUG_ON((sizeof(struct xlog_op_header) + 93862306a36Sopenharmony_ci sizeof(struct xfs_unmount_log_format)) != 93962306a36Sopenharmony_ci sizeof(unmount_rec)); 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci /* account for space used by record data */ 94262306a36Sopenharmony_ci ticket->t_curr_res -= sizeof(unmount_rec); 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len); 94562306a36Sopenharmony_ci} 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci/* 94862306a36Sopenharmony_ci * Mark the 
filesystem clean by writing an unmount record to the head of the
 * log.
 */
static void
xlog_unmount_write(
	struct xlog		*log)
{
	struct xfs_mount	*mp = log->l_mp;
	struct xlog_in_core	*iclog;
	struct xlog_ticket	*tic = NULL;
	int			error;

	/* 600 bytes: ample reservation for the tiny unmount record. */
	error = xfs_log_reserve(mp, 600, 1, &tic, 0);
	if (error)
		goto out_err;

	error = xlog_write_unmount_record(log, tic);
	/*
	 * At this point, we're umounting anyway, so there's no point in
	 * transitioning log state to shutdown. Just continue...
	 */
out_err:
	if (error)
		xfs_alert(mp, "%s: unmount record failed", __func__);

	/* Force the iclog holding the unmount record to stable storage. */
	spin_lock(&log->l_icloglock);
	iclog = log->l_iclog;
	error = xlog_force_iclog(iclog);
	xlog_wait_on_iclog(iclog);

	if (tic) {
		trace_xfs_log_umount_write(log, tic);
		xfs_log_ticket_ungrant(log, tic);
	}
}

/*
 * Debug check that every iclog is idle (ACTIVE and empty) before we write
 * the unmount record.
 */
static void
xfs_log_unmount_verify_iclog(
	struct xlog		*log)
{
	struct xlog_in_core	*iclog = log->l_iclog;

	do {
		ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
		ASSERT(iclog->ic_offset == 0);
	} while ((iclog = iclog->ic_next) != log->l_iclog);
}

/*
 * Unmount record used to have a string "Unmount filesystem--" in the
 * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
 * We just write the magic number now since that particular field isn't
 * currently architecture converted and "Unmount" is a bit foo.
 * As far as I know, there weren't any dependencies on the old behaviour.
 */
static void
xfs_log_unmount_write(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;

	if (!xfs_log_writable(mp))
		return;

	/* Flush everything out of the log first. */
	xfs_log_force(mp, XFS_LOG_SYNC);

	if (xlog_is_shutdown(log))
		return;

	/*
	 * If we think the summary counters are bad, avoid writing the unmount
	 * record to force log recovery at next mount, after which the summary
	 * counters will be recalculated. Refer to xlog_check_unmount_rec for
	 * more details.
	 */
	if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
		xfs_alert(mp, "%s: will fix summary counters at next mount",
				__func__);
		return;
	}

	xfs_log_unmount_verify_iclog(log);
	xlog_unmount_write(log);
}

/*
 * Empty the log for unmount/freeze.
 *
 * To do this, we first need to shut down the background log work so it is not
 * trying to cover the log as we clean up. We then need to unpin all objects in
 * the log so we can then flush them out. Once they have completed their IO and
 * run the callbacks removing themselves from the AIL, we can cover the log.
 */
int
xfs_log_quiesce(
	struct xfs_mount	*mp)
{
	/*
	 * Clear log incompat features since we're quiescing the log. Report
	 * failures, though it's not fatal to have a higher log feature
	 * protection level than the log contents actually require.
	 */
	if (xfs_clear_incompat_log_features(mp)) {
		int error;

		error = xfs_sync_sb(mp, false);
		if (error)
			xfs_warn(mp,
	"Failed to clear log incompat features on quiesce");
	}

	cancel_delayed_work_sync(&mp->m_log->l_work);
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * The superblock buffer is uncached and while xfs_ail_push_all_sync()
	 * will push it, xfs_buftarg_wait() will not wait for it. Further,
	 * xfs_buf_iowait() cannot be used because it was pushed with the
	 * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for
	 * the IO to complete.
	 */
	xfs_ail_push_all_sync(mp->m_ail);
	xfs_buftarg_wait(mp->m_ddev_targp);
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_unlock(mp->m_sb_bp);

	return xfs_log_cover(mp);
}

/* Quiesce the log and then mark it clean with an unmount record. */
void
xfs_log_clean(
	struct xfs_mount	*mp)
{
	xfs_log_quiesce(mp);
	xfs_log_unmount_write(mp);
}

/*
 * Shut down and release the AIL and Log.
 *
 * During unmount, we need to ensure we flush all the dirty metadata objects
 * from the AIL so that the log is empty before we write the unmount record to
 * the log. Once this is done, we can tear down the AIL and the log.
 */
void
xfs_log_unmount(
	struct xfs_mount	*mp)
{
	xfs_log_clean(mp);

	/*
	 * If shutdown has come from iclog IO context, the log
	 * cleaning will have been skipped and so we need to wait
	 * for the iclog to complete shutdown processing before we
	 * tear anything down.
	 */
	xlog_wait_iclog_completion(mp->m_log);

	xfs_buftarg_drain(mp->m_ddev_targp);

	xfs_trans_ail_destroy(mp);

	xfs_sysfs_del(&mp->m_log->l_kobj);

	xlog_dealloc_log(mp->m_log);
}

/*
 * Initialise a log item: wire it to this mount's log and AIL, record its
 * type and operations vector, and reset all list linkage and CIL state.
 */
void
xfs_log_item_init(
	struct xfs_mount	*mp,
	struct xfs_log_item	*item,
	int			type,
	const struct xfs_item_ops *ops)
{
	item->li_log = mp->m_log;
	item->li_ailp = mp->m_ail;
	item->li_type = type;
	item->li_ops = ops;
	item->li_lv = NULL;

	INIT_LIST_HEAD(&item->li_ail);
	INIT_LIST_HEAD(&item->li_cil);
	INIT_LIST_HEAD(&item->li_bio_list);
	INIT_LIST_HEAD(&item->li_trans);
}

/*
 * Wake up processes waiting for log space after we have moved the log tail.
 */
void
xfs_log_space_wake(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	int			free_bytes;

	if (xlog_is_shutdown(log))
		return;

	/*
	 * Check each grant head locklessly first; only take the head lock and
	 * recompute the free space if there actually are waiters.
	 */
	if (!list_empty_careful(&log->l_write_head.waiters)) {
		ASSERT(!xlog_in_recovery(log));

		spin_lock(&log->l_write_head.lock);
		free_bytes = xlog_space_left(log, &log->l_write_head.grant);
		xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
		spin_unlock(&log->l_write_head.lock);
	}

	if (!list_empty_careful(&log->l_reserve_head.waiters)) {
		ASSERT(!xlog_in_recovery(log));

		spin_lock(&log->l_reserve_head.lock);
		free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
		xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
		spin_unlock(&log->l_reserve_head.lock);
	}
}

/*
 * Determine if we have a transaction that has gone to disk that needs to be
 * covered. To begin the transition to the idle state firstly the log needs to
 * be idle. That means the CIL, the AIL and the iclogs needs to be empty before
 * we start attempting to cover the log.
 *
 * Only if we are then in a state where covering is needed, the caller is
 * informed that dummy transactions are required to move the log into the idle
 * state.
 *
 * If there are any items in the AIl or CIL, then we do not want to attempt to
 * cover the log as we may be in a situation where there isn't log space
 * available to run a dummy transaction and this can lead to deadlocks when the
 * tail of the log is pinned by an item that is modified in the CIL. Hence
 * there's no point in running a dummy transaction at this point because we
 * can't start trying to idle the log until both the CIL and AIL are empty.
 *
 * NOTE: not idempotent — a "covering needed" result also advances
 * l_covered_state (NEED -> DONE, NEED2 -> DONE2), so call it once per
 * covering attempt (see xfs_log_cover).
 */
static bool
xfs_log_need_covered(
	struct xfs_mount	*mp)
{
	struct xlog	*log = mp->m_log;
	bool		needed = false;

	if (!xlog_cil_empty(log))
		return false;

	spin_lock(&log->l_icloglock);
	switch (log->l_covered_state) {
	case XLOG_STATE_COVER_DONE:
	case XLOG_STATE_COVER_DONE2:
	case XLOG_STATE_COVER_IDLE:
		break;
	case XLOG_STATE_COVER_NEED:
	case XLOG_STATE_COVER_NEED2:
		/* Both the AIL and the iclogs must be empty to cover. */
		if (xfs_ail_min_lsn(log->l_ailp))
			break;
		if (!xlog_iclogs_empty(log))
			break;

		needed = true;
		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
			log->l_covered_state = XLOG_STATE_COVER_DONE;
		else
			log->l_covered_state = XLOG_STATE_COVER_DONE2;
		break;
	default:
		needed = true;
		break;
	}
	spin_unlock(&log->l_icloglock);
	return needed;
}

/*
 * Explicitly cover the log. This is similar to background log covering but
 * intended for usage in quiesce codepaths.
The caller is responsible to ensure
 * the log is idle and suitable for covering. The CIL, iclog buffers and AIL
 * must all be empty.
 */
static int
xfs_log_cover(
	struct xfs_mount	*mp)
{
	int			error = 0;
	bool			need_covered;

	/* Caller contract: log idle (CIL/iclogs/AIL empty) or shut down. */
	ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) &&
	        !xfs_ail_min_lsn(mp->m_log->l_ailp)) ||
	       xlog_is_shutdown(mp->m_log));

	if (!xfs_log_writable(mp))
		return 0;

	/*
	 * xfs_log_need_covered() is not idempotent because it progresses the
	 * state machine if the log requires covering. Therefore, we must call
	 * this function once and use the result until we've issued an sb sync.
	 * Do so first to make that abundantly clear.
	 *
	 * Fall into the covering sequence if the log needs covering or the
	 * mount has lazy superblock accounting to sync to disk. The sb sync
	 * used for covering accumulates the in-core counters, so covering
	 * handles this for us.
	 */
	need_covered = xfs_log_need_covered(mp);
	if (!need_covered && !xfs_has_lazysbcount(mp))
		return 0;

	/*
	 * To cover the log, commit the superblock twice (at most) in
	 * independent checkpoints. The first serves as a reference for the
	 * tail pointer. The sync transaction and AIL push empties the AIL and
	 * updates the in-core tail to the LSN of the first checkpoint. The
	 * second commit updates the on-disk tail with the in-core LSN,
	 * covering the log. Push the AIL one more time to leave it empty, as
	 * we found it.
	 */
	do {
		error = xfs_sync_sb(mp, true);
		if (error)
			break;
		xfs_ail_push_all_sync(mp->m_ail);
	} while (xfs_log_need_covered(mp));

	return error;
}

/*
 * We may be holding the log iclog lock upon entering this routine.
 */
xfs_lsn_t
xlog_assign_tail_lsn_locked(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	struct xfs_log_item	*lip;
	xfs_lsn_t		tail_lsn;

	/* Caller must hold the AIL lock; we read the AIL minimum below. */
	assert_spin_locked(&mp->m_ail->ail_lock);

	/*
	 * To make sure we always have a valid LSN for the log tail we keep
	 * track of the last LSN which was committed in log->l_last_sync_lsn,
	 * and use that when the AIL was empty.
	 */
	lip = xfs_ail_min(mp->m_ail);
	if (lip)
		tail_lsn = lip->li_lsn;
	else
		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
	trace_xfs_log_assign_tail_lsn(log, tail_lsn);
	atomic64_set(&log->l_tail_lsn, tail_lsn);
	return tail_lsn;
}

/*
 * Take the AIL lock and recompute/publish the in-core log tail LSN.
 */
xfs_lsn_t
xlog_assign_tail_lsn(
	struct xfs_mount	*mp)
{
	xfs_lsn_t		tail_lsn;

	spin_lock(&mp->m_ail->ail_lock);
	tail_lsn = xlog_assign_tail_lsn_locked(mp);
	spin_unlock(&mp->m_ail->ail_lock);

	return tail_lsn;
}

/*
 * Return the space in the log between the tail and the head.
 * The head is passed in the cycle/bytes formal parms.  In the special case
 * where the reserve head has wrapped past the tail, this calculation is no
 * longer valid.  In this case, just return 0 which means there is no space in
 * the log.  This works for all places where this function is called with the
 * reserve head.  Of course, if the write head were to ever wrap the tail, we
 * should blow up.  Rather than catch this case here, we depend on other
 * ASSERTions in other parts of the code.  XXXmiken
 *
 * If reservation head is behind the tail, we have a problem.  Warn about it,
 * but then treat it as if the log is empty.
 *
 * If the log is shut down, the head and tail may be invalid or out of whack, so
 * shortcut invalidity asserts in this case so that we don't trigger them
 * falsely.
 */
STATIC int
xlog_space_left(
	struct xlog	*log,
	atomic64_t	*head)
{
	int		tail_bytes;
	int		tail_cycle;
	int		head_cycle;
	int		head_bytes;

	xlog_crack_grant_head(head, &head_cycle, &head_bytes);
	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
	/* tail is cracked in basic blocks; convert to bytes to match head */
	tail_bytes = BBTOB(tail_bytes);
	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
		return log->l_logsize - (head_bytes - tail_bytes);
	if (tail_cycle + 1 < head_cycle)
		return 0;

	/* Ignore potential inconsistency when shutdown. */
	if (xlog_is_shutdown(log))
		return log->l_logsize;

	if (tail_cycle < head_cycle) {
		ASSERT(tail_cycle == (head_cycle - 1));
		return tail_bytes - head_bytes;
	}

	/*
	 * The reservation head is behind the tail. In this case we just want to
	 * return the size of the log as the amount of space left.
	 */
	xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
	xfs_alert(log->l_mp, "  tail_cycle = %d, tail_bytes = %d",
		  tail_cycle, tail_bytes);
	xfs_alert(log->l_mp, "  GH   cycle = %d, GH   bytes = %d",
		  head_cycle, head_bytes);
	ASSERT(0);
	return log->l_logsize;
}


/*
 * Log I/O completion worker, run from the per-log end-io workqueue.
 */
static void
xlog_ioend_work(
	struct work_struct	*work)
{
	struct xlog_in_core     *iclog =
		container_of(work, struct xlog_in_core, ic_end_io_work);
	struct xlog		*log = iclog->ic_log;
	int			error;

	error = blk_status_to_errno(iclog->ic_bio.bi_status);
#ifdef DEBUG
	/* treat writes with injected CRC errors as failed */
	if (iclog->ic_fail_crc)
		error = -EIO;
#endif

	/*
	 * Race to shutdown the filesystem if we see an error.
	 */
	if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
		xfs_alert(log->l_mp, "log I/O error %d", error);
		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
	}

	xlog_state_done_syncing(iclog);
	bio_uninit(&iclog->ic_bio);

	/*
	 * Drop the lock to signal that we are done. Nothing references the
	 * iclog after this, so an unmount waiting on this lock can now tear it
	 * down safely. As such, it is unsafe to reference the iclog after the
	 * unlock as we could race with it being freed.
	 */
	up(&iclog->ic_sema);
}

/*
 * Return size of each in-core log record buffer.
 *
 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
 *
 * If the filesystem blocksize is too large, we may need to choose a
 * larger size since the directory code currently logs entire blocks.
 */
STATIC void
xlog_get_iclog_buffer_size(
	struct xfs_mount	*mp,
	struct xlog		*log)
{
	/* Fall back to defaults when the mount didn't specify values. */
	if (mp->m_logbufs <= 0)
		mp->m_logbufs = XLOG_MAX_ICLOGS;
	if (mp->m_logbsize <= 0)
		mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;

	log->l_iclog_bufs = mp->m_logbufs;
	log->l_iclog_size = mp->m_logbsize;

	/*
	 * # headers = size / 32k - one header holds cycles from 32k of data.
	 */
	log->l_iclog_heads =
		DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
	log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
}

/*
 * Schedule the next periodic log work, xfs_syncd_centisecs from now.
 */
void
xfs_log_work_queue(
	struct xfs_mount        *mp)
{
	queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
				msecs_to_jiffies(xfs_syncd_centisecs * 10));
}

/*
 * Clear the log incompat flags if we have the opportunity.
 *
 * This only happens if we're about to log the second dummy transaction as part
 * of covering the log and we can get the log incompat feature usage lock.
 */
static inline void
xlog_clear_incompat(
	struct xlog		*log)
{
	struct xfs_mount	*mp = log->l_mp;

	/* Nothing to do if no incompat log features are set. */
	if (!xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL))
		return;

	/* Only clear when we're about to log the second dummy transaction. */
	if (log->l_covered_state != XLOG_STATE_COVER_DONE2)
		return;

	/* Don't block covering on active users of the incompat features. */
	if (!down_write_trylock(&log->l_incompat_users))
		return;

	xfs_clear_incompat_log_features(mp);
	up_write(&log->l_incompat_users);
}

/*
 * Every sync period we need to unpin all items in the AIL and push them to
 * disk. If there is nothing dirty, then we might need to cover the log to
 * indicate that the filesystem is idle.
 */
static void
xfs_log_worker(
	struct work_struct	*work)
{
	struct xlog		*log = container_of(to_delayed_work(work),
						struct xlog, l_work);
	struct xfs_mount	*mp = log->l_mp;

	/* dgc: errors ignored - not fatal and nowhere to report them */
	if (xfs_fs_writable(mp, SB_FREEZE_WRITE) && xfs_log_need_covered(mp)) {
		/*
		 * Dump a transaction into the log that contains no real change.
		 * This is needed to stamp the current tail LSN into the log
		 * during the covering operation.
		 *
		 * We cannot use an inode here for this - that will push dirty
		 * state back up into the VFS and then periodic inode flushing
		 * will prevent log covering from making progress. Hence we
		 * synchronously log the superblock instead to ensure the
		 * superblock is immediately unpinned and can be written back.
		 */
		xlog_clear_incompat(log);
		xfs_sync_sb(mp, true);
	} else
		xfs_log_force(mp, 0);

	/* start pushing all the metadata that is currently dirty */
	xfs_ail_push_all(mp->m_ail);

	/* queue us up again */
	xfs_log_work_queue(mp);
}

/*
 * This routine initializes some of the log structure for a given mount point.
 * Its primary purpose is to fill in enough, so recovery can occur.  However,
 * some other stuff may be filled in too.
 *
 * Returns the new xlog on success, or an ERR_PTR() on failure; partially
 * constructed state is torn down via the out_* unwind labels.
 */
STATIC struct xlog *
xlog_alloc_log(
	struct xfs_mount	*mp,
	struct xfs_buftarg	*log_target,
	xfs_daddr_t		blk_offset,
	int			num_bblks)
{
	struct xlog		*log;
	xlog_rec_header_t	*head;
	xlog_in_core_t		**iclogp;
	xlog_in_core_t		*iclog, *prev_iclog=NULL;
	int			i;
	int			error = -ENOMEM;
	uint			log2_size = 0;

	log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
	if (!log) {
		xfs_warn(mp, "Log allocation failed: No memory!");
		goto out;
	}

	log->l_mp	   = mp;
	log->l_targ	   = log_target;
	log->l_logsize     = BBTOB(num_bblks);
	log->l_logBBstart  = blk_offset;
	log->l_logBBsize   = num_bblks;
	log->l_covered_state = XLOG_STATE_COVER_IDLE;
	set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
	INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);

	log->l_prev_block  = -1;
	/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
	xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
	xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
	log->l_curr_cycle  = 1;	    /* 0 is bad since this is initial value */

	/* v2 logs with a stripe unit round iclog writes up to that unit. */
	if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
		log->l_iclog_roundoff = mp->m_sb.sb_logsunit;
	else
		log->l_iclog_roundoff = BBSIZE;

	xlog_grant_head_init(&log->l_reserve_head);
	xlog_grant_head_init(&log->l_write_head);

	/* Validate the on-disk log sector size against mount geometry. */
	error = -EFSCORRUPTED;
	if (xfs_has_sector(mp)) {
	        log2_size = mp->m_sb.sb_logsectlog;
		if (log2_size < BBSHIFT) {
			xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
				log2_size, BBSHIFT);
			goto out_free_log;
		}

	        log2_size -= BBSHIFT;
		if (log2_size > mp->m_sectbb_log) {
			xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
				log2_size, mp->m_sectbb_log);
			goto out_free_log;
		}

		/* for larger sector sizes, must have v2 or external log */
		if (log2_size && log->l_logBBstart > 0 &&
			    !xfs_has_logv2(mp)) {
			xfs_warn(mp,
		"log sector size (0x%x) invalid for configuration.",
				log2_size);
			goto out_free_log;
		}
	}
	log->l_sectBBsize = 1 << log2_size;

	init_rwsem(&log->l_incompat_users);

	xlog_get_iclog_buffer_size(mp, log);

	spin_lock_init(&log->l_icloglock);
	init_waitqueue_head(&log->l_flush_wait);

	iclogp = &log->l_iclog;
	/*
	 * The amount of memory to allocate for the iclog structure is
	 * rather funky due to the way the structure is defined.  It is
	 * done this way so that we can use different sizes for machines
	 * with different amounts of memory.  See the definition of
	 * xlog_in_core_t in xfs_log_priv.h for details.
	 */
	ASSERT(log->l_iclog_size >= 4096);
	for (i = 0; i < log->l_iclog_bufs; i++) {
		/* bio_vec array sized to map the whole iclog data buffer */
		size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
				sizeof(struct bio_vec);

		iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
		if (!iclog)
			goto out_free_iclog;

		/* link the new iclog into the singly-built ring */
		*iclogp = iclog;
		iclog->ic_prev = prev_iclog;
		prev_iclog = iclog;

		iclog->ic_data = kvzalloc(log->l_iclog_size,
				GFP_KERNEL | __GFP_RETRY_MAYFAIL);
		if (!iclog->ic_data)
			goto out_free_iclog;
		head = &iclog->ic_header;
		memset(head, 0, sizeof(xlog_rec_header_t));
		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
		head->h_version = cpu_to_be32(
			xfs_has_logv2(log->l_mp) ? 2 : 1);
		head->h_size = cpu_to_be32(log->l_iclog_size);
		/* new fields */
		head->h_fmt = cpu_to_be32(XLOG_FMT);
		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));

		iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
		iclog->ic_state = XLOG_STATE_ACTIVE;
		iclog->ic_log = log;
		atomic_set(&iclog->ic_refcnt, 0);
		INIT_LIST_HEAD(&iclog->ic_callbacks);
		iclog->ic_datap = (void *)iclog->ic_data + log->l_iclog_hsize;

		init_waitqueue_head(&iclog->ic_force_wait);
		init_waitqueue_head(&iclog->ic_write_wait);
		INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
		sema_init(&iclog->ic_sema, 1);

		iclogp = &iclog->ic_next;
	}
	*iclogp = log->l_iclog;			/* complete ring */
	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */

	log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
				    WQ_HIGHPRI),
			0, mp->m_super->s_id);
	if (!log->l_ioend_workqueue)
		goto out_free_iclog;

	error = xlog_cil_init(log);
	if (error)
		goto out_destroy_workqueue;
	return log;

out_destroy_workqueue:
	destroy_workqueue(log->l_ioend_workqueue);
out_free_iclog:
	/* walk the (possibly incomplete) iclog ring and free each entry */
	for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
		prev_iclog = iclog->ic_next;
		kmem_free(iclog->ic_data);
		kmem_free(iclog);
		if (prev_iclog == log->l_iclog)
			break;
	}
out_free_log:
	kmem_free(log);
out:
	return ERR_PTR(error);
}	/* xlog_alloc_log */

/*
 * Compute the LSN that we'd need to push the log tail towards in order to have
 * (a) enough on-disk log space to log the number of bytes specified, (b) at
 * least 25% of the log space free, and (c) at least 256 blocks free.  If the
 * log free space already meets all three thresholds, this function returns
 * NULLCOMMITLSN.
 */
xfs_lsn_t
xlog_grant_push_threshold(
	struct xlog	*log,
	int		need_bytes)
{
	xfs_lsn_t	threshold_lsn = 0;
	xfs_lsn_t	last_sync_lsn;
	int		free_blocks;
	int		free_bytes;
	int		threshold_block;
	int		threshold_cycle;
	int		free_threshold;

	ASSERT(BTOBB(need_bytes) < log->l_logBBsize);

	free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
	free_blocks = BTOBBT(free_bytes);

	/*
	 * Set the threshold for the minimum number of free blocks in the
	 * log to the maximum of what the caller needs, one quarter of the
	 * log, and 256 blocks.
	 */
	free_threshold = BTOBB(need_bytes);
	free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
	free_threshold = max(free_threshold, 256);
	if (free_blocks >= free_threshold)
		return NULLCOMMITLSN;

	xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
						&threshold_block);
	threshold_block += free_threshold;
	if (threshold_block >= log->l_logBBsize) {
		/* target block wraps the physical log; bump the cycle */
		threshold_block -= log->l_logBBsize;
		threshold_cycle += 1;
	}
	threshold_lsn = xlog_assign_lsn(threshold_cycle,
					threshold_block);
	/*
	 * Don't pass in an lsn greater than the lsn of the last
	 * log record known to be on disk. Use a snapshot of the last sync lsn
	 * so that it doesn't change between the compare and the set.
	 */
	last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
	if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
		threshold_lsn = last_sync_lsn;

	return threshold_lsn;
}

/*
 * Push the tail of the log if we need to do so to maintain the free log space
 * thresholds set out by xlog_grant_push_threshold.
 * We may need to adopt a
 * policy which pushes on an lsn which is further along in the log once we
 * reach the high water mark.  In this manner, we would be creating a low water
 * mark.
 */
STATIC void
xlog_grant_push_ail(
	struct xlog	*log,
	int		need_bytes)
{
	xfs_lsn_t	threshold_lsn;

	threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
	if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log))
		return;

	/*
	 * Get the transaction layer to kick the dirty buffers out to
	 * disk asynchronously. No point in trying to do this if
	 * the filesystem is shutting down.
	 */
	xfs_ail_push(log->l_ailp, threshold_lsn);
}

/*
 * Stamp cycle number in every block
 */
STATIC void
xlog_pack_data(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			roundoff)
{
	int			i, j, k;
	int			size = iclog->ic_offset + roundoff;
	__be32			cycle_lsn;
	char			*dp;

	cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);

	/*
	 * Save the first __be32 of each basic block into the header's cycle
	 * data array, then replace it in place with the cycle number.
	 */
	dp = iclog->ic_datap;
	for (i = 0; i < BTOBB(size); i++) {
		if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
			break;
		iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
		*(__be32 *)dp = cycle_lsn;
		dp += BBSIZE;
	}

	if (xfs_has_logv2(log->l_mp)) {
		xlog_in_core_2_t *xhdr = iclog->ic_data;

		/* remaining blocks spill into the v2 extended headers */
		for ( ; i < BTOBB(size); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
			*(__be32 *)dp = cycle_lsn;
			dp += BBSIZE;
		}

		/* stamp the cycle into every extended header as well */
		for (i = 1; i < log->l_iclog_heads; i++)
			xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
	}
}

/*
 * Calculate the checksum for a log buffer.
 *
 * This is a little more complicated than it should be because the various
 * headers and the actual data are non-contiguous.
 */
__le32
xlog_cksum(
	struct xlog		*log,
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			size)
{
	uint32_t		crc;

	/* first generate the crc for the record header ... */
	crc = xfs_start_cksum_update((char *)rhead,
			sizeof(struct xlog_rec_header),
			offsetof(struct xlog_rec_header, h_crc));

	/* ... then for additional cycle data for v2 logs ... */
	if (xfs_has_logv2(log->l_mp)) {
		union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
		int		i;
		int		xheads;

		xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);

		for (i = 1; i < xheads; i++) {
			crc = crc32c(crc, &xhdr[i].hic_xheader,
				     sizeof(struct xlog_rec_ext_header));
		}
	}

	/* ...
 and finally for the payload */
	crc = crc32c(crc, dp, size);

	return xfs_end_cksum(crc);
}

/*
 * Log bio completion: queue the completion processing to the per-log
 * end-io workqueue (the real work happens in xlog_ioend_work()).
 */
static void
xlog_bio_end_io(
	struct bio		*bio)
{
	struct xlog_in_core	*iclog = bio->bi_private;

	queue_work(iclog->ic_log->l_ioend_workqueue,
		   &iclog->ic_end_io_work);
}

/*
 * Add the iclog data buffer to the bio one page at a time.  Returns -EIO if
 * a page cannot be added to the bio.
 */
static int
xlog_map_iclog_data(
	struct bio		*bio,
	void			*data,
	size_t			count)
{
	do {
		struct page	*page = kmem_to_page(data);
		unsigned int	off = offset_in_page(data);
		size_t		len = min_t(size_t, count, PAGE_SIZE - off);

		if (bio_add_page(bio, page, len, off) != len)
			return -EIO;

		data += len;
		count -= len;
	} while (count);

	return 0;
}

STATIC void
xlog_write_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	uint64_t		bno,
	unsigned int		count)
{
	ASSERT(bno < log->l_logBBsize);
trace_xlog_iclog_write(iclog, _RET_IP_); 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci /* 188062306a36Sopenharmony_ci * We lock the iclogbufs here so that we can serialise against I/O 188162306a36Sopenharmony_ci * completion during unmount. We might be processing a shutdown 188262306a36Sopenharmony_ci * triggered during unmount, and that can occur asynchronously to the 188362306a36Sopenharmony_ci * unmount thread, and hence we need to ensure that completes before 188462306a36Sopenharmony_ci * tearing down the iclogbufs. Hence we need to hold the buffer lock 188562306a36Sopenharmony_ci * across the log IO to archieve that. 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_ci down(&iclog->ic_sema); 188862306a36Sopenharmony_ci if (xlog_is_shutdown(log)) { 188962306a36Sopenharmony_ci /* 189062306a36Sopenharmony_ci * It would seem logical to return EIO here, but we rely on 189162306a36Sopenharmony_ci * the log state machine to propagate I/O errors instead of 189262306a36Sopenharmony_ci * doing it here. We kick of the state machine and unlock 189362306a36Sopenharmony_ci * the buffer manually, the code needs to be kept in sync 189462306a36Sopenharmony_ci * with the I/O completion path. 189562306a36Sopenharmony_ci */ 189662306a36Sopenharmony_ci goto sync; 189762306a36Sopenharmony_ci } 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci /* 190062306a36Sopenharmony_ci * We use REQ_SYNC | REQ_IDLE here to tell the block layer the are more 190162306a36Sopenharmony_ci * IOs coming immediately after this one. This prevents the block layer 190262306a36Sopenharmony_ci * writeback throttle from throttling log writes behind background 190362306a36Sopenharmony_ci * metadata writeback and causing priority inversions. 
190462306a36Sopenharmony_ci */ 190562306a36Sopenharmony_ci bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec, 190662306a36Sopenharmony_ci howmany(count, PAGE_SIZE), 190762306a36Sopenharmony_ci REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE); 190862306a36Sopenharmony_ci iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; 190962306a36Sopenharmony_ci iclog->ic_bio.bi_end_io = xlog_bio_end_io; 191062306a36Sopenharmony_ci iclog->ic_bio.bi_private = iclog; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { 191362306a36Sopenharmony_ci iclog->ic_bio.bi_opf |= REQ_PREFLUSH; 191462306a36Sopenharmony_ci /* 191562306a36Sopenharmony_ci * For external log devices, we also need to flush the data 191662306a36Sopenharmony_ci * device cache first to ensure all metadata writeback covered 191762306a36Sopenharmony_ci * by the LSN in this iclog is on stable storage. This is slow, 191862306a36Sopenharmony_ci * but it *must* complete before we issue the external log IO. 191962306a36Sopenharmony_ci * 192062306a36Sopenharmony_ci * If the flush fails, we cannot conclude that past metadata 192162306a36Sopenharmony_ci * writeback from the log succeeded. Repeating the flush is 192262306a36Sopenharmony_ci * not possible, hence we must shut down with log IO error to 192362306a36Sopenharmony_ci * avoid shutdown re-entering this path and erroring out again. 
192462306a36Sopenharmony_ci */ 192562306a36Sopenharmony_ci if (log->l_targ != log->l_mp->m_ddev_targp && 192662306a36Sopenharmony_ci blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) 192762306a36Sopenharmony_ci goto shutdown; 192862306a36Sopenharmony_ci } 192962306a36Sopenharmony_ci if (iclog->ic_flags & XLOG_ICL_NEED_FUA) 193062306a36Sopenharmony_ci iclog->ic_bio.bi_opf |= REQ_FUA; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) 193562306a36Sopenharmony_ci goto shutdown; 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci if (is_vmalloc_addr(iclog->ic_data)) 193862306a36Sopenharmony_ci flush_kernel_vmap_range(iclog->ic_data, count); 193962306a36Sopenharmony_ci 194062306a36Sopenharmony_ci /* 194162306a36Sopenharmony_ci * If this log buffer would straddle the end of the log we will have 194262306a36Sopenharmony_ci * to split it up into two bios, so that we can continue at the start. 
194362306a36Sopenharmony_ci */ 194462306a36Sopenharmony_ci if (bno + BTOBB(count) > log->l_logBBsize) { 194562306a36Sopenharmony_ci struct bio *split; 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, 194862306a36Sopenharmony_ci GFP_NOIO, &fs_bio_set); 194962306a36Sopenharmony_ci bio_chain(split, &iclog->ic_bio); 195062306a36Sopenharmony_ci submit_bio(split); 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci /* restart at logical offset zero for the remainder */ 195362306a36Sopenharmony_ci iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; 195462306a36Sopenharmony_ci } 195562306a36Sopenharmony_ci 195662306a36Sopenharmony_ci submit_bio(&iclog->ic_bio); 195762306a36Sopenharmony_ci return; 195862306a36Sopenharmony_cishutdown: 195962306a36Sopenharmony_ci xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); 196062306a36Sopenharmony_cisync: 196162306a36Sopenharmony_ci xlog_state_done_syncing(iclog); 196262306a36Sopenharmony_ci up(&iclog->ic_sema); 196362306a36Sopenharmony_ci} 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_ci/* 196662306a36Sopenharmony_ci * We need to bump cycle number for the part of the iclog that is 196762306a36Sopenharmony_ci * written to the start of the log. Watch out for the header magic 196862306a36Sopenharmony_ci * number case, though. 
 */
static void
xlog_split_iclog(
	struct xlog		*log,
	void			*data,
	uint64_t		bno,
	unsigned int		count)
{
	/* byte offset within the iclog where the log physically wraps */
	unsigned int		split_offset = BBTOB(log->l_logBBsize - bno);
	unsigned int		i;

	for (i = split_offset; i < count; i += BBSIZE) {
		uint32_t	cycle = get_unaligned_be32(data + i);

		if (++cycle == XLOG_HEADER_MAGIC_NUM)
			cycle++;
		put_unaligned_be32(cycle, data + i);
	}
}

/*
 * Work out the total size of this iclog write, including the log record
 * header, and return how much of that total is roundoff up to the BB or
 * log stripe unit boundary via @roundoff.
 */
static int
xlog_calc_iclog_size(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	uint32_t		*roundoff)
{
	uint32_t		count_init, count;

	/* Add for LR header */
	count_init = log->l_iclog_hsize + iclog->ic_offset;
	count = roundup(count_init, log->l_iclog_roundoff);

	*roundoff = count - count_init;

	ASSERT(count >= count_init);
	ASSERT(*roundoff < log->l_iclog_roundoff);
	return count;
}

/*
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
 * fashion.  Previously, we should have moved the current iclog
 * ptr in the log to point to the next available iclog.  This allows further
 * write to continue while this code syncs out an iclog ready to go.
 * Before an in-core log can be written out, the data section must be scanned
 * to save away the 1st word of each BBSIZE block into the header.  We replace
 * it with the current cycle count.  Each BBSIZE block is tagged with the
 * cycle count because there is an implicit assumption that drives will
 * guarantee that entire 512 byte blocks get written at once.  In other words,
 * we can't have part of a 512 byte block written and part not written.  By
 * tagging each block, we will know which blocks are valid when recovering
 * after an unclean shutdown.
 *
 * This routine is single threaded on the iclog.  No other thread can be in
 * this routine with the same iclog.  Changing contents of iclog can there-
 * fore be done without grabbing the state machine lock.  Updating the global
 * log will require grabbing the lock though.
 *
 * The entire log manager uses a logical block numbering scheme.  Only
 * xlog_write_iclog knows about the fact that the log may not start with
 * block zero on a given device.
 */
STATIC void
xlog_sync(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	struct xlog_ticket	*ticket)
{
	unsigned int		count;		/* byte count of bwrite */
	unsigned int		roundoff;       /* roundoff to BB or stripe */
	uint64_t		bno;
	unsigned int		size;

	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
	trace_xlog_iclog_sync(iclog, _RET_IP_);

	count = xlog_calc_iclog_size(log, iclog, &roundoff);

	/*
	 * If we have a ticket, account for the roundoff via the ticket
	 * reservation to avoid touching the hot grant heads needlessly.
	 * Otherwise, we have to move grant heads directly.
	 */
	if (ticket) {
		ticket->t_curr_res -= roundoff;
	} else {
		xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
		xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
	}

	/* put cycle number in every block */
	xlog_pack_data(log, iclog, roundoff);

	/* real byte length */
	size = iclog->ic_offset;
	if (xfs_has_logv2(log->l_mp))
		size += roundoff;
	iclog->ic_header.h_len = cpu_to_be32(size);

	XFS_STATS_INC(log->l_mp, xs_log_writes);
	XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));

	bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));

	/* Do we need to split this write into 2 parts? */
	if (bno + BTOBB(count) > log->l_logBBsize)
		xlog_split_iclog(log, &iclog->ic_header, bno, count);

	/* calculate the checksum */
	iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
					    iclog->ic_datap, size);
	/*
	 * Intentionally corrupt the log record CRC based on the error injection
	 * frequency, if defined. This facilitates testing log recovery in the
	 * event of torn writes. Hence, set the IOABORT state to abort the log
	 * write on I/O completion and shutdown the fs. The subsequent mount
	 * detects the bad CRC and attempts to recover.
	 */
#ifdef DEBUG
	if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
		iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
		iclog->ic_fail_crc = true;
		xfs_warn(log->l_mp,
	"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
			 be64_to_cpu(iclog->ic_header.h_lsn));
	}
#endif
	xlog_verify_iclog(log, iclog, count);
	xlog_write_iclog(log, iclog, bno, count);
}

/*
 * Deallocate a log structure
 */
STATIC void
xlog_dealloc_log(
	struct xlog	*log)
{
	xlog_in_core_t	*iclog, *next_iclog;
	int		i;

	/*
	 * Destroy the CIL after waiting for iclog IO completion because an
	 * iclog EIO error will try to shut down the log, which accesses the
	 * CIL to wake up the waiters.
	 */
	xlog_cil_destroy(log);

	iclog = log->l_iclog;
	for (i = 0; i < log->l_iclog_bufs; i++) {
		next_iclog = iclog->ic_next;
		kmem_free(iclog->ic_data);
		kmem_free(iclog);
		iclog = next_iclog;
	}

	log->l_mp->m_log = NULL;
	destroy_workqueue(log->l_ioend_workqueue);
	kmem_free(log);
}

/*
 * Update counters atomically now that memcpy is done.
 */
static inline void
xlog_state_finish_copy(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			record_cnt,
	int			copy_bytes)
{
	lockdep_assert_held(&log->l_icloglock);

	be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
	iclog->ic_offset += copy_bytes;
}

/*
 * print out info relating to regions written which consume
 * the reservation
 */
void
xlog_print_tic_res(
	struct xfs_mount	*mp,
	struct xlog_ticket	*ticket)
{
	xfs_warn(mp, "ticket reservation summary:");
	xfs_warn(mp, "  unit res    = %d bytes", ticket->t_unit_res);
	xfs_warn(mp, "  current res = %d bytes", ticket->t_curr_res);
	xfs_warn(mp, "  original count  = %d", ticket->t_ocnt);
	xfs_warn(mp, "  remaining count = %d", ticket->t_cnt);
}

/*
 * Print a summary of the transaction.
 */
void
xlog_print_trans(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_log_item	*lip;

	/* dump core transaction and ticket info */
	xfs_warn(mp, "transaction summary:");
	xfs_warn(mp, "  log res   = %d", tp->t_log_res);
	xfs_warn(mp, "  log count = %d", tp->t_log_count);
	xfs_warn(mp, "  flags     = 0x%x", tp->t_flags);

	xlog_print_tic_res(mp, tp->t_ticket);

	/* dump each log item */
	list_for_each_entry(lip, &tp->t_items, li_trans) {
		struct xfs_log_vec	*lv = lip->li_lv;
		struct xfs_log_iovec	*vec;
		int			i;

		xfs_warn(mp, "log item: ");
		xfs_warn(mp, "  type	= 0x%x", lip->li_type);
		xfs_warn(mp, "  flags	= 0x%lx", lip->li_flags);
		if (!lv)
			continue;
		xfs_warn(mp, "  niovecs	= %d", lv->lv_niovecs);
		xfs_warn(mp, "  size	= %d", lv->lv_size);
		xfs_warn(mp, "  bytes	= %d", lv->lv_bytes);
		xfs_warn(mp, "  buf len	= %d", lv->lv_buf_len);

		/* dump each iovec for the log item */
		vec = lv->lv_iovecp;
		for (i = 0; i < lv->lv_niovecs; i++) {
			/* dump at most the first 32 bytes of each iovec */
			int dumplen = min(vec->i_len, 32);

			xfs_warn(mp, "  iovec[%d]", i);
			xfs_warn(mp, "    type	= 0x%x", vec->i_type);
			xfs_warn(mp, "    len	= %d", vec->i_len);
			xfs_warn(mp, "    first %d bytes of iovec[%d]:", dumplen, i);
			xfs_hex_dump(vec->i_addr, dumplen);

			vec++;
		}
	}
}

/*
 * Copy one log iovec into the iclog data buffer at the current offset and
 * update the caller's write accounting.
 */
static inline void
xlog_write_iovec(
	struct xlog_in_core	*iclog,
	uint32_t		*log_offset,
	void			*data,
	uint32_t		write_len,
	int			*bytes_left,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	ASSERT(*log_offset < iclog->ic_log->l_iclog_size);
	ASSERT(*log_offset % sizeof(int32_t) == 0);
	ASSERT(write_len % sizeof(int32_t) == 0);

	memcpy(iclog->ic_datap + *log_offset, data, write_len);
	*log_offset += write_len;
	*bytes_left -= write_len;
	(*record_cnt)++;
	*data_cnt += write_len;
}

/*
 * Write log vectors into a single iclog which is guaranteed by the caller
 * to have enough space to write the entire log vector into.
 */
static void
xlog_write_full(
	struct xfs_log_vec	*lv,
	struct xlog_ticket	*ticket,
	struct xlog_in_core	*iclog,
	uint32_t		*log_offset,
	uint32_t		*len,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	int			index;

	ASSERT(*log_offset + *len <= iclog->ic_size ||
		iclog->ic_state == XLOG_STATE_WANT_SYNC);

	/*
	 * Ordered log vectors have no regions to write so this
	 * loop will naturally skip them.
	 */
	for (index = 0; index < lv->lv_niovecs; index++) {
		struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
		struct xlog_op_header	*ophdr = reg->i_addr;

		ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
		xlog_write_iovec(iclog, log_offset, reg->i_addr,
				 reg->i_len, len, record_cnt, data_cnt);
	}
}

/*
 * Finish off the current iclog and grab space for @len more bytes in a new
 * one, resetting the record/data accounting for the new iclog.  Returns a
 * negative error number if releasing or getting an iclog fails.
 */
static int
xlog_write_get_more_iclog_space(
	struct xlog_ticket	*ticket,
	struct xlog_in_core	**iclogp,
	uint32_t		*log_offset,
	uint32_t		len,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	struct xlog_in_core	*iclog = *iclogp;
	struct xlog		*log = iclog->ic_log;
	int			error;

	spin_lock(&log->l_icloglock);
	ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC);
	xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
	error = xlog_state_release_iclog(log, iclog, ticket);
	spin_unlock(&log->l_icloglock);
	if (error)
		return error;

	error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
					   log_offset);
	if (error)
		return error;
	/* start accounting afresh against the new iclog */
	*record_cnt = 0;
	*data_cnt = 0;
	*iclogp = iclog;
	return 0;
}

/*
 * Write log vectors into a single iclog which is smaller than the current chain
 * length. We write until we cannot fit a full record into the remaining space
 * and then stop. We return the log vector that is to be written that cannot
 * wholly fit in the iclog.
 */
static int
xlog_write_partial(
	struct xfs_log_vec	*lv,
	struct xlog_ticket	*ticket,
	struct xlog_in_core	**iclogp,
	uint32_t		*log_offset,
	uint32_t		*len,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	struct xlog_in_core	*iclog = *iclogp;
	struct xlog_op_header	*ophdr;
	int			index = 0;
	uint32_t		rlen;
	int			error;

	/* walk the logvec, copying until we run out of space in the iclog */
	for (index = 0; index < lv->lv_niovecs; index++) {
		struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
		uint32_t		reg_offset = 0;

		/*
		 * The first region of a continuation must have a non-zero
		 * length otherwise log recovery will just skip over it and
		 * start recovering from the next opheader it finds. Because we
		 * mark the next opheader as a continuation, recovery will then
		 * incorrectly add the continuation to the previous region and
		 * that breaks stuff.
		 *
		 * Hence if there isn't space for region data after the
		 * opheader, then we need to start afresh with a new iclog.
		 */
		if (iclog->ic_size - *log_offset <=
					sizeof(struct xlog_op_header)) {
			error = xlog_write_get_more_iclog_space(ticket,
					&iclog, log_offset, *len, record_cnt,
					data_cnt);
			if (error)
				return error;
		}

		ophdr = reg->i_addr;
		rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);

		ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
		ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header));
		if (rlen != reg->i_len)
			ophdr->oh_flags |= XLOG_CONTINUE_TRANS;

		xlog_write_iovec(iclog, log_offset, reg->i_addr,
				 rlen, len, record_cnt, data_cnt);

		/* If we wrote the whole region, move to the next. */
		if (rlen == reg->i_len)
			continue;

		/*
		 * We now have a partially written iovec, but it can span
		 * multiple iclogs so we loop here. First we release the iclog
		 * we currently have, then we get a new iclog and add a new
		 * opheader. Then we continue copying from where we were until
		 * we either complete the iovec or fill the iclog. If we
		 * complete the iovec, then we increment the index and go right
		 * back to the top of the outer loop. if we fill the iclog, we
		 * run the inner loop again.
		 *
		 * This is complicated by the tail of a region using all the
		 * space in an iclog and hence requiring us to release the iclog
		 * and get a new one before returning to the outer loop. We must
		 * always guarantee that we exit this inner loop with at least
		 * space for log transaction opheaders left in the current
		 * iclog, hence we cannot just terminate the loop at the end
		 * of the of the continuation. So we loop while there is no
		 * space left in the current iclog, and check for the end of the
		 * continuation after getting a new iclog.
		 */
		do {
			/*
			 * Ensure we include the continuation opheader in the
			 * space we need in the new iclog by adding that size
			 * to the length we require. This continuation opheader
			 * needs to be accounted to the ticket as the space it
			 * consumes hasn't been accounted to the lv we are
			 * writing.
			 */
			error = xlog_write_get_more_iclog_space(ticket,
					&iclog, log_offset,
					*len + sizeof(struct xlog_op_header),
					record_cnt, data_cnt);
			if (error)
				return error;

			ophdr = iclog->ic_datap + *log_offset;
			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
			ophdr->oh_clientid = XFS_TRANSACTION;
			ophdr->oh_res2 = 0;
			ophdr->oh_flags = XLOG_WAS_CONT_TRANS;

			ticket->t_curr_res -= sizeof(struct xlog_op_header);
			*log_offset += sizeof(struct xlog_op_header);
			*data_cnt += sizeof(struct xlog_op_header);

			/*
			 * If rlen fits in the iclog, then end the region
			 * continuation. Otherwise we're going around again.
			 */
			reg_offset += rlen;
			rlen = reg->i_len - reg_offset;
			if (rlen <= iclog->ic_size - *log_offset)
				ophdr->oh_flags |= XLOG_END_TRANS;
			else
				ophdr->oh_flags |= XLOG_CONTINUE_TRANS;

			rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset);
			ophdr->oh_len = cpu_to_be32(rlen);

			xlog_write_iovec(iclog, log_offset,
					reg->i_addr + reg_offset,
					rlen, len, record_cnt, data_cnt);

		} while (ophdr->oh_flags & XLOG_CONTINUE_TRANS);
	}

	/*
	 * No more iovecs remain in this logvec so return the next log vec to
	 * the caller so it can go back to fast path copying.
	 */
	*iclogp = iclog;
	return 0;
}

/*
 * Write some region out to in-core log
 *
 * This will be called when writing externally provided regions or when
 * writing out a commit record for a given transaction.
 *
 * General algorithm:
 *	1. Find total length of this write.  This may include adding to the
 *		lengths passed in.
 *	2. Check whether we violate the tickets reservation.
 *	3.
 *	While writing to this iclog
 *	   A. Reserve as much space in this iclog as can get
 *	   B. If this is first write, save away start lsn
 *	   C. While writing this region:
 *		1. If first write of transaction, write start record
 *		2. Write log operation header (header per region)
 *		3. Find out if we can fit entire region into this iclog
 *		4. Potentially, verify destination memcpy ptr
 *		5. Memcpy (partial) region
 *		6. If partial copy, release iclog; otherwise, continue
 *			copying more regions into current iclog
 *	4. Mark want sync bit (in simulation mode)
 *	5. Release iclog for potential flush to on-disk log.
 *
 * ERRORS:
 * 1. Panic if reservation is overrun.  This should never happen since
 *	reservation amounts are generated internal to the filesystem.
 * NOTES:
 * 1. Tickets are single threaded data structures.
 * 2. The XLOG_END_TRANS & XLOG_CONTINUE_TRANS flags are passed down to the
 *	syncing routine.  When a single log_write region needs to span
 *	multiple in-core logs, the XLOG_CONTINUE_TRANS bit should be set
 *	on all log operation writes which don't contain the end of the
 *	region.  The XLOG_END_TRANS bit is used for the in-core log
 *	operation which contains the end of the continued log_write region.
 * 3. When xlog_state_get_iclog_space() grabs the rest of the current iclog,
 *	we don't really know exactly how much space will be used.  As a result,
 *	we don't update ic_offset until the end when we know exactly how many
 *	bytes have been written out.
 */
int
xlog_write(
	struct xlog		*log,
	struct xfs_cil_ctx	*ctx,
	struct list_head	*lv_chain,
	struct xlog_ticket	*ticket,
	uint32_t		len)

{
	struct xlog_in_core	*iclog = NULL;
	struct xfs_log_vec	*lv;
	uint32_t		record_cnt = 0;
	uint32_t		data_cnt = 0;
	int			error = 0;
	int			log_offset;

	/*
	 * A negative current reservation means the ticket was overrun;
	 * report the ticket contents and shut the log down rather than
	 * silently corrupting accounting.
	 */
	if (ticket->t_curr_res < 0) {
		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
		     "ctx ticket reservation ran out. Need to up reservation");
		xlog_print_tic_res(log->l_mp, ticket);
		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
	}

	error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
					   &log_offset);
	if (error)
		return error;

	ASSERT(log_offset <= iclog->ic_size - 1);

	/*
	 * If we have a context pointer, pass it the first iclog we are
	 * writing to so it can record state needed for iclog write
	 * ordering.
	 */
	if (ctx)
		xlog_cil_set_ctx_write_state(ctx, iclog);

	list_for_each_entry(lv, lv_chain, lv_list) {
		/*
		 * If the entire log vec does not fit in the iclog, punt it to
		 * the partial copy loop which can handle this case.
		 */
		if (lv->lv_niovecs &&
		    lv->lv_bytes > iclog->ic_size - log_offset) {
			error = xlog_write_partial(lv, ticket, &iclog,
						   &log_offset, &len, &record_cnt,
						   &data_cnt);
			if (error) {
				/*
				 * We have no iclog to release, so just return
				 * the error immediately.
				 */
				return error;
			}
		} else {
			xlog_write_full(lv, ticket, iclog, &log_offset,
					 &len, &record_cnt, &data_cnt);
		}
	}
	ASSERT(len == 0);

	/*
	 * We've already been guaranteed that the last writes will fit inside
	 * the current iclog, and hence it will already have the space used by
	 * those writes accounted to it. Hence we do not need to update the
	 * iclog with the number of bytes written here.
	 */
	spin_lock(&log->l_icloglock);
	xlog_state_finish_copy(log, iclog, record_cnt, 0);
	error = xlog_state_release_iclog(log, iclog, ticket);
	spin_unlock(&log->l_icloglock);

	return error;
}

static void
xlog_state_activate_iclog(
	struct xlog_in_core	*iclog,
	int			*iclogs_changed)
{
	ASSERT(list_empty_careful(&iclog->ic_callbacks));
	trace_xlog_iclog_activate(iclog, _RET_IP_);

	/*
	 * If the number of ops in this iclog indicate it just contains the
	 * dummy transaction, we can change state into IDLE (the second time
	 * around). Otherwise we should change the state into NEED a dummy.
	 * We don't need to cover the dummy.
	 */
	if (*iclogs_changed == 0 &&
	    iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) {
		*iclogs_changed = 1;
	} else {
		/*
		 * We have two dirty iclogs so start over.  This could also be
		 * num of ops indicating this is not the dummy going out.
		 */
		*iclogs_changed = 2;
	}

	/* Reset the header so the iclog can be reused for the next record. */
	iclog->ic_state	= XLOG_STATE_ACTIVE;
	iclog->ic_offset = 0;
	iclog->ic_header.h_num_logops = 0;
	memset(iclog->ic_header.h_cycle_data, 0,
		sizeof(iclog->ic_header.h_cycle_data));
	iclog->ic_header.h_lsn = 0;
	iclog->ic_header.h_tail_lsn = 0;
}

/*
 * Loop through all iclogs and mark all iclogs currently marked DIRTY as
 * ACTIVE after iclog I/O has completed.
 */
static void
xlog_state_activate_iclogs(
	struct xlog		*log,
	int			*iclogs_changed)
{
	struct xlog_in_core	*iclog = log->l_iclog;

	do {
		if (iclog->ic_state == XLOG_STATE_DIRTY)
			xlog_state_activate_iclog(iclog, iclogs_changed);
		/*
		 * The ordering of marking iclogs ACTIVE must be maintained, so
		 * an iclog doesn't become ACTIVE beyond one that is SYNCING.
		 */
		else if (iclog->ic_state != XLOG_STATE_ACTIVE)
			break;
	} while ((iclog = iclog->ic_next) != log->l_iclog);
}

/*
 * Compute the next log-covering state from the previous state and the
 * number of iclogs that just changed (see xlog_state_activate_iclog()).
 */
static int
xlog_covered_state(
	int			prev_state,
	int			iclogs_changed)
{
	/*
	 * We go to NEED for any non-covering writes. We go to NEED2 if we just
	 * wrote the first covering record (DONE). We go to IDLE if we just
	 * wrote the second covering record (DONE2) and remain in IDLE until a
	 * non-covering write occurs.
	 */
	switch (prev_state) {
	case XLOG_STATE_COVER_IDLE:
		if (iclogs_changed == 1)
			return XLOG_STATE_COVER_IDLE;
		fallthrough;
	case XLOG_STATE_COVER_NEED:
	case XLOG_STATE_COVER_NEED2:
		break;
	case XLOG_STATE_COVER_DONE:
		if (iclogs_changed == 1)
			return XLOG_STATE_COVER_NEED2;
		break;
	case XLOG_STATE_COVER_DONE2:
		if (iclogs_changed == 1)
			return XLOG_STATE_COVER_IDLE;
		break;
	default:
		ASSERT(0);
	}

	return XLOG_STATE_COVER_NEED;
}

/*
 * Mark an iclog DIRTY, reactivate any iclogs that can become ACTIVE again,
 * and update the log covering state accordingly.
 */
STATIC void
xlog_state_clean_iclog(
	struct xlog		*log,
	struct xlog_in_core	*dirty_iclog)
{
	int			iclogs_changed = 0;

	trace_xlog_iclog_clean(dirty_iclog, _RET_IP_);

	dirty_iclog->ic_state = XLOG_STATE_DIRTY;

	xlog_state_activate_iclogs(log, &iclogs_changed);
	wake_up_all(&dirty_iclog->ic_force_wait);

	if (iclogs_changed) {
		log->l_covered_state = xlog_covered_state(log->l_covered_state,
				iclogs_changed);
	}
}

/*
 * Walk the iclog ring and return the lowest header LSN among iclogs that are
 * neither ACTIVE nor DIRTY, or 0 if there is none.
 */
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
	struct xlog		*log)
{
	struct xlog_in_core	*iclog = log->l_iclog;
	xfs_lsn_t		lowest_lsn = 0, lsn;

	do {
		if (iclog->ic_state == XLOG_STATE_ACTIVE ||
		    iclog->ic_state == XLOG_STATE_DIRTY)
			continue;

		lsn = be64_to_cpu(iclog->ic_header.h_lsn);
		if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
			lowest_lsn = lsn;
	} while ((iclog = iclog->ic_next) != log->l_iclog);

	return lowest_lsn;
}

/*
 * Completion of a iclog IO does not imply that a transaction has completed, as
 * transactions can be large enough to span many iclogs. We cannot change the
 * tail of the log half way through a transaction as this may be the only
 * transaction in the log and moving the tail to point to the middle of it
 * will prevent recovery from finding the start of the transaction. Hence we
 * should only update the last_sync_lsn if this iclog contains transaction
 * completion callbacks on it.
 *
 * We have to do this before we drop the icloglock to ensure we are the only one
 * that can update it.
 *
 * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
 * the reservation grant head pushing. This is due to the fact that the push
 * target is bound by the current last_sync_lsn value. Hence if we have a large
 * amount of log space bound up in this committing transaction then the
 * last_sync_lsn value may be the limiting factor preventing tail pushing from
 * freeing space in the log. Hence once we've updated the last_sync_lsn we
 * should push the AIL to ensure the push target (and hence the grant head) is
 * no longer bound by the old log head location and can move forwards and make
 * progress again.
 */
static void
xlog_state_set_callback(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	xfs_lsn_t		header_lsn)
{
	trace_xlog_iclog_callback(iclog, _RET_IP_);
	iclog->ic_state = XLOG_STATE_CALLBACK;

	/* last_sync_lsn must never move backwards. */
	ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
			   header_lsn) <= 0);

	/* No completion callbacks attached: don't move last_sync_lsn. */
	if (list_empty_careful(&iclog->ic_callbacks))
		return;

	atomic64_set(&log->l_last_sync_lsn, header_lsn);
	xlog_grant_push_ail(log, 0);
}

/*
 * Return true if we need to stop processing, false to continue to the next
 * iclog. The caller will need to run callbacks if the iclog is returned in the
 * XLOG_STATE_CALLBACK state.
 */
static bool
xlog_state_iodone_process_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog)
{
	xfs_lsn_t		lowest_lsn;
	xfs_lsn_t		header_lsn;

	switch (iclog->ic_state) {
	case XLOG_STATE_ACTIVE:
	case XLOG_STATE_DIRTY:
		/*
		 * Skip all iclogs in the ACTIVE & DIRTY states:
		 */
		return false;
	case XLOG_STATE_DONE_SYNC:
		/*
		 * Now that we have an iclog that is in the DONE_SYNC state, do
		 * one more check here to see if we have chased our tail around.
		 * If this is not the lowest lsn iclog, then we will leave it
		 * for another completion to process.
		 */
		header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
		lowest_lsn = xlog_get_lowest_lsn(log);
		if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
			return false;
		xlog_state_set_callback(log, iclog, header_lsn);
		return false;
	default:
		/*
		 * Can only perform callbacks in order. Since this iclog is not
		 * in the DONE_SYNC state, we skip the rest and just try to
		 * clean up.
		 */
		return true;
	}
}

/*
 * Loop over all the iclogs, running attached callbacks on them. Return true if
 * we ran any callbacks, indicating that we dropped the icloglock. We don't need
 * to handle transient shutdown state here at all because
 * xlog_state_shutdown_callbacks() will be run to do the necessary shutdown
 * cleanup of the callbacks.
 *
 * Called with the icloglock held; the lock is dropped around the callback
 * invocations and re-taken before returning.
 */
static bool
xlog_state_do_iclog_callbacks(
	struct xlog		*log)
		__releases(&log->l_icloglock)
		__acquires(&log->l_icloglock)
{
	struct xlog_in_core	*first_iclog = log->l_iclog;
	struct xlog_in_core	*iclog = first_iclog;
	bool			ran_callback = false;

	do {
		LIST_HEAD(cb_list);

		if (xlog_state_iodone_process_iclog(log, iclog))
			break;
		if (iclog->ic_state != XLOG_STATE_CALLBACK) {
			iclog = iclog->ic_next;
			continue;
		}
		/*
		 * Detach the callback list under the lock, then drop the lock
		 * to run the (potentially slow) completions.
		 */
		list_splice_init(&iclog->ic_callbacks, &cb_list);
		spin_unlock(&log->l_icloglock);

		trace_xlog_iclog_callbacks_start(iclog, _RET_IP_);
		xlog_cil_process_committed(&cb_list);
		trace_xlog_iclog_callbacks_done(iclog, _RET_IP_);
		ran_callback = true;

		spin_lock(&log->l_icloglock);
		xlog_state_clean_iclog(log, iclog);
		iclog = iclog->ic_next;
	} while (iclog != first_iclog);

	return ran_callback;
}


/*
 * Loop running iclog completion callbacks until there are no more iclogs in a
 * state that can run callbacks.
 */
STATIC void
xlog_state_do_callback(
	struct xlog		*log)
{
	int			flushcnt = 0;
	int			repeats = 0;

	spin_lock(&log->l_icloglock);
	while (xlog_state_do_iclog_callbacks(log)) {
		if (xlog_is_shutdown(log))
			break;

		/* Warn every 5000 iterations in case we are stuck. */
		if (++repeats > 5000) {
			flushcnt += repeats;
			repeats = 0;
			xfs_warn(log->l_mp,
				"%s: possible infinite loop (%d iterations)",
				__func__, flushcnt);
		}
	}

	/* Wake anyone waiting in xlog_state_get_iclog_space() for a free iclog. */
	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE)
		wake_up_all(&log->l_flush_wait);

	spin_unlock(&log->l_icloglock);
}


/*
 * Finish transitioning this iclog to the dirty state.
 *
 * Callbacks could take time, so they are done outside the scope of the
 * global state machine log lock.
 */
STATIC void
xlog_state_done_syncing(
	struct xlog_in_core	*iclog)
{
	struct xlog		*log = iclog->ic_log;

	spin_lock(&log->l_icloglock);
	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
	trace_xlog_iclog_sync_done(iclog, _RET_IP_);

	/*
	 * If we got an error, either on the first buffer, or in the case of
	 * split log writes, on the second, we shut down the file system and
	 * no iclogs should ever be attempted to be written to disk again.
	 */
	if (!xlog_is_shutdown(log)) {
		ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
		iclog->ic_state = XLOG_STATE_DONE_SYNC;
	}

	/*
	 * Someone could be sleeping prior to writing out the next
	 * iclog buffer, we wake them all, one will get to do the
	 * I/O, the others get to wait for the result.
	 */
	wake_up_all(&iclog->ic_write_wait);
	spin_unlock(&log->l_icloglock);
	xlog_state_do_callback(log);
}

/*
 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
 * sleep. We wait on the flush queue on the head iclog as that should be
 * the first iclog to complete flushing. Hence if all iclogs are syncing,
 * we will wait here and all new writes will sleep until a sync completes.
 *
 * The in-core logs are used in a circular fashion. They are not used
 * out-of-order even when an iclog past the head is free.
 *
 * return:
 *	* log_offset where xlog_write() can start writing into the in-core
 *		log's data space.
 *	* in-core log pointer to which xlog_write() should write.
 *	* boolean indicating this is a continued write to an in-core log.
 *		If this is the last write, then the in-core log's offset field
 *		needs to be incremented, depending on the amount of data which
 *		is copied.
 */
STATIC int
xlog_state_get_iclog_space(
	struct xlog		*log,
	int			len,
	struct xlog_in_core	**iclogp,
	struct xlog_ticket	*ticket,
	int			*logoffsetp)
{
	int		  log_offset;
	xlog_rec_header_t *head;
	xlog_in_core_t	  *iclog;

restart:
	/* Re-taken after sleeping for a free iclog or releasing a full one. */
	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log)) {
		spin_unlock(&log->l_icloglock);
		return -EIO;
	}

	iclog = log->l_iclog;
	if (iclog->ic_state != XLOG_STATE_ACTIVE) {
		XFS_STATS_INC(log->l_mp, xs_log_noiclogs);

		/* Wait for log writes to have flushed */
		xlog_wait(&log->l_flush_wait, &log->l_icloglock);
		goto restart;
	}

	head = &iclog->ic_header;

	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
	log_offset = iclog->ic_offset;

	trace_xlog_iclog_get_space(iclog, _RET_IP_);

	/* On the 1st write to an iclog, figure out lsn.  This works
	 * if iclogs marked XLOG_STATE_WANT_SYNC always write out what they are
	 * committing to.  If the offset is set, that's how many blocks
	 * must be written.
	 */
	if (log_offset == 0) {
		/* The record header consumes part of the reservation. */
		ticket->t_curr_res -= log->l_iclog_hsize;
		head->h_cycle = cpu_to_be32(log->l_curr_cycle);
		head->h_lsn = cpu_to_be64(
			xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
		ASSERT(log->l_curr_block >= 0);
	}

	/* If there is enough room to write everything, then do it.  Otherwise,
	 * claim the rest of the region and make sure the XLOG_STATE_WANT_SYNC
	 * bit is on, so this will get flushed out.  Don't update ic_offset
	 * until you know exactly how many bytes get copied.  Therefore, wait
	 * until later to update ic_offset.
	 *
	 * xlog_write() algorithm assumes that at least 2 xlog_op_header_t's
	 * can fit into remaining data section.
	 */
	if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
		int error = 0;

		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);

		/*
		 * If we are the only one writing to this iclog, sync it to
		 * disk.  We need to do an atomic compare and decrement here to
		 * avoid racing with concurrent atomic_dec_and_lock() calls in
		 * xlog_state_release_iclog() when there is more than one
		 * reference to the iclog.
		 */
		if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
			error = xlog_state_release_iclog(log, iclog, ticket);
		spin_unlock(&log->l_icloglock);
		if (error)
			return error;
		goto restart;
	}

	/* Do we have enough room to write the full amount in the remainder
	 * of this iclog?  Or must we continue a write on the next iclog and
	 * mark this iclog as completely taken?  In the case where we switch
	 * iclogs (to mark it taken), this particular iclog will release/sync
	 * to disk in xlog_write().
	 */
	if (len <= iclog->ic_size - iclog->ic_offset)
		iclog->ic_offset += len;
	else
		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
	*iclogp = iclog;

	ASSERT(iclog->ic_offset <= iclog->ic_size);
	spin_unlock(&log->l_icloglock);

	*logoffsetp = log_offset;
	return 0;
}

/*
 * The first cnt-1 times a ticket goes through here we don't need to move the
 * grant write head because the permanent reservation has reserved cnt times the
 * unit amount.  Release part of current permanent unit reservation and reset
 * current reservation to be one units worth.
Also move grant reservation head 299562306a36Sopenharmony_ci * forward. 299662306a36Sopenharmony_ci */ 299762306a36Sopenharmony_civoid 299862306a36Sopenharmony_cixfs_log_ticket_regrant( 299962306a36Sopenharmony_ci struct xlog *log, 300062306a36Sopenharmony_ci struct xlog_ticket *ticket) 300162306a36Sopenharmony_ci{ 300262306a36Sopenharmony_ci trace_xfs_log_ticket_regrant(log, ticket); 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci if (ticket->t_cnt > 0) 300562306a36Sopenharmony_ci ticket->t_cnt--; 300662306a36Sopenharmony_ci 300762306a36Sopenharmony_ci xlog_grant_sub_space(log, &log->l_reserve_head.grant, 300862306a36Sopenharmony_ci ticket->t_curr_res); 300962306a36Sopenharmony_ci xlog_grant_sub_space(log, &log->l_write_head.grant, 301062306a36Sopenharmony_ci ticket->t_curr_res); 301162306a36Sopenharmony_ci ticket->t_curr_res = ticket->t_unit_res; 301262306a36Sopenharmony_ci 301362306a36Sopenharmony_ci trace_xfs_log_ticket_regrant_sub(log, ticket); 301462306a36Sopenharmony_ci 301562306a36Sopenharmony_ci /* just return if we still have some of the pre-reserved space */ 301662306a36Sopenharmony_ci if (!ticket->t_cnt) { 301762306a36Sopenharmony_ci xlog_grant_add_space(log, &log->l_reserve_head.grant, 301862306a36Sopenharmony_ci ticket->t_unit_res); 301962306a36Sopenharmony_ci trace_xfs_log_ticket_regrant_exit(log, ticket); 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_ci ticket->t_curr_res = ticket->t_unit_res; 302262306a36Sopenharmony_ci } 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci xfs_log_ticket_put(ticket); 302562306a36Sopenharmony_ci} 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci/* 302862306a36Sopenharmony_ci * Give back the space left from a reservation. 302962306a36Sopenharmony_ci * 303062306a36Sopenharmony_ci * All the information we need to make a correct determination of space left 303162306a36Sopenharmony_ci * is present. For non-permanent reservations, things are quite easy. 
 * The count should have been decremented to zero.  We only need to deal
 * with the space remaining in the current reservation part of the ticket.
 * If the ticket contains a permanent reservation, there may be left over
 * space which needs to be released.  A count of N means that N-1 refills of
 * the current reservation can be done before we need to ask for more space.
 * The first one goes to fill up the first current reservation.  Once we run
 * out of space, the count will stay at zero and the only space remaining
 * will be in the current reservation field.
 */
void
xfs_log_ticket_ungrant(
	struct xlog		*log,
	struct xlog_ticket	*ticket)
{
	int			bytes;

	trace_xfs_log_ticket_ungrant(log, ticket);

	if (ticket->t_cnt > 0)
		ticket->t_cnt--;

	trace_xfs_log_ticket_ungrant_sub(log, ticket);

	/*
	 * If this is a permanent reservation ticket, we may be able to free
	 * up more space based on the remaining count: each unused count is
	 * one unit of pre-reserved space that can be returned.
	 */
	bytes = ticket->t_curr_res;
	if (ticket->t_cnt > 0) {
		ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
		bytes += ticket->t_unit_res*ticket->t_cnt;
	}

	xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
	xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);

	trace_xfs_log_ticket_ungrant_exit(log, ticket);

	/* Freed grant space may allow queued reservation waiters to run. */
	xfs_log_space_wake(log->l_mp);
	xfs_log_ticket_put(ticket);
}

/*
 * This routine will mark the current iclog in the ring as WANT_SYNC and move
 * the current iclog pointer to the next iclog in the ring.
 */
void
xlog_state_switch_iclogs(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			eventual_size)
{
	/* Only an ACTIVE iclog can be switched, and only under l_icloglock. */
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
	assert_spin_locked(&log->l_icloglock);
	trace_xlog_iclog_switch(iclog, _RET_IP_);

	if (!eventual_size)
		eventual_size = iclog->ic_offset;
	iclog->ic_state = XLOG_STATE_WANT_SYNC;
	iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block);
	log->l_prev_block = log->l_curr_block;
	log->l_prev_cycle = log->l_curr_cycle;

	/* roll log?: ic_offset changed later */
	log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);

	/* Round up to next log-sunit */
	if (log->l_iclog_roundoff > BBSIZE) {
		uint32_t sunit_bb = BTOBB(log->l_iclog_roundoff);
		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
	}

	if (log->l_curr_block >= log->l_logBBsize) {
		/*
		 * Rewind the current block before the cycle is bumped to make
		 * sure that the combined LSN never transiently moves forward
		 * when the log wraps to the next cycle. This is to support the
		 * unlocked sample of these fields from xlog_valid_lsn(). Most
		 * other cases should acquire l_icloglock.
		 */
		log->l_curr_block -= log->l_logBBsize;
		ASSERT(log->l_curr_block >= 0);
		/* order the block rewind before the cycle bump (see above) */
		smp_wmb();
		log->l_curr_cycle++;
		/*
		 * Skip a cycle number equal to the header magic value —
		 * presumably so an on-disk cycle word can never alias a log
		 * record header; NOTE(review): confirm against the log
		 * recovery cycle-scan code.
		 */
		if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
			log->l_curr_cycle++;
	}
	/* Advance the ring head past the iclog we just marked WANT_SYNC. */
	ASSERT(iclog == log->l_iclog);
	log->l_iclog = iclog->ic_next;
}

/*
 * Force the iclog to disk and check if the iclog has been completed before
 * xlog_force_iclog() returns. This can happen on synchronous (e.g.
 * pmem) or fast async storage because we drop the icloglock to issue the IO.
 * If completion has already occurred, tell the caller so that it can avoid an
 * unnecessary wait on the iclog.
312962306a36Sopenharmony_ci */ 313062306a36Sopenharmony_cistatic int 313162306a36Sopenharmony_cixlog_force_and_check_iclog( 313262306a36Sopenharmony_ci struct xlog_in_core *iclog, 313362306a36Sopenharmony_ci bool *completed) 313462306a36Sopenharmony_ci{ 313562306a36Sopenharmony_ci xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); 313662306a36Sopenharmony_ci int error; 313762306a36Sopenharmony_ci 313862306a36Sopenharmony_ci *completed = false; 313962306a36Sopenharmony_ci error = xlog_force_iclog(iclog); 314062306a36Sopenharmony_ci if (error) 314162306a36Sopenharmony_ci return error; 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci /* 314462306a36Sopenharmony_ci * If the iclog has already been completed and reused the header LSN 314562306a36Sopenharmony_ci * will have been rewritten by completion 314662306a36Sopenharmony_ci */ 314762306a36Sopenharmony_ci if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) 314862306a36Sopenharmony_ci *completed = true; 314962306a36Sopenharmony_ci return 0; 315062306a36Sopenharmony_ci} 315162306a36Sopenharmony_ci 315262306a36Sopenharmony_ci/* 315362306a36Sopenharmony_ci * Write out all data in the in-core log as of this exact moment in time. 315462306a36Sopenharmony_ci * 315562306a36Sopenharmony_ci * Data may be written to the in-core log during this call. However, 315662306a36Sopenharmony_ci * we don't guarantee this data will be written out. A change from past 315762306a36Sopenharmony_ci * implementation means this routine will *not* write out zero length LRs. 315862306a36Sopenharmony_ci * 315962306a36Sopenharmony_ci * Basically, we try and perform an intelligent scan of the in-core logs. 316062306a36Sopenharmony_ci * If we determine there is no flushable data, we just return. There is no 316162306a36Sopenharmony_ci * flushable data if: 316262306a36Sopenharmony_ci * 316362306a36Sopenharmony_ci * 1. the current iclog is active and has no data; the previous iclog 316462306a36Sopenharmony_ci * is in the active or dirty state. 
 *	2. the current iclog is dirty, and the previous iclog is in the
 *		active or dirty state.
 *
 * We may sleep if:
 *
 *	1. the current iclog is not in the active nor dirty state.
 *	2. the current iclog dirty, and the previous iclog is not in the
 *		active nor dirty state.
 *	3. the current iclog is active, and there is another thread writing
 *		to this particular iclog.
 *	4. a) the current iclog is active and has no other writers
 *	   b) when we return from flushing out this iclog, it is still
 *		not in the active nor dirty state.
 */
int
xfs_log_force(
	struct xfs_mount	*mp,
	uint			flags)
{
	struct xlog		*log = mp->m_log;
	struct xlog_in_core	*iclog;

	XFS_STATS_INC(mp, xs_log_force);
	trace_xfs_log_force(mp, 0, _RET_IP_);

	/* Push all pending CIL items into the iclogs before scanning them. */
	xlog_cil_force(log);

	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log))
		goto out_error;

	iclog = log->l_iclog;
	trace_xlog_iclog_force(iclog, _RET_IP_);

	if (iclog->ic_state == XLOG_STATE_DIRTY ||
	    (iclog->ic_state == XLOG_STATE_ACTIVE &&
	     atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
		/*
		 * If the head is dirty or (active and empty), then we need to
		 * look at the previous iclog.
		 *
		 * If the previous iclog is active or dirty we are done.  There
		 * is nothing to sync out. Otherwise, we attach ourselves to the
		 * previous iclog and go to sleep.
		 */
		iclog = iclog->ic_prev;
	} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
		if (atomic_read(&iclog->ic_refcnt) == 0) {
			/* We have exclusive access to this iclog. */
			bool	completed;

			if (xlog_force_and_check_iclog(iclog, &completed))
				goto out_error;

			/* Already on disk — nothing to wait for. */
			if (completed)
				goto out_unlock;
		} else {
			/*
			 * Someone else is still writing to this iclog, so we
			 * need to ensure that when they release the iclog it
			 * gets synced immediately as we may be waiting on it.
			 */
			xlog_state_switch_iclogs(log, iclog, 0);
		}
	}

	/*
	 * The iclog we are about to wait on may contain the checkpoint pushed
	 * by the above xlog_cil_force() call, but it may not have been pushed
	 * to disk yet. Like the ACTIVE case above, we need to make sure caches
	 * are flushed when this iclog is written.
	 */
	if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;

	/*
	 * NOTE(review): this return path does not unlock l_icloglock here,
	 * so xlog_wait_on_iclog() presumably drops the lock itself — confirm.
	 */
	if (flags & XFS_LOG_SYNC)
		return xlog_wait_on_iclog(iclog);
out_unlock:
	spin_unlock(&log->l_icloglock);
	return 0;
out_error:
	spin_unlock(&log->l_icloglock);
	return -EIO;
}

/*
 * Force the log to a specific LSN.
 *
 * If an iclog with that lsn can be found:
 *	If it is in the DIRTY state, just return.
 *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
 *		state and go to sleep or return.
 *	If it is in any other state, go to sleep or return.
 *
 * Synchronous forces are implemented with a wait queue.  All callers trying
 * to force a given lsn to disk must wait on the queue attached to the
 * specific in-core log.  When given in-core log finally completes its write
 * to disk, that thread will wake up all threads waiting on the queue.
 */
static int
xlog_force_lsn(
	struct xlog		*log,
	xfs_lsn_t		lsn,
	uint			flags,
	int			*log_flushed,
	bool			already_slept)
{
	struct xlog_in_core	*iclog;
	bool			completed;

	spin_lock(&log->l_icloglock);
	if (xlog_is_shutdown(log))
		goto out_error;

	/* Walk the iclog ring looking for the one holding @lsn. */
	iclog = log->l_iclog;
	while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
		trace_xlog_iclog_force_lsn(iclog, _RET_IP_);
		iclog = iclog->ic_next;
		/* Wrapped around without a match: that LSN is already gone. */
		if (iclog == log->l_iclog)
			goto out_unlock;
	}

	switch (iclog->ic_state) {
	case XLOG_STATE_ACTIVE:
		/*
		 * We sleep here if we haven't already slept (e.g. this is the
		 * first time we've looked at the correct iclog buf) and the
		 * buffer before us is going to be sync'ed.  The reason for this
		 * is that if we are doing sync transactions here, by waiting
		 * for the previous I/O to complete, we can allow a few more
		 * transactions into this iclog before we close it down.
		 *
		 * Otherwise, we mark the buffer WANT_SYNC, and bump up the
		 * refcnt so we can release the log (which drops the ref count).
		 * The state switch keeps new transaction commits from using
		 * this buffer.  When the current commits finish writing into
		 * the buffer, the refcount will drop to zero and the buffer
		 * will go out then.
		 */
		if (!already_slept &&
		    (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
		     iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
			xlog_wait(&iclog->ic_prev->ic_write_wait,
					&log->l_icloglock);
			/* Caller retries the force with already_slept set. */
			return -EAGAIN;
		}
		if (xlog_force_and_check_iclog(iclog, &completed))
			goto out_error;
		if (log_flushed)
			*log_flushed = 1;
		if (completed)
			goto out_unlock;
		break;
	case XLOG_STATE_WANT_SYNC:
		/*
		 * This iclog may contain the checkpoint pushed by the
		 * xlog_cil_force_seq() call, but there are other writers still
		 * accessing it so it hasn't been pushed to disk yet. Like the
		 * ACTIVE case above, we need to make sure caches are flushed
		 * when this iclog is written.
		 */
		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
		break;
	default:
		/*
		 * The entire checkpoint was written by the CIL force and is on
		 * its way to disk already. It will be stable when it
		 * completes, so we don't need to manipulate caches here at all.
		 * We just need to wait for completion if necessary.
		 */
		break;
	}

	/*
	 * NOTE(review): no unlock on this return path, so xlog_wait_on_iclog()
	 * presumably drops l_icloglock itself — confirm.
	 */
	if (flags & XFS_LOG_SYNC)
		return xlog_wait_on_iclog(iclog);
out_unlock:
	spin_unlock(&log->l_icloglock);
	return 0;
out_error:
	spin_unlock(&log->l_icloglock);
	return -EIO;
}

/*
 * Force the log to a specific checkpoint sequence.
 *
 * First force the CIL so that all the required changes have been flushed to the
 * iclogs. If the CIL force completed it will return a commit LSN that indicates
 * the iclog that needs to be flushed to stable storage. If the caller needs
 * a synchronous log force, we will wait on the iclog with the LSN returned by
 * xlog_cil_force_seq() to be completed.
335662306a36Sopenharmony_ci */ 335762306a36Sopenharmony_ciint 335862306a36Sopenharmony_cixfs_log_force_seq( 335962306a36Sopenharmony_ci struct xfs_mount *mp, 336062306a36Sopenharmony_ci xfs_csn_t seq, 336162306a36Sopenharmony_ci uint flags, 336262306a36Sopenharmony_ci int *log_flushed) 336362306a36Sopenharmony_ci{ 336462306a36Sopenharmony_ci struct xlog *log = mp->m_log; 336562306a36Sopenharmony_ci xfs_lsn_t lsn; 336662306a36Sopenharmony_ci int ret; 336762306a36Sopenharmony_ci ASSERT(seq != 0); 336862306a36Sopenharmony_ci 336962306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_log_force); 337062306a36Sopenharmony_ci trace_xfs_log_force(mp, seq, _RET_IP_); 337162306a36Sopenharmony_ci 337262306a36Sopenharmony_ci lsn = xlog_cil_force_seq(log, seq); 337362306a36Sopenharmony_ci if (lsn == NULLCOMMITLSN) 337462306a36Sopenharmony_ci return 0; 337562306a36Sopenharmony_ci 337662306a36Sopenharmony_ci ret = xlog_force_lsn(log, lsn, flags, log_flushed, false); 337762306a36Sopenharmony_ci if (ret == -EAGAIN) { 337862306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_log_force_sleep); 337962306a36Sopenharmony_ci ret = xlog_force_lsn(log, lsn, flags, log_flushed, true); 338062306a36Sopenharmony_ci } 338162306a36Sopenharmony_ci return ret; 338262306a36Sopenharmony_ci} 338362306a36Sopenharmony_ci 338462306a36Sopenharmony_ci/* 338562306a36Sopenharmony_ci * Free a used ticket when its refcount falls to zero. 
 */
void
xfs_log_ticket_put(
	xlog_ticket_t	*ticket)
{
	ASSERT(atomic_read(&ticket->t_ref) > 0);
	if (atomic_dec_and_test(&ticket->t_ref))
		kmem_cache_free(xfs_log_ticket_cache, ticket);
}

/* Take an additional reference on @ticket and return it. */
xlog_ticket_t *
xfs_log_ticket_get(
	xlog_ticket_t	*ticket)
{
	ASSERT(atomic_read(&ticket->t_ref) > 0);
	atomic_inc(&ticket->t_ref);
	return ticket;
}

/*
 * Figure out the total log space unit (in bytes) that would be
 * required for a log ticket.  Optionally reports the worst-case number of
 * iclog headers through @niclogs.
 */
static int
xlog_calc_unit_res(
	struct xlog		*log,
	int			unit_bytes,
	int			*niclogs)
{
	int			iclog_space;
	uint			num_headers;

	/*
	 * Permanent reservations have up to 'cnt'-1 active log operations
	 * in the log.  A unit in this case is the amount of space for one
	 * of these log operations.  Normal reservations have a cnt of 1
	 * and their unit amount is the total amount of space required.
	 *
	 * The following lines of code account for non-transaction data
	 * which occupy space in the on-disk log.
	 *
	 * Normal form of a transaction is:
	 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
	 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
	 *
	 * We need to account for all the leadup data and trailer data
	 * around the transaction data.
	 * And then we need to account for the worst case in terms of using
	 * more space.
	 * The worst case will happen if:
	 * - the placement of the transaction happens to be such that the
	 *   roundoff is at its maximum
	 * - the transaction data is synced before the commit record is synced
	 *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
	 *   Therefore the commit record is in its own Log Record.
	 *   This can happen as the commit record is called with its
	 *   own region to xlog_write().
	 *   This then means that in the worst case, roundoff can happen for
	 *   the commit-rec as well.
	 *   The commit-rec is smaller than padding in this scenario and so it
	 *   is not added separately.
	 */

	/* for trans header */
	unit_bytes += sizeof(xlog_op_header_t);
	unit_bytes += sizeof(xfs_trans_header_t);

	/* for start-rec */
	unit_bytes += sizeof(xlog_op_header_t);

	/*
	 * for LR headers - the space for data in an iclog is the size minus
	 * the space used for the headers. If we use the iclog size, then we
	 * undercalculate the number of headers required.
	 *
	 * Furthermore - the addition of op headers for split-recs might
	 * increase the space required enough to require more log and op
	 * headers, so take that into account too.
	 *
	 * IMPORTANT: This reservation makes the assumption that if this
	 * transaction is the first in an iclog and hence has the LR headers
	 * accounted to it, then the remaining space in the iclog is
	 * exclusively for this transaction.  i.e. if the transaction is larger
	 * than the iclog, it will be the only thing in that iclog.
	 * Fundamentally, this means we must pass the entire log vector to
	 * xlog_write to guarantee this.
	 */
	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
	num_headers = howmany(unit_bytes, iclog_space);

	/* for split-recs - ophdrs added when data split over LRs */
	unit_bytes += sizeof(xlog_op_header_t) * num_headers;

	/* add extra header reservations if we overrun */
	while (!num_headers ||
	       howmany(unit_bytes, iclog_space) > num_headers) {
		unit_bytes += sizeof(xlog_op_header_t);
		num_headers++;
	}
	unit_bytes += log->l_iclog_hsize * num_headers;

	/* for commit-rec LR header - note: padding will subsume the ophdr */
	unit_bytes += log->l_iclog_hsize;

	/* roundoff padding for transaction data and one for commit record */
	unit_bytes += 2 * log->l_iclog_roundoff;

	if (niclogs)
		*niclogs = num_headers;
	return unit_bytes;
}

/* Public wrapper: compute the unit reservation without the header count. */
int
xfs_log_calc_unit_res(
	struct xfs_mount	*mp,
	int			unit_bytes)
{
	return xlog_calc_unit_res(mp->m_log, unit_bytes, NULL);
}

/*
 * Allocate and initialise a new log ticket.
 */
struct xlog_ticket *
xlog_ticket_alloc(
	struct xlog		*log,
	int			unit_bytes,
	int			cnt,
	bool			permanent)
{
	struct xlog_ticket	*tic;
	int			unit_res;

	/* __GFP_NOFAIL: may block, but never returns NULL. */
	tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL);

	unit_res = xlog_calc_unit_res(log, unit_bytes, &tic->t_iclog_hdrs);

	atomic_set(&tic->t_ref, 1);
	tic->t_task = current;
	INIT_LIST_HEAD(&tic->t_queue);
	tic->t_unit_res = unit_res;
	tic->t_curr_res = unit_res;
	/* t_cnt counts down; t_ocnt preserves the original count. */
	tic->t_cnt = cnt;
	tic->t_ocnt = cnt;
	tic->t_tid = get_random_u32();
	if (permanent)
		tic->t_flags |= XLOG_TIC_PERM_RESERV;

	return tic;
}

#if defined(DEBUG)
/*
 * Check to make sure the grant write head didn't just overlap the tail.  If
 * the cycles are the same, we can't be overlapping.  Otherwise, make sure that
 * the cycles differ by exactly one and check the byte count.
 *
 * This check is run unlocked, so can give false positives.
 * Rather than assert on failures, use a warn-once flag and a panic tag to
 * allow the admin to determine if they want to panic the machine when such an
 * error occurs.  For debug kernels this will have the same effect as using an
 * assert but, unlike an assert, it can be turned off at runtime.
 */
STATIC void
xlog_verify_grant_tail(
	struct xlog	*log)
{
	int		tail_cycle, tail_blocks;
	int		cycle, space;

	xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
	if (tail_cycle != cycle) {
		/* warn-once per mount via the XLOG_TAIL_WARN opstate bit */
		if (cycle - 1 != tail_cycle &&
		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
				"%s: cycle - 1 != tail_cycle", __func__);
		}

		if (space > BBTOB(tail_blocks) &&
		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
				"%s: space > BBTOB(tail_blocks)", __func__);
		}
	}
}

/* check if it will fit */
STATIC void
xlog_verify_tail_lsn(
	struct xlog		*log,
	struct xlog_in_core	*iclog)
{
	xfs_lsn_t	tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
	int		blocks;

	if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
		/* Same cycle: free space wraps around the end of the log. */
		blocks =
		    log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
		if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
			xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
	} else {
		/* Head must be exactly one cycle behind the tail here. */
		ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);

		if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
			xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);

		blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
		if (blocks < BTOBB(iclog->ic_offset) + 1)
			xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
	}
}

/*
 * Perform a number of checks on the iclog before writing to disk.
 *
 * 1. Make sure the iclogs are still circular
 * 2. Make sure we have a good magic number
 * 3. Make sure we don't have magic numbers in the data
 * 4. Check fields of each log operation header for:
 *	A. Valid client identifier
 *	B. tid ptr value falls in valid ptr space (user space code)
 *	C. Length in log record header is correct according to the
 *		individual operation headers within record.
 * 5.
When a bwrite will occur within 5 blocks of the front of the physical
 * log, check the preceding blocks of the physical log to make sure all
 * the cycle numbers agree with the current cycle number.
 */
STATIC void
xlog_verify_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			count)
{
	xlog_op_header_t	*ophead;
	xlog_in_core_t		*icptr;
	xlog_in_core_2_t	*xhdr;
	void			*base_ptr, *ptr, *p;
	ptrdiff_t		field_offset;
	uint8_t			clientid;
	int			len, i, j, k, op_len;
	int			idx;

	/*
	 * Check validity of iclog pointers: walking ic_next exactly
	 * l_iclog_bufs times must land back on the list head.
	 */
	spin_lock(&log->l_icloglock);
	icptr = log->l_iclog;
	for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
		ASSERT(icptr);

	if (icptr != log->l_iclog)
		xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
	spin_unlock(&log->l_icloglock);

	/* check log magic numbers */
	if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
		xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);

	/*
	 * The record header magic must not reappear in the first word of any
	 * subsequent basic block of the record being written.
	 */
	base_ptr = ptr = &iclog->ic_header;
	p = &iclog->ic_header;
	for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
		if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
			xfs_emerg(log->l_mp, "%s: unexpected magic num",
				__func__);
	}

	/* check fields of every log operation header in the record */
	len = be32_to_cpu(iclog->ic_header.h_num_logops);
	base_ptr = ptr = iclog->ic_datap;
	ophead = ptr;
	xhdr = iclog->ic_data;
	for (i = 0; i < len; i++) {
		ophead = ptr;

		/*
		 * clientid is only 1 byte.  If the field does not sit at a
		 * 512-byte block boundary (field_offset & 0x1ff != 0) it can
		 * be read directly; otherwise the original value has to be
		 * fetched from the header's cycle data array instead —
		 * presumably because the first word of each sector gets
		 * restamped with the cycle number on disk (NOTE(review):
		 * confirm against the log write/pack path).
		 */
		p = &ophead->oh_clientid;
		field_offset = p - base_ptr;
		if (field_offset & 0x1ff) {
			clientid = ophead->oh_clientid;
		} else {
			idx = BTOBBT((void *)&ophead->oh_clientid - iclog->ic_datap);
			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
				/* value lives in an extended header block */
				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				clientid = xlog_get_client_id(
					xhdr[j].hic_xheader.xh_cycle_data[k]);
			} else {
				clientid = xlog_get_client_id(
					iclog->ic_header.h_cycle_data[idx]);
			}
		}
		if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) {
			xfs_warn(log->l_mp,
				"%s: op %d invalid clientid %d op "PTR_FMT" offset 0x%lx",
				__func__, i, clientid, ophead,
				(unsigned long)field_offset);
		}

		/*
		 * Check length, with the same sector-boundary indirection as
		 * the clientid above.
		 */
		p = &ophead->oh_len;
		field_offset = p - base_ptr;
		if (field_offset & 0x1ff) {
			op_len = be32_to_cpu(ophead->oh_len);
		} else {
			idx = BTOBBT((void *)&ophead->oh_len - iclog->ic_datap);
			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]);
			} else {
				op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
			}
		}
		/* advance to the next op header using the recovered length */
		ptr += sizeof(xlog_op_header_t) + op_len;
	}
}
#endif

/*
 * Perform a forced shutdown on the log.
 *
 * This can be called from low level log code to trigger a shutdown, or from the
 * high level mount shutdown code when the mount shuts down.
 *
 * Our main objectives here are to make sure that:
 *	a. if the shutdown was not due to a log IO error, flush the logs to
 *	   disk. Anything modified after this is ignored.
 *	b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested
 *	   parties to find out. Nothing new gets queued after this is done.
 *	c.
Tasks sleeping on log reservations, pinned objects and
 *	   other resources get woken up.
 *	d. The mount is also marked as shut down so that log triggered
 *	   shutdowns still behave the same as if they called
 *	   xfs_forced_shutdown().
 *
 * Return true if the shutdown cause was a log IO error and we actually shut
 * the log down.
 */
bool
xlog_force_shutdown(
	struct xlog	*log,
	uint32_t	shutdown_flags)
{
	bool		log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR);

	/* No log means nothing to shut down and no IO error to report. */
	if (!log)
		return false;

	/*
	 * Flush all the completed transactions to disk before marking the log
	 * being shut down. We need to do this first as shutting down the log
	 * before the force will prevent the log force from flushing the iclogs
	 * to disk.
	 *
	 * When we are in recovery, there are no transactions to flush, and
	 * we don't want to touch the log because we don't want to perturb the
	 * current head/tail for future recovery attempts. Hence we need to
	 * avoid a log force in this case.
	 *
	 * If we are shutting down due to a log IO error, then we must avoid
	 * trying to write the log as that may just result in more IO errors and
	 * an endless shutdown/force loop.
	 */
	if (!log_error && !xlog_in_recovery(log))
		xfs_log_force(log->l_mp, XFS_LOG_SYNC);

	/*
	 * Atomically set the shutdown state. If the shutdown state is already
	 * set, then someone else is performing the shutdown and so we are done
	 * here. This should never happen because we should only ever get called
	 * once by the first shutdown caller.
	 *
	 * Much of the log state machine transitions assume that shutdown state
	 * cannot change once they hold the log->l_icloglock. Hence we need to
	 * hold that lock here, even though we use the atomic test_and_set_bit()
	 * operation to set the shutdown state.
	 */
	spin_lock(&log->l_icloglock);
	if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) {
		spin_unlock(&log->l_icloglock);
		return false;
	}
	spin_unlock(&log->l_icloglock);

	/*
	 * If this log shutdown also sets the mount shutdown state, issue a
	 * shutdown warning message.
	 */
	if (!test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &log->l_mp->m_opstate)) {
		xfs_alert_tag(log->l_mp, XFS_PTAG_SHUTDOWN_LOGERROR,
"Filesystem has been shut down due to log error (0x%x).",
				shutdown_flags);
		xfs_alert(log->l_mp,
"Please unmount the filesystem and rectify the problem(s).");
		if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
			xfs_stack_trace();
	}

	/*
	 * We don't want anybody waiting for log reservations after this. That
	 * means we have to wake up everybody queued up on reserveq as well as
	 * writeq. In addition, we make sure in xlog_{re}grant_log_space that
	 * we don't enqueue anything once the SHUTDOWN flag is set, and this
	 * action is protected by the grant locks.
	 */
	xlog_grant_head_wake_all(&log->l_reserve_head);
	xlog_grant_head_wake_all(&log->l_write_head);

	/*
	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
	 * as if the log writes were completed. The abort handling in the log
	 * item committed callback functions will do this again under lock to
	 * avoid races.
	 */
	spin_lock(&log->l_cilp->xc_push_lock);
	wake_up_all(&log->l_cilp->xc_start_wait);
	wake_up_all(&log->l_cilp->xc_commit_wait);
	spin_unlock(&log->l_cilp->xc_push_lock);

	/* Run iclog state-change callbacks now that the log is dead. */
	spin_lock(&log->l_icloglock);
	xlog_state_shutdown_callbacks(log);
	spin_unlock(&log->l_icloglock);

	/* Wake anyone waiting on the opstate change itself. */
	wake_up_var(&log->l_opstate);
	return log_error;
}

/*
 * Return 1 if every iclog in the ring is empty (carries no log operations),
 * 0 otherwise.
 */
STATIC int
xlog_iclogs_empty(
	struct xlog	*log)
{
	xlog_in_core_t	*iclog;

	iclog = log->l_iclog;
	do {
		/* endianness does not matter here, zero is zero in
		 * any language.
		 */
		if (iclog->ic_header.h_num_logops)
			return 0;
		iclog = iclog->ic_next;
	} while (iclog != log->l_iclog);
	return 1;
}

/*
 * Verify that an LSN stamped into a piece of metadata is valid. This is
 * intended for use in read verifiers on v5 superblocks.
383262306a36Sopenharmony_ci */ 383362306a36Sopenharmony_cibool 383462306a36Sopenharmony_cixfs_log_check_lsn( 383562306a36Sopenharmony_ci struct xfs_mount *mp, 383662306a36Sopenharmony_ci xfs_lsn_t lsn) 383762306a36Sopenharmony_ci{ 383862306a36Sopenharmony_ci struct xlog *log = mp->m_log; 383962306a36Sopenharmony_ci bool valid; 384062306a36Sopenharmony_ci 384162306a36Sopenharmony_ci /* 384262306a36Sopenharmony_ci * norecovery mode skips mount-time log processing and unconditionally 384362306a36Sopenharmony_ci * resets the in-core LSN. We can't validate in this mode, but 384462306a36Sopenharmony_ci * modifications are not allowed anyways so just return true. 384562306a36Sopenharmony_ci */ 384662306a36Sopenharmony_ci if (xfs_has_norecovery(mp)) 384762306a36Sopenharmony_ci return true; 384862306a36Sopenharmony_ci 384962306a36Sopenharmony_ci /* 385062306a36Sopenharmony_ci * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is 385162306a36Sopenharmony_ci * handled by recovery and thus safe to ignore here. 385262306a36Sopenharmony_ci */ 385362306a36Sopenharmony_ci if (lsn == NULLCOMMITLSN) 385462306a36Sopenharmony_ci return true; 385562306a36Sopenharmony_ci 385662306a36Sopenharmony_ci valid = xlog_valid_lsn(mp->m_log, lsn); 385762306a36Sopenharmony_ci 385862306a36Sopenharmony_ci /* warn the user about what's gone wrong before verifier failure */ 385962306a36Sopenharmony_ci if (!valid) { 386062306a36Sopenharmony_ci spin_lock(&log->l_icloglock); 386162306a36Sopenharmony_ci xfs_warn(mp, 386262306a36Sopenharmony_ci"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). 
" 386362306a36Sopenharmony_ci"Please unmount and run xfs_repair (>= v4.3) to resolve.", 386462306a36Sopenharmony_ci CYCLE_LSN(lsn), BLOCK_LSN(lsn), 386562306a36Sopenharmony_ci log->l_curr_cycle, log->l_curr_block); 386662306a36Sopenharmony_ci spin_unlock(&log->l_icloglock); 386762306a36Sopenharmony_ci } 386862306a36Sopenharmony_ci 386962306a36Sopenharmony_ci return valid; 387062306a36Sopenharmony_ci} 387162306a36Sopenharmony_ci 387262306a36Sopenharmony_ci/* 387362306a36Sopenharmony_ci * Notify the log that we're about to start using a feature that is protected 387462306a36Sopenharmony_ci * by a log incompat feature flag. This will prevent log covering from 387562306a36Sopenharmony_ci * clearing those flags. 387662306a36Sopenharmony_ci */ 387762306a36Sopenharmony_civoid 387862306a36Sopenharmony_cixlog_use_incompat_feat( 387962306a36Sopenharmony_ci struct xlog *log) 388062306a36Sopenharmony_ci{ 388162306a36Sopenharmony_ci down_read(&log->l_incompat_users); 388262306a36Sopenharmony_ci} 388362306a36Sopenharmony_ci 388462306a36Sopenharmony_ci/* Notify the log that we've finished using log incompat features. */ 388562306a36Sopenharmony_civoid 388662306a36Sopenharmony_cixlog_drop_incompat_feat( 388762306a36Sopenharmony_ci struct xlog *log) 388862306a36Sopenharmony_ci{ 388962306a36Sopenharmony_ci up_read(&log->l_incompat_users); 389062306a36Sopenharmony_ci} 3891