162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#ifndef __XFS_LOG_PRIV_H__ 762306a36Sopenharmony_ci#define __XFS_LOG_PRIV_H__ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include "xfs_extent_busy.h" /* for struct xfs_busy_extents */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_cistruct xfs_buf; 1262306a36Sopenharmony_cistruct xlog; 1362306a36Sopenharmony_cistruct xlog_ticket; 1462306a36Sopenharmony_cistruct xfs_mount; 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci/* 1762306a36Sopenharmony_ci * get client id from packed copy. 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * this hack is here because the xlog_pack code copies four bytes 2062306a36Sopenharmony_ci * of xlog_op_header containing the fields oh_clientid, oh_flags 2162306a36Sopenharmony_ci * and oh_res2 into the packed copy. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * later on this four byte chunk is treated as an int and the 2462306a36Sopenharmony_ci * client id is pulled out. 2562306a36Sopenharmony_ci * 2662306a36Sopenharmony_ci * this has endian issues, of course. 
2762306a36Sopenharmony_ci */ 2862306a36Sopenharmony_cistatic inline uint xlog_get_client_id(__be32 i) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci return be32_to_cpu(i) >> 24; 3162306a36Sopenharmony_ci} 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci/* 3462306a36Sopenharmony_ci * In core log state 3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_cienum xlog_iclog_state { 3762306a36Sopenharmony_ci XLOG_STATE_ACTIVE, /* Current IC log being written to */ 3862306a36Sopenharmony_ci XLOG_STATE_WANT_SYNC, /* Want to sync this iclog; no more writes */ 3962306a36Sopenharmony_ci XLOG_STATE_SYNCING, /* This IC log is syncing */ 4062306a36Sopenharmony_ci XLOG_STATE_DONE_SYNC, /* Done syncing to disk */ 4162306a36Sopenharmony_ci XLOG_STATE_CALLBACK, /* Callback functions now */ 4262306a36Sopenharmony_ci XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */ 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#define XLOG_STATE_STRINGS \ 4662306a36Sopenharmony_ci { XLOG_STATE_ACTIVE, "XLOG_STATE_ACTIVE" }, \ 4762306a36Sopenharmony_ci { XLOG_STATE_WANT_SYNC, "XLOG_STATE_WANT_SYNC" }, \ 4862306a36Sopenharmony_ci { XLOG_STATE_SYNCING, "XLOG_STATE_SYNCING" }, \ 4962306a36Sopenharmony_ci { XLOG_STATE_DONE_SYNC, "XLOG_STATE_DONE_SYNC" }, \ 5062306a36Sopenharmony_ci { XLOG_STATE_CALLBACK, "XLOG_STATE_CALLBACK" }, \ 5162306a36Sopenharmony_ci { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" } 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci/* 5462306a36Sopenharmony_ci * In core log flags 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ci#define XLOG_ICL_NEED_FLUSH (1u << 0) /* iclog needs REQ_PREFLUSH */ 5762306a36Sopenharmony_ci#define XLOG_ICL_NEED_FUA (1u << 1) /* iclog needs REQ_FUA */ 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci#define XLOG_ICL_STRINGS \ 6062306a36Sopenharmony_ci { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ 6162306a36Sopenharmony_ci { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } 6262306a36Sopenharmony_ci 
6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci/* 6562306a36Sopenharmony_ci * Log ticket flags 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_ci#define XLOG_TIC_PERM_RESERV (1u << 0) /* permanent reservation */ 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci#define XLOG_TIC_FLAGS \ 7062306a36Sopenharmony_ci { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci/* 7362306a36Sopenharmony_ci * Below are states for covering allocation transactions. 7462306a36Sopenharmony_ci * By covering, we mean changing the h_tail_lsn in the last on-disk 7562306a36Sopenharmony_ci * log write such that no allocation transactions will be re-done during 7662306a36Sopenharmony_ci * recovery after a system crash. Recovery starts at the last on-disk 7762306a36Sopenharmony_ci * log write. 7862306a36Sopenharmony_ci * 7962306a36Sopenharmony_ci * These states are used to insert dummy log entries to cover 8062306a36Sopenharmony_ci * space allocation transactions which can undo non-transactional changes 8162306a36Sopenharmony_ci * after a crash. Writes to a file with space 8262306a36Sopenharmony_ci * already allocated do not result in any transactions. Allocations 8362306a36Sopenharmony_ci * might include space beyond the EOF. So if we just push the EOF a 8462306a36Sopenharmony_ci * little, the last transaction for the file could contain the wrong 8562306a36Sopenharmony_ci * size. If there is no file system activity, after an allocation 8662306a36Sopenharmony_ci * transaction, and the system crashes, the allocation transaction 8762306a36Sopenharmony_ci * will get replayed and the file will be truncated. This could 8862306a36Sopenharmony_ci * be hours/days/... after the allocation occurred. 8962306a36Sopenharmony_ci * 9062306a36Sopenharmony_ci * The fix for this is to do two dummy transactions when the 9162306a36Sopenharmony_ci * system is idle. 
 * We need two dummy transactions because the h_tail_lsn
 * in the log record header needs to point beyond the last possible
 * non-dummy transaction.  The first dummy changes the h_tail_lsn to
 * the first transaction before the dummy.  The second dummy causes
 * h_tail_lsn to point to the first dummy.  Recovery starts at h_tail_lsn.
 *
 * These dummy transactions get committed when everything
 * is idle (after there has been some activity).
 *
 * There are 5 states used to control this.
 *
 *  IDLE -- no logging has been done on the file system or
 *		we are done covering previous transactions.
 *  NEED -- logging has occurred and we need a dummy transaction
 *		when the log becomes idle.
 *  DONE -- we were in the NEED state and have committed a dummy
 *		transaction.
 *  NEED2 -- we detected that a dummy transaction has gone to the
 *		on disk log with no other transactions.
 *  DONE2 -- we committed a dummy transaction when in the NEED2 state.
 *
 * There are two places where we switch states:
 *
 * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2.
 *	We commit the dummy transaction and switch to DONE or DONE2,
 *	respectively. In all other states, we don't do anything.
 *
 * 2.) When we finish writing the on-disk log (xlog_state_clean_log).
 *
 *	No matter what state we are in, if this isn't the dummy
 *	transaction going out, the next state is NEED.
 *	So, if we aren't in the DONE or DONE2 states, the next state
 *	is NEED.  We can't be finishing a write of the dummy record
 *	unless it was committed and the state switched to DONE or DONE2.
 *
 *	If we are in the DONE state and this was a write of the
 *		dummy transaction, we move to NEED2.
 *
 *	If we are in the DONE2 state and this was a write of the
 *		dummy transaction, we move to IDLE.
 *
 *
 * If only one dummy transaction is written, it can get appended to
 * a file space allocation.  When this happens, the log recovery
 * code replays the space allocation and a file could be truncated.
 * This is why we have the NEED2 and DONE2 states before going idle.
13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci#define XLOG_STATE_COVER_IDLE 0 14062306a36Sopenharmony_ci#define XLOG_STATE_COVER_NEED 1 14162306a36Sopenharmony_ci#define XLOG_STATE_COVER_DONE 2 14262306a36Sopenharmony_ci#define XLOG_STATE_COVER_NEED2 3 14362306a36Sopenharmony_ci#define XLOG_STATE_COVER_DONE2 4 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci#define XLOG_COVER_OPS 5 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_citypedef struct xlog_ticket { 14862306a36Sopenharmony_ci struct list_head t_queue; /* reserve/write queue */ 14962306a36Sopenharmony_ci struct task_struct *t_task; /* task that owns this ticket */ 15062306a36Sopenharmony_ci xlog_tid_t t_tid; /* transaction identifier */ 15162306a36Sopenharmony_ci atomic_t t_ref; /* ticket reference count */ 15262306a36Sopenharmony_ci int t_curr_res; /* current reservation */ 15362306a36Sopenharmony_ci int t_unit_res; /* unit reservation */ 15462306a36Sopenharmony_ci char t_ocnt; /* original unit count */ 15562306a36Sopenharmony_ci char t_cnt; /* current unit count */ 15662306a36Sopenharmony_ci uint8_t t_flags; /* properties of reservation */ 15762306a36Sopenharmony_ci int t_iclog_hdrs; /* iclog hdrs in t_curr_res */ 15862306a36Sopenharmony_ci} xlog_ticket_t; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci/* 16162306a36Sopenharmony_ci * - A log record header is 512 bytes. There is plenty of room to grow the 16262306a36Sopenharmony_ci * xlog_rec_header_t into the reserved space. 16362306a36Sopenharmony_ci * - ic_data follows, so a write to disk can start at the beginning of 16462306a36Sopenharmony_ci * the iclog. 16562306a36Sopenharmony_ci * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. 16662306a36Sopenharmony_ci * - ic_next is the pointer to the next iclog in the ring. 16762306a36Sopenharmony_ci * - ic_log is a pointer back to the global log structure. 
 * - ic_size is the full size of the log buffer, minus the cycle headers.
 * - ic_offset is the current number of bytes written to in this iclog.
 * - ic_refcnt is bumped when someone is writing to the log.
 * - ic_state is the state of the iclog.
 *
 * Because of cacheline contention on large machines, we need to separate
 * various resources onto different cachelines. To start with, make the
 * structure cacheline aligned. The following fields can be contended on
 * by independent processes:
 *
 *	- ic_callbacks
 *	- ic_refcnt
 *	- fields protected by the global l_icloglock
 *
 * so we need to ensure that these fields are located in separate cachelines.
 * We'll put all the read-only and l_icloglock fields in the first cacheline,
 * and move everything else out to subsequent cachelines.
18562306a36Sopenharmony_ci */ 18662306a36Sopenharmony_citypedef struct xlog_in_core { 18762306a36Sopenharmony_ci wait_queue_head_t ic_force_wait; 18862306a36Sopenharmony_ci wait_queue_head_t ic_write_wait; 18962306a36Sopenharmony_ci struct xlog_in_core *ic_next; 19062306a36Sopenharmony_ci struct xlog_in_core *ic_prev; 19162306a36Sopenharmony_ci struct xlog *ic_log; 19262306a36Sopenharmony_ci u32 ic_size; 19362306a36Sopenharmony_ci u32 ic_offset; 19462306a36Sopenharmony_ci enum xlog_iclog_state ic_state; 19562306a36Sopenharmony_ci unsigned int ic_flags; 19662306a36Sopenharmony_ci void *ic_datap; /* pointer to iclog data */ 19762306a36Sopenharmony_ci struct list_head ic_callbacks; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci /* reference counts need their own cacheline */ 20062306a36Sopenharmony_ci atomic_t ic_refcnt ____cacheline_aligned_in_smp; 20162306a36Sopenharmony_ci xlog_in_core_2_t *ic_data; 20262306a36Sopenharmony_ci#define ic_header ic_data->hic_header 20362306a36Sopenharmony_ci#ifdef DEBUG 20462306a36Sopenharmony_ci bool ic_fail_crc : 1; 20562306a36Sopenharmony_ci#endif 20662306a36Sopenharmony_ci struct semaphore ic_sema; 20762306a36Sopenharmony_ci struct work_struct ic_end_io_work; 20862306a36Sopenharmony_ci struct bio ic_bio; 20962306a36Sopenharmony_ci struct bio_vec ic_bvec[]; 21062306a36Sopenharmony_ci} xlog_in_core_t; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci/* 21362306a36Sopenharmony_ci * The CIL context is used to aggregate per-transaction details as well be 21462306a36Sopenharmony_ci * passed to the iclog for checkpoint post-commit processing. After being 21562306a36Sopenharmony_ci * passed to the iclog, another context needs to be allocated for tracking the 21662306a36Sopenharmony_ci * next set of transactions to be aggregated into a checkpoint. 
21762306a36Sopenharmony_ci */ 21862306a36Sopenharmony_cistruct xfs_cil; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cistruct xfs_cil_ctx { 22162306a36Sopenharmony_ci struct xfs_cil *cil; 22262306a36Sopenharmony_ci xfs_csn_t sequence; /* chkpt sequence # */ 22362306a36Sopenharmony_ci xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ 22462306a36Sopenharmony_ci xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ 22562306a36Sopenharmony_ci struct xlog_in_core *commit_iclog; 22662306a36Sopenharmony_ci struct xlog_ticket *ticket; /* chkpt ticket */ 22762306a36Sopenharmony_ci atomic_t space_used; /* aggregate size of regions */ 22862306a36Sopenharmony_ci struct xfs_busy_extents busy_extents; 22962306a36Sopenharmony_ci struct list_head log_items; /* log items in chkpt */ 23062306a36Sopenharmony_ci struct list_head lv_chain; /* logvecs being pushed */ 23162306a36Sopenharmony_ci struct list_head iclog_entry; 23262306a36Sopenharmony_ci struct list_head committing; /* ctx committing list */ 23362306a36Sopenharmony_ci struct work_struct push_work; 23462306a36Sopenharmony_ci atomic_t order_id; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci /* 23762306a36Sopenharmony_ci * CPUs that could have added items to the percpu CIL data. Access is 23862306a36Sopenharmony_ci * coordinated with xc_ctx_lock. 
23962306a36Sopenharmony_ci */ 24062306a36Sopenharmony_ci struct cpumask cil_pcpmask; 24162306a36Sopenharmony_ci}; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci/* 24462306a36Sopenharmony_ci * Per-cpu CIL tracking items 24562306a36Sopenharmony_ci */ 24662306a36Sopenharmony_cistruct xlog_cil_pcp { 24762306a36Sopenharmony_ci int32_t space_used; 24862306a36Sopenharmony_ci uint32_t space_reserved; 24962306a36Sopenharmony_ci struct list_head busy_extents; 25062306a36Sopenharmony_ci struct list_head log_items; 25162306a36Sopenharmony_ci}; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci/* 25462306a36Sopenharmony_ci * Committed Item List structure 25562306a36Sopenharmony_ci * 25662306a36Sopenharmony_ci * This structure is used to track log items that have been committed but not 25762306a36Sopenharmony_ci * yet written into the log. It is used only when the delayed logging mount 25862306a36Sopenharmony_ci * option is enabled. 25962306a36Sopenharmony_ci * 26062306a36Sopenharmony_ci * This structure tracks the list of committing checkpoint contexts so 26162306a36Sopenharmony_ci * we can avoid the problem of having to hold out new transactions during a 26262306a36Sopenharmony_ci * flush until we have a the commit record LSN of the checkpoint. We can 26362306a36Sopenharmony_ci * traverse the list of committing contexts in xlog_cil_push_lsn() to find a 26462306a36Sopenharmony_ci * sequence match and extract the commit LSN directly from there. If the 26562306a36Sopenharmony_ci * checkpoint is still in the process of committing, we can block waiting for 26662306a36Sopenharmony_ci * the commit LSN to be determined as well. This should make synchronous 26762306a36Sopenharmony_ci * operations almost as efficient as the old logging methods. 
26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_cistruct xfs_cil { 27062306a36Sopenharmony_ci struct xlog *xc_log; 27162306a36Sopenharmony_ci unsigned long xc_flags; 27262306a36Sopenharmony_ci atomic_t xc_iclog_hdrs; 27362306a36Sopenharmony_ci struct workqueue_struct *xc_push_wq; 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp; 27662306a36Sopenharmony_ci struct xfs_cil_ctx *xc_ctx; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci spinlock_t xc_push_lock ____cacheline_aligned_in_smp; 27962306a36Sopenharmony_ci xfs_csn_t xc_push_seq; 28062306a36Sopenharmony_ci bool xc_push_commit_stable; 28162306a36Sopenharmony_ci struct list_head xc_committing; 28262306a36Sopenharmony_ci wait_queue_head_t xc_commit_wait; 28362306a36Sopenharmony_ci wait_queue_head_t xc_start_wait; 28462306a36Sopenharmony_ci xfs_csn_t xc_current_sequence; 28562306a36Sopenharmony_ci wait_queue_head_t xc_push_wait; /* background push throttle */ 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci void __percpu *xc_pcp; /* percpu CIL structures */ 28862306a36Sopenharmony_ci} ____cacheline_aligned_in_smp; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci/* xc_flags bit values */ 29162306a36Sopenharmony_ci#define XLOG_CIL_EMPTY 1 29262306a36Sopenharmony_ci#define XLOG_CIL_PCP_SPACE 2 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci/* 29562306a36Sopenharmony_ci * The amount of log space we allow the CIL to aggregate is difficult to size. 29662306a36Sopenharmony_ci * Whatever we choose, we have to make sure we can get a reservation for the 29762306a36Sopenharmony_ci * log space effectively, that it is large enough to capture sufficient 29862306a36Sopenharmony_ci * relogging to reduce log buffer IO significantly, but it is not too large for 29962306a36Sopenharmony_ci * the log or induces too much latency when writing out through the iclogs. 
 * We track both space consumed and the number of vectors in the checkpoint
 * context, so we need to decide which to use for limiting.
 *
 * Every log buffer we write out during a push needs a header reserved, which
 * is at least one sector and more for v2 logs. Hence we need a reservation of
 * at least 512 bytes per 32k of log space just for the LR headers. That means
 * 16KB of reservation per megabyte of delayed logging space we will consume,
 * plus various headers.  The number of headers will vary based on the number
 * of io vectors, so limiting on a specific number of vectors is going to
 * result in transactions of varying size. IOWs, it is more consistent to track
 * and limit space consumed in the log rather than by the number of objects
 * being logged in order to prevent checkpoint ticket overruns.
 *
 * Further, use of static reservations through the log grant mechanism is
 * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
 * grant) and a significant deadlock potential because regranting write space
 * can block on log pushes. Hence if we have to regrant log space during a log
 * push, we can deadlock.
 *
 * However, we can avoid this by use of a dynamic "reservation stealing"
 * technique during transaction commit whereby unused reservation space in the
 * transaction ticket is transferred to the CIL ctx commit ticket to cover the
 * space needed by the checkpoint transaction. This means that we never need to
 * specifically reserve space for the CIL checkpoint transaction, nor do we
 * need to regrant space once the checkpoint completes. This also means the
 * checkpoint transaction ticket is specific to the checkpoint context, rather
 * than the CIL itself.
 *
 * With dynamic reservations, we can effectively make up arbitrary limits for
 * the checkpoint size so long as they don't violate any other size rules.
 * Recovery imposes a rule that no transaction exceed half the log, so we are
 * limited by that.  Furthermore, the log transaction reservation subsystem
 * tries to keep 25% of the log free, so we need to keep below that limit or we
 * risk running out of free log space to start any new transactions.
 *
 * In order to keep background CIL push efficient, we only need to ensure the
 * CIL is large enough to maintain sufficient in-memory relogging to avoid
 * repeated physical writes of frequently modified metadata. If we allow the CIL
 * to grow to a substantial fraction of the log, then we may be pinning hundreds
 * of megabytes of metadata in memory until the CIL flushes.
 * This can cause issues when we are running low on memory - pinned memory
 * cannot be reclaimed, and the CIL consumes a lot of memory. Hence we need to
 * set an upper physical size limit for the CIL that limits the maximum amount
 * of memory pinned by the CIL but does not limit performance by reducing
 * relogging efficiency significantly.
 *
 * As such, the CIL push threshold ends up being the smaller of two thresholds:
 * - a threshold large enough that it allows CIL to be pushed and progress to be
 *   made without excessive blocking of incoming transaction commits. This is
 *   defined to be 12.5% of the log space - half the 25% push threshold of the
 *   AIL.
 * - small enough that it doesn't pin excessive amounts of memory but maintains
 *   close to peak relogging efficiency. This is defined to be 16x the iclog
 *   buffer window (32MB) as measurements have shown this to be roughly the
 *   point of diminishing performance increases under highly concurrent
 *   modification workloads.
 *
 * To prevent the CIL from overflowing upper commit size bounds, we introduce a
 * new threshold at which we block committing transactions until the background
 * CIL commit commences and switches to a new context.
 * While this is not a hard limit, it forces the process committing a
 * transaction to the CIL to block and yield the CPU, giving the CIL push work
 * a chance to be scheduled and start work. This prevents a process running
 * lots of transactions from overfilling the CIL because it is not yielding the
 * CPU. We set the blocking limit at twice the background push space threshold
 * so we keep in line with the AIL push thresholds.
 *
 * Note: this is not a -hard- limit as blocking is applied after the transaction
 * is inserted into the CIL and the push has been triggered. It is largely a
 * throttling mechanism that allows the CIL push to be scheduled and run. A hard
 * limit will be difficult to implement without introducing global serialisation
 * in the CIL commit fast path, and it's not at all clear that we actually need
 * such hard limits given the ~7 years we've run without a hard limit before
 * finding the first situation where a checkpoint size overflow actually
 * occurred. Hence the simple throttle, and an ASSERT check to tell us that
 * we've overrun the max size.
37662306a36Sopenharmony_ci */ 37762306a36Sopenharmony_ci#define XLOG_CIL_SPACE_LIMIT(log) \ 37862306a36Sopenharmony_ci min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \ 38162306a36Sopenharmony_ci (XLOG_CIL_SPACE_LIMIT(log) * 2) 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci/* 38462306a36Sopenharmony_ci * ticket grant locks, queues and accounting have their own cachlines 38562306a36Sopenharmony_ci * as these are quite hot and can be operated on concurrently. 38662306a36Sopenharmony_ci */ 38762306a36Sopenharmony_cistruct xlog_grant_head { 38862306a36Sopenharmony_ci spinlock_t lock ____cacheline_aligned_in_smp; 38962306a36Sopenharmony_ci struct list_head waiters; 39062306a36Sopenharmony_ci atomic64_t grant; 39162306a36Sopenharmony_ci}; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci/* 39462306a36Sopenharmony_ci * The reservation head lsn is not made up of a cycle number and block number. 39562306a36Sopenharmony_ci * Instead, it uses a cycle number and byte number. Logs don't expect to 39662306a36Sopenharmony_ci * overflow 31 bits worth of byte offset, so using a byte number will mean 39762306a36Sopenharmony_ci * that round off problems won't occur when releasing partial reservations. 
39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_cistruct xlog { 40062306a36Sopenharmony_ci /* The following fields don't need locking */ 40162306a36Sopenharmony_ci struct xfs_mount *l_mp; /* mount point */ 40262306a36Sopenharmony_ci struct xfs_ail *l_ailp; /* AIL log is working with */ 40362306a36Sopenharmony_ci struct xfs_cil *l_cilp; /* CIL log is working with */ 40462306a36Sopenharmony_ci struct xfs_buftarg *l_targ; /* buftarg of log */ 40562306a36Sopenharmony_ci struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ 40662306a36Sopenharmony_ci struct delayed_work l_work; /* background flush work */ 40762306a36Sopenharmony_ci long l_opstate; /* operational state */ 40862306a36Sopenharmony_ci uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 40962306a36Sopenharmony_ci struct list_head *l_buf_cancel_table; 41062306a36Sopenharmony_ci int l_iclog_hsize; /* size of iclog header */ 41162306a36Sopenharmony_ci int l_iclog_heads; /* # of iclog header sectors */ 41262306a36Sopenharmony_ci uint l_sectBBsize; /* sector size in BBs (2^n) */ 41362306a36Sopenharmony_ci int l_iclog_size; /* size of log in bytes */ 41462306a36Sopenharmony_ci int l_iclog_bufs; /* number of iclog buffers */ 41562306a36Sopenharmony_ci xfs_daddr_t l_logBBstart; /* start block of log */ 41662306a36Sopenharmony_ci int l_logsize; /* size of log in bytes */ 41762306a36Sopenharmony_ci int l_logBBsize; /* size of log in BB chunks */ 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci /* The following block of fields are changed while holding icloglock */ 42062306a36Sopenharmony_ci wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; 42162306a36Sopenharmony_ci /* waiting for iclog flush */ 42262306a36Sopenharmony_ci int l_covered_state;/* state of "covering disk 42362306a36Sopenharmony_ci * log entries" */ 42462306a36Sopenharmony_ci xlog_in_core_t *l_iclog; /* head log queue */ 42562306a36Sopenharmony_ci spinlock_t l_icloglock; /* grab to change iclog state */ 
42662306a36Sopenharmony_ci int l_curr_cycle; /* Cycle number of log writes */ 42762306a36Sopenharmony_ci int l_prev_cycle; /* Cycle number before last 42862306a36Sopenharmony_ci * block increment */ 42962306a36Sopenharmony_ci int l_curr_block; /* current logical log block */ 43062306a36Sopenharmony_ci int l_prev_block; /* previous logical log block */ 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci /* 43362306a36Sopenharmony_ci * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and 43462306a36Sopenharmony_ci * read without needing to hold specific locks. To avoid operations 43562306a36Sopenharmony_ci * contending with other hot objects, place each of them on a separate 43662306a36Sopenharmony_ci * cacheline. 43762306a36Sopenharmony_ci */ 43862306a36Sopenharmony_ci /* lsn of last LR on disk */ 43962306a36Sopenharmony_ci atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; 44062306a36Sopenharmony_ci /* lsn of 1st LR with unflushed * buffers */ 44162306a36Sopenharmony_ci atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci struct xlog_grant_head l_reserve_head; 44462306a36Sopenharmony_ci struct xlog_grant_head l_write_head; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci struct xfs_kobj l_kobj; 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci /* log recovery lsn tracking (for buffer submission */ 44962306a36Sopenharmony_ci xfs_lsn_t l_recovery_lsn; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci uint32_t l_iclog_roundoff;/* padding roundoff */ 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci /* Users of log incompat features should take a read lock. 
*/ 45462306a36Sopenharmony_ci struct rw_semaphore l_incompat_users; 45562306a36Sopenharmony_ci}; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci/* 45862306a36Sopenharmony_ci * Bits for operational state 45962306a36Sopenharmony_ci */ 46062306a36Sopenharmony_ci#define XLOG_ACTIVE_RECOVERY 0 /* in the middle of recovery */ 46162306a36Sopenharmony_ci#define XLOG_RECOVERY_NEEDED 1 /* log was recovered */ 46262306a36Sopenharmony_ci#define XLOG_IO_ERROR 2 /* log hit an I/O error, and being 46362306a36Sopenharmony_ci shutdown */ 46462306a36Sopenharmony_ci#define XLOG_TAIL_WARN 3 /* log tail verify warning issued */ 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_cistatic inline bool 46762306a36Sopenharmony_cixlog_recovery_needed(struct xlog *log) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci return test_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_cistatic inline bool 47362306a36Sopenharmony_cixlog_in_recovery(struct xlog *log) 47462306a36Sopenharmony_ci{ 47562306a36Sopenharmony_ci return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic inline bool 47962306a36Sopenharmony_cixlog_is_shutdown(struct xlog *log) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci return test_bit(XLOG_IO_ERROR, &log->l_opstate); 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci/* 48562306a36Sopenharmony_ci * Wait until the xlog_force_shutdown() has marked the log as shut down 48662306a36Sopenharmony_ci * so xlog_is_shutdown() will always return true. 
48762306a36Sopenharmony_ci */ 48862306a36Sopenharmony_cistatic inline void 48962306a36Sopenharmony_cixlog_shutdown_wait( 49062306a36Sopenharmony_ci struct xlog *log) 49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci wait_var_event(&log->l_opstate, xlog_is_shutdown(log)); 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci/* common routines */ 49662306a36Sopenharmony_ciextern int 49762306a36Sopenharmony_cixlog_recover( 49862306a36Sopenharmony_ci struct xlog *log); 49962306a36Sopenharmony_ciextern int 50062306a36Sopenharmony_cixlog_recover_finish( 50162306a36Sopenharmony_ci struct xlog *log); 50262306a36Sopenharmony_ciextern void 50362306a36Sopenharmony_cixlog_recover_cancel(struct xlog *); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ciextern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, 50662306a36Sopenharmony_ci char *dp, int size); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ciextern struct kmem_cache *xfs_log_ticket_cache; 50962306a36Sopenharmony_cistruct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, 51062306a36Sopenharmony_ci int count, bool permanent); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_civoid xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); 51362306a36Sopenharmony_civoid xlog_print_trans(struct xfs_trans *); 51462306a36Sopenharmony_ciint xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, 51562306a36Sopenharmony_ci struct list_head *lv_chain, struct xlog_ticket *tic, 51662306a36Sopenharmony_ci uint32_t len); 51762306a36Sopenharmony_civoid xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); 51862306a36Sopenharmony_civoid xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_civoid xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, 52162306a36Sopenharmony_ci int eventual_size); 52262306a36Sopenharmony_ciint 
xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, 52362306a36Sopenharmony_ci struct xlog_ticket *ticket); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci/* 52662306a36Sopenharmony_ci * When we crack an atomic LSN, we sample it first so that the value will not 52762306a36Sopenharmony_ci * change while we are cracking it into the component values. This means we 52862306a36Sopenharmony_ci * will always get consistent component values to work from. This should always 52962306a36Sopenharmony_ci * be used to sample and crack LSNs that are stored and updated in atomic 53062306a36Sopenharmony_ci * variables. 53162306a36Sopenharmony_ci */ 53262306a36Sopenharmony_cistatic inline void 53362306a36Sopenharmony_cixlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) 53462306a36Sopenharmony_ci{ 53562306a36Sopenharmony_ci xfs_lsn_t val = atomic64_read(lsn); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci *cycle = CYCLE_LSN(val); 53862306a36Sopenharmony_ci *block = BLOCK_LSN(val); 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci/* 54262306a36Sopenharmony_ci * Calculate and assign a value to an atomic LSN variable from component pieces. 54362306a36Sopenharmony_ci */ 54462306a36Sopenharmony_cistatic inline void 54562306a36Sopenharmony_cixlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci atomic64_set(lsn, xlog_assign_lsn(cycle, block)); 54862306a36Sopenharmony_ci} 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci/* 55162306a36Sopenharmony_ci * When we crack the grant head, we sample it first so that the value will not 55262306a36Sopenharmony_ci * change while we are cracking it into the component values. This means we 55362306a36Sopenharmony_ci * will always get consistent component values to work from. 
55462306a36Sopenharmony_ci */ 55562306a36Sopenharmony_cistatic inline void 55662306a36Sopenharmony_cixlog_crack_grant_head_val(int64_t val, int *cycle, int *space) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci *cycle = val >> 32; 55962306a36Sopenharmony_ci *space = val & 0xffffffff; 56062306a36Sopenharmony_ci} 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_cistatic inline void 56362306a36Sopenharmony_cixlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) 56462306a36Sopenharmony_ci{ 56562306a36Sopenharmony_ci xlog_crack_grant_head_val(atomic64_read(head), cycle, space); 56662306a36Sopenharmony_ci} 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_cistatic inline int64_t 56962306a36Sopenharmony_cixlog_assign_grant_head_val(int cycle, int space) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci return ((int64_t)cycle << 32) | space; 57262306a36Sopenharmony_ci} 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_cistatic inline void 57562306a36Sopenharmony_cixlog_assign_grant_head(atomic64_t *head, int cycle, int space) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); 57862306a36Sopenharmony_ci} 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci/* 58162306a36Sopenharmony_ci * Committed Item List interfaces 58262306a36Sopenharmony_ci */ 58362306a36Sopenharmony_ciint xlog_cil_init(struct xlog *log); 58462306a36Sopenharmony_civoid xlog_cil_init_post_recovery(struct xlog *log); 58562306a36Sopenharmony_civoid xlog_cil_destroy(struct xlog *log); 58662306a36Sopenharmony_cibool xlog_cil_empty(struct xlog *log); 58762306a36Sopenharmony_civoid xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, 58862306a36Sopenharmony_ci xfs_csn_t *commit_seq, bool regrant); 58962306a36Sopenharmony_civoid xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx, 59062306a36Sopenharmony_ci struct xlog_in_core *iclog); 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci 
59362306a36Sopenharmony_ci/* 59462306a36Sopenharmony_ci * CIL force routines 59562306a36Sopenharmony_ci */ 59662306a36Sopenharmony_civoid xlog_cil_flush(struct xlog *log); 59762306a36Sopenharmony_cixfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_cistatic inline void 60062306a36Sopenharmony_cixlog_cil_force(struct xlog *log) 60162306a36Sopenharmony_ci{ 60262306a36Sopenharmony_ci xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence); 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci/* 60662306a36Sopenharmony_ci * Wrapper function for waiting on a wait queue serialised against wakeups 60762306a36Sopenharmony_ci * by a spinlock. This matches the semantics of all the wait queues used in the 60862306a36Sopenharmony_ci * log code. 60962306a36Sopenharmony_ci */ 61062306a36Sopenharmony_cistatic inline void 61162306a36Sopenharmony_cixlog_wait( 61262306a36Sopenharmony_ci struct wait_queue_head *wq, 61362306a36Sopenharmony_ci struct spinlock *lock) 61462306a36Sopenharmony_ci __releases(lock) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci DECLARE_WAITQUEUE(wait, current); 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci add_wait_queue_exclusive(wq, &wait); 61962306a36Sopenharmony_ci __set_current_state(TASK_UNINTERRUPTIBLE); 62062306a36Sopenharmony_ci spin_unlock(lock); 62162306a36Sopenharmony_ci schedule(); 62262306a36Sopenharmony_ci remove_wait_queue(wq, &wait); 62362306a36Sopenharmony_ci} 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ciint xlog_wait_on_iclog(struct xlog_in_core *iclog); 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci/* 62862306a36Sopenharmony_ci * The LSN is valid so long as it is behind the current LSN. If it isn't, this 62962306a36Sopenharmony_ci * means that the next log record that includes this metadata could have a 63062306a36Sopenharmony_ci * smaller LSN. 
In turn, this means that the modification in the log would not 63162306a36Sopenharmony_ci * replay. 63262306a36Sopenharmony_ci */ 63362306a36Sopenharmony_cistatic inline bool 63462306a36Sopenharmony_cixlog_valid_lsn( 63562306a36Sopenharmony_ci struct xlog *log, 63662306a36Sopenharmony_ci xfs_lsn_t lsn) 63762306a36Sopenharmony_ci{ 63862306a36Sopenharmony_ci int cur_cycle; 63962306a36Sopenharmony_ci int cur_block; 64062306a36Sopenharmony_ci bool valid = true; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci /* 64362306a36Sopenharmony_ci * First, sample the current lsn without locking to avoid added 64462306a36Sopenharmony_ci * contention from metadata I/O. The current cycle and block are updated 64562306a36Sopenharmony_ci * (in xlog_state_switch_iclogs()) and read here in a particular order 64662306a36Sopenharmony_ci * to avoid false negatives (e.g., thinking the metadata LSN is valid 64762306a36Sopenharmony_ci * when it is not). 64862306a36Sopenharmony_ci * 64962306a36Sopenharmony_ci * The current block is always rewound before the cycle is bumped in 65062306a36Sopenharmony_ci * xlog_state_switch_iclogs() to ensure the current LSN is never seen in 65162306a36Sopenharmony_ci * a transiently forward state. Instead, we can see the LSN in a 65262306a36Sopenharmony_ci * transiently behind state if we happen to race with a cycle wrap. 65362306a36Sopenharmony_ci */ 65462306a36Sopenharmony_ci cur_cycle = READ_ONCE(log->l_curr_cycle); 65562306a36Sopenharmony_ci smp_rmb(); 65662306a36Sopenharmony_ci cur_block = READ_ONCE(log->l_curr_block); 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci if ((CYCLE_LSN(lsn) > cur_cycle) || 65962306a36Sopenharmony_ci (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { 66062306a36Sopenharmony_ci /* 66162306a36Sopenharmony_ci * If the metadata LSN appears invalid, it's possible the check 66262306a36Sopenharmony_ci * above raced with a wrap to the next log cycle. Grab the lock 66362306a36Sopenharmony_ci * to check for sure. 
66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci spin_lock(&log->l_icloglock); 66662306a36Sopenharmony_ci cur_cycle = log->l_curr_cycle; 66762306a36Sopenharmony_ci cur_block = log->l_curr_block; 66862306a36Sopenharmony_ci spin_unlock(&log->l_icloglock); 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci if ((CYCLE_LSN(lsn) > cur_cycle) || 67162306a36Sopenharmony_ci (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) 67262306a36Sopenharmony_ci valid = false; 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci return valid; 67662306a36Sopenharmony_ci} 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci/* 67962306a36Sopenharmony_ci * Log vector and shadow buffers can be large, so we need to use kvmalloc() here 68062306a36Sopenharmony_ci * to ensure success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts 68162306a36Sopenharmony_ci * to fall back to vmalloc, so we can't actually do anything useful with gfp 68262306a36Sopenharmony_ci * flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() 68362306a36Sopenharmony_ci * will do direct reclaim and compaction in the slow path, both of which are 68462306a36Sopenharmony_ci * horrendously expensive. We just want kmalloc to fail fast and fall back to 68562306a36Sopenharmony_ci * vmalloc if it can't get somethign straight away from the free lists or 68662306a36Sopenharmony_ci * buddy allocator. Hence we have to open code kvmalloc outselves here. 68762306a36Sopenharmony_ci * 68862306a36Sopenharmony_ci * This assumes that the caller uses memalloc_nofs_save task context here, so 68962306a36Sopenharmony_ci * despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS 69062306a36Sopenharmony_ci * allocations. This is actually the only way to make vmalloc() do GFP_NOFS 69162306a36Sopenharmony_ci * allocations, so lets just all pretend this is a GFP_KERNEL context 69262306a36Sopenharmony_ci * operation.... 
69362306a36Sopenharmony_ci */ 69462306a36Sopenharmony_cistatic inline void * 69562306a36Sopenharmony_cixlog_kvmalloc( 69662306a36Sopenharmony_ci size_t buf_size) 69762306a36Sopenharmony_ci{ 69862306a36Sopenharmony_ci gfp_t flags = GFP_KERNEL; 69962306a36Sopenharmony_ci void *p; 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci flags &= ~__GFP_DIRECT_RECLAIM; 70262306a36Sopenharmony_ci flags |= __GFP_NOWARN | __GFP_NORETRY; 70362306a36Sopenharmony_ci do { 70462306a36Sopenharmony_ci p = kmalloc(buf_size, flags); 70562306a36Sopenharmony_ci if (!p) 70662306a36Sopenharmony_ci p = vmalloc(buf_size); 70762306a36Sopenharmony_ci } while (!p); 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci return p; 71062306a36Sopenharmony_ci} 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci#endif /* __XFS_LOG_PRIV_H__ */ 713