162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_log_format.h" 1162306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1262306a36Sopenharmony_ci#include "xfs_sb.h" 1362306a36Sopenharmony_ci#include "xfs_mount.h" 1462306a36Sopenharmony_ci#include "xfs_trans.h" 1562306a36Sopenharmony_ci#include "xfs_error.h" 1662306a36Sopenharmony_ci#include "xfs_alloc.h" 1762306a36Sopenharmony_ci#include "xfs_fsops.h" 1862306a36Sopenharmony_ci#include "xfs_trans_space.h" 1962306a36Sopenharmony_ci#include "xfs_log.h" 2062306a36Sopenharmony_ci#include "xfs_log_priv.h" 2162306a36Sopenharmony_ci#include "xfs_ag.h" 2262306a36Sopenharmony_ci#include "xfs_ag_resv.h" 2362306a36Sopenharmony_ci#include "xfs_trace.h" 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci * Write new AG headers to disk. Non-transactional, but need to be 2762306a36Sopenharmony_ci * written and completed prior to the growfs transaction being logged. 2862306a36Sopenharmony_ci * To do this, we use a delayed write buffer list and wait for 2962306a36Sopenharmony_ci * submission and IO completion of the list as a whole. This allows the 3062306a36Sopenharmony_ci * IO subsystem to merge all the AG headers in a single AG into a single 3162306a36Sopenharmony_ci * IO and hide most of the latency of the IO from us. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * This also means that if we get an error whilst building the buffer 3462306a36Sopenharmony_ci * list to write, we can cancel the entire list without having written 3562306a36Sopenharmony_ci * anything. 3662306a36Sopenharmony_ci */ 3762306a36Sopenharmony_cistatic int 3862306a36Sopenharmony_cixfs_resizefs_init_new_ags( 3962306a36Sopenharmony_ci struct xfs_trans *tp, 4062306a36Sopenharmony_ci struct aghdr_init_data *id, 4162306a36Sopenharmony_ci xfs_agnumber_t oagcount, 4262306a36Sopenharmony_ci xfs_agnumber_t nagcount, 4362306a36Sopenharmony_ci xfs_rfsblock_t delta, 4462306a36Sopenharmony_ci struct xfs_perag *last_pag, 4562306a36Sopenharmony_ci bool *lastag_extended) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 4862306a36Sopenharmony_ci xfs_rfsblock_t nb = mp->m_sb.sb_dblocks + delta; 4962306a36Sopenharmony_ci int error; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci *lastag_extended = false; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci INIT_LIST_HEAD(&id->buffer_list); 5462306a36Sopenharmony_ci for (id->agno = nagcount - 1; 5562306a36Sopenharmony_ci id->agno >= oagcount; 5662306a36Sopenharmony_ci id->agno--, delta -= id->agsize) { 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci if (id->agno == nagcount - 1) 5962306a36Sopenharmony_ci id->agsize = nb - (id->agno * 6062306a36Sopenharmony_ci (xfs_rfsblock_t)mp->m_sb.sb_agblocks); 6162306a36Sopenharmony_ci else 6262306a36Sopenharmony_ci id->agsize = mp->m_sb.sb_agblocks; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci error = xfs_ag_init_headers(mp, id); 6562306a36Sopenharmony_ci if (error) { 6662306a36Sopenharmony_ci xfs_buf_delwri_cancel(&id->buffer_list); 6762306a36Sopenharmony_ci return error; 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci } 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci error = xfs_buf_delwri_submit(&id->buffer_list); 7262306a36Sopenharmony_ci if (error) 7362306a36Sopenharmony_ci return error; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci if (delta) { 7662306a36Sopenharmony_ci *lastag_extended = true; 7762306a36Sopenharmony_ci error = xfs_ag_extend_space(last_pag, tp, delta); 7862306a36Sopenharmony_ci } 7962306a36Sopenharmony_ci return error; 8062306a36Sopenharmony_ci} 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci/* 8362306a36Sopenharmony_ci * growfs operations 8462306a36Sopenharmony_ci */ 8562306a36Sopenharmony_cistatic int 8662306a36Sopenharmony_cixfs_growfs_data_private( 8762306a36Sopenharmony_ci struct xfs_mount *mp, /* mount point for filesystem */ 8862306a36Sopenharmony_ci struct xfs_growfs_data *in) /* growfs data input struct */ 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci struct xfs_buf *bp; 9162306a36Sopenharmony_ci int error; 9262306a36Sopenharmony_ci xfs_agnumber_t nagcount; 9362306a36Sopenharmony_ci xfs_agnumber_t nagimax = 0; 9462306a36Sopenharmony_ci xfs_rfsblock_t nb, nb_div, nb_mod; 9562306a36Sopenharmony_ci int64_t delta; 9662306a36Sopenharmony_ci bool lastag_extended = false; 9762306a36Sopenharmony_ci xfs_agnumber_t oagcount; 9862306a36Sopenharmony_ci struct xfs_trans *tp; 9962306a36Sopenharmony_ci struct aghdr_init_data id = {}; 10062306a36Sopenharmony_ci struct xfs_perag *last_pag; 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci nb = in->newblocks; 10362306a36Sopenharmony_ci error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); 10462306a36Sopenharmony_ci if (error) 10562306a36Sopenharmony_ci return error; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci if (nb > mp->m_sb.sb_dblocks) { 10862306a36Sopenharmony_ci error = xfs_buf_read_uncached(mp->m_ddev_targp, 10962306a36Sopenharmony_ci XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 11062306a36Sopenharmony_ci XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); 11162306a36Sopenharmony_ci if (error) 11262306a36Sopenharmony_ci return error; 11362306a36Sopenharmony_ci xfs_buf_relse(bp); 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci nb_div = nb; 11762306a36Sopenharmony_ci nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); 11862306a36Sopenharmony_ci if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) 11962306a36Sopenharmony_ci nb_div++; 12062306a36Sopenharmony_ci else if (nb_mod) 12162306a36Sopenharmony_ci nb = nb_div * mp->m_sb.sb_agblocks; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci if (nb_div > XFS_MAX_AGNUMBER + 1) { 12462306a36Sopenharmony_ci nb_div = XFS_MAX_AGNUMBER + 1; 12562306a36Sopenharmony_ci nb = nb_div * mp->m_sb.sb_agblocks; 12662306a36Sopenharmony_ci } 12762306a36Sopenharmony_ci nagcount = nb_div; 12862306a36Sopenharmony_ci delta = nb - mp->m_sb.sb_dblocks; 12962306a36Sopenharmony_ci /* 13062306a36Sopenharmony_ci * Reject filesystems with a single AG because they are not 13162306a36Sopenharmony_ci * supported, and reject a shrink operation that would cause a 13262306a36Sopenharmony_ci * filesystem to become unsupported. 13362306a36Sopenharmony_ci */ 13462306a36Sopenharmony_ci if (delta < 0 && nagcount < 2) 13562306a36Sopenharmony_ci return -EINVAL; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci oagcount = mp->m_sb.sb_agcount; 13862306a36Sopenharmony_ci /* allocate the new per-ag structures */ 13962306a36Sopenharmony_ci if (nagcount > oagcount) { 14062306a36Sopenharmony_ci error = xfs_initialize_perag(mp, nagcount, nb, &nagimax); 14162306a36Sopenharmony_ci if (error) 14262306a36Sopenharmony_ci return error; 14362306a36Sopenharmony_ci } else if (nagcount < oagcount) { 14462306a36Sopenharmony_ci /* TODO: shrinking the entire AGs hasn't yet completed */ 14562306a36Sopenharmony_ci return -EINVAL; 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci if (delta > 0) 14962306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 15062306a36Sopenharmony_ci XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, 15162306a36Sopenharmony_ci &tp); 15262306a36Sopenharmony_ci else 15362306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, -delta, 0, 15462306a36Sopenharmony_ci 0, &tp); 15562306a36Sopenharmony_ci if (error) 15662306a36Sopenharmony_ci return error; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci last_pag = xfs_perag_get(mp, oagcount - 1); 15962306a36Sopenharmony_ci if (delta > 0) { 16062306a36Sopenharmony_ci error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, 16162306a36Sopenharmony_ci delta, last_pag, &lastag_extended); 16262306a36Sopenharmony_ci } else { 16362306a36Sopenharmony_ci xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, 16462306a36Sopenharmony_ci "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci error = xfs_ag_shrink_space(last_pag, &tp, -delta); 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci xfs_perag_put(last_pag); 16962306a36Sopenharmony_ci if (error) 17062306a36Sopenharmony_ci goto out_trans_cancel; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* 17362306a36Sopenharmony_ci * Update changed superblock fields transactionally. These are not 17462306a36Sopenharmony_ci * seen by the rest of the world until the transaction commit applies 17562306a36Sopenharmony_ci * them atomically to the superblock. 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci if (nagcount > oagcount) 17862306a36Sopenharmony_ci xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 17962306a36Sopenharmony_ci if (delta) 18062306a36Sopenharmony_ci xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, delta); 18162306a36Sopenharmony_ci if (id.nfree) 18262306a36Sopenharmony_ci xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci /* 18562306a36Sopenharmony_ci * Sync sb counters now to reflect the updated values. This is 18662306a36Sopenharmony_ci * particularly important for shrink because the write verifier 18762306a36Sopenharmony_ci * will fail if sb_fdblocks is ever larger than sb_dblocks. 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_ci if (xfs_has_lazysbcount(mp)) 19062306a36Sopenharmony_ci xfs_log_sb(tp); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci xfs_trans_set_sync(tp); 19362306a36Sopenharmony_ci error = xfs_trans_commit(tp); 19462306a36Sopenharmony_ci if (error) 19562306a36Sopenharmony_ci return error; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci /* New allocation groups fully initialized, so update mount struct */ 19862306a36Sopenharmony_ci if (nagimax) 19962306a36Sopenharmony_ci mp->m_maxagi = nagimax; 20062306a36Sopenharmony_ci xfs_set_low_space_thresholds(mp); 20162306a36Sopenharmony_ci mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci if (delta > 0) { 20462306a36Sopenharmony_ci /* 20562306a36Sopenharmony_ci * If we expanded the last AG, free the per-AG reservation 20662306a36Sopenharmony_ci * so we can reinitialize it with the new size. 20762306a36Sopenharmony_ci */ 20862306a36Sopenharmony_ci if (lastag_extended) { 20962306a36Sopenharmony_ci struct xfs_perag *pag; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci pag = xfs_perag_get(mp, id.agno); 21262306a36Sopenharmony_ci error = xfs_ag_resv_free(pag); 21362306a36Sopenharmony_ci xfs_perag_put(pag); 21462306a36Sopenharmony_ci if (error) 21562306a36Sopenharmony_ci return error; 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci /* 21862306a36Sopenharmony_ci * Reserve AG metadata blocks. ENOSPC here does not mean there 21962306a36Sopenharmony_ci * was a growfs failure, just that there still isn't space for 22062306a36Sopenharmony_ci * new user data after the grow has been run. 22162306a36Sopenharmony_ci */ 22262306a36Sopenharmony_ci error = xfs_fs_reserve_ag_blocks(mp); 22362306a36Sopenharmony_ci if (error == -ENOSPC) 22462306a36Sopenharmony_ci error = 0; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci return error; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ciout_trans_cancel: 22962306a36Sopenharmony_ci xfs_trans_cancel(tp); 23062306a36Sopenharmony_ci return error; 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_cistatic int 23462306a36Sopenharmony_cixfs_growfs_log_private( 23562306a36Sopenharmony_ci struct xfs_mount *mp, /* mount point for filesystem */ 23662306a36Sopenharmony_ci struct xfs_growfs_log *in) /* growfs log input struct */ 23762306a36Sopenharmony_ci{ 23862306a36Sopenharmony_ci xfs_extlen_t nb; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci nb = in->newblocks; 24162306a36Sopenharmony_ci if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) 24262306a36Sopenharmony_ci return -EINVAL; 24362306a36Sopenharmony_ci if (nb == mp->m_sb.sb_logblocks && 24462306a36Sopenharmony_ci in->isint == (mp->m_sb.sb_logstart != 0)) 24562306a36Sopenharmony_ci return -EINVAL; 24662306a36Sopenharmony_ci /* 24762306a36Sopenharmony_ci * Moving the log is hard, need new interfaces to sync 24862306a36Sopenharmony_ci * the log first, hold off all activity while moving it. 24962306a36Sopenharmony_ci * Can have shorter or longer log in the same space, 25062306a36Sopenharmony_ci * or transform internal to external log or vice versa. 25162306a36Sopenharmony_ci */ 25262306a36Sopenharmony_ci return -ENOSYS; 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic int 25662306a36Sopenharmony_cixfs_growfs_imaxpct( 25762306a36Sopenharmony_ci struct xfs_mount *mp, 25862306a36Sopenharmony_ci __u32 imaxpct) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci struct xfs_trans *tp; 26162306a36Sopenharmony_ci int dpct; 26262306a36Sopenharmony_ci int error; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci if (imaxpct > 100) 26562306a36Sopenharmony_ci return -EINVAL; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 26862306a36Sopenharmony_ci XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); 26962306a36Sopenharmony_ci if (error) 27062306a36Sopenharmony_ci return error; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci dpct = imaxpct - mp->m_sb.sb_imax_pct; 27362306a36Sopenharmony_ci xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 27462306a36Sopenharmony_ci xfs_trans_set_sync(tp); 27562306a36Sopenharmony_ci return xfs_trans_commit(tp); 27662306a36Sopenharmony_ci} 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci/* 27962306a36Sopenharmony_ci * protected versions of growfs function acquire and release locks on the mount 28062306a36Sopenharmony_ci * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG, 28162306a36Sopenharmony_ci * XFS_IOC_FSGROWFSRT 28262306a36Sopenharmony_ci */ 28362306a36Sopenharmony_ciint 28462306a36Sopenharmony_cixfs_growfs_data( 28562306a36Sopenharmony_ci struct xfs_mount *mp, 28662306a36Sopenharmony_ci struct xfs_growfs_data *in) 28762306a36Sopenharmony_ci{ 28862306a36Sopenharmony_ci int error = 0; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 29162306a36Sopenharmony_ci return -EPERM; 29262306a36Sopenharmony_ci if (!mutex_trylock(&mp->m_growlock)) 29362306a36Sopenharmony_ci return -EWOULDBLOCK; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci /* update imaxpct separately to the physical grow of the filesystem */ 29662306a36Sopenharmony_ci if (in->imaxpct != mp->m_sb.sb_imax_pct) { 29762306a36Sopenharmony_ci error = xfs_growfs_imaxpct(mp, in->imaxpct); 29862306a36Sopenharmony_ci if (error) 29962306a36Sopenharmony_ci goto out_error; 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci if (in->newblocks != mp->m_sb.sb_dblocks) { 30362306a36Sopenharmony_ci error = xfs_growfs_data_private(mp, in); 30462306a36Sopenharmony_ci if (error) 30562306a36Sopenharmony_ci goto out_error; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci /* Post growfs calculations needed to reflect new state in operations */ 30962306a36Sopenharmony_ci if (mp->m_sb.sb_imax_pct) { 31062306a36Sopenharmony_ci uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 31162306a36Sopenharmony_ci do_div(icount, 100); 31262306a36Sopenharmony_ci M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); 31362306a36Sopenharmony_ci } else 31462306a36Sopenharmony_ci M_IGEO(mp)->maxicount = 0; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* Update secondary superblocks now the physical grow has completed */ 31762306a36Sopenharmony_ci error = xfs_update_secondary_sbs(mp); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ciout_error: 32062306a36Sopenharmony_ci /* 32162306a36Sopenharmony_ci * Increment the generation unconditionally, the error could be from 32262306a36Sopenharmony_ci * updating the secondary superblocks, in which case the new size 32362306a36Sopenharmony_ci * is live already. 32462306a36Sopenharmony_ci */ 32562306a36Sopenharmony_ci mp->m_generation++; 32662306a36Sopenharmony_ci mutex_unlock(&mp->m_growlock); 32762306a36Sopenharmony_ci return error; 32862306a36Sopenharmony_ci} 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ciint 33162306a36Sopenharmony_cixfs_growfs_log( 33262306a36Sopenharmony_ci xfs_mount_t *mp, 33362306a36Sopenharmony_ci struct xfs_growfs_log *in) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci int error; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 33862306a36Sopenharmony_ci return -EPERM; 33962306a36Sopenharmony_ci if (!mutex_trylock(&mp->m_growlock)) 34062306a36Sopenharmony_ci return -EWOULDBLOCK; 34162306a36Sopenharmony_ci error = xfs_growfs_log_private(mp, in); 34262306a36Sopenharmony_ci mutex_unlock(&mp->m_growlock); 34362306a36Sopenharmony_ci return error; 34462306a36Sopenharmony_ci} 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci/* 34762306a36Sopenharmony_ci * exported through ioctl XFS_IOC_FSCOUNTS 34862306a36Sopenharmony_ci */ 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_civoid 35162306a36Sopenharmony_cixfs_fs_counts( 35262306a36Sopenharmony_ci xfs_mount_t *mp, 35362306a36Sopenharmony_ci xfs_fsop_counts_t *cnt) 35462306a36Sopenharmony_ci{ 35562306a36Sopenharmony_ci cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 35662306a36Sopenharmony_ci cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 35762306a36Sopenharmony_ci cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 35862306a36Sopenharmony_ci xfs_fdblocks_unavailable(mp); 35962306a36Sopenharmony_ci cnt->freertx = percpu_counter_read_positive(&mp->m_frextents); 36062306a36Sopenharmony_ci} 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci/* 36362306a36Sopenharmony_ci * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS 36462306a36Sopenharmony_ci * 36562306a36Sopenharmony_ci * xfs_reserve_blocks is called to set m_resblks 36662306a36Sopenharmony_ci * in the in-core mount table. The number of unused reserved blocks 36762306a36Sopenharmony_ci * is kept in m_resblks_avail. 36862306a36Sopenharmony_ci * 36962306a36Sopenharmony_ci * Reserve the requested number of blocks if available. Otherwise return 37062306a36Sopenharmony_ci * as many as possible to satisfy the request. The actual number 37162306a36Sopenharmony_ci * reserved are returned in outval 37262306a36Sopenharmony_ci * 37362306a36Sopenharmony_ci * A null inval pointer indicates that only the current reserved blocks 37462306a36Sopenharmony_ci * available should be returned no settings are changed. 37562306a36Sopenharmony_ci */ 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ciint 37862306a36Sopenharmony_cixfs_reserve_blocks( 37962306a36Sopenharmony_ci xfs_mount_t *mp, 38062306a36Sopenharmony_ci uint64_t *inval, 38162306a36Sopenharmony_ci xfs_fsop_resblks_t *outval) 38262306a36Sopenharmony_ci{ 38362306a36Sopenharmony_ci int64_t lcounter, delta; 38462306a36Sopenharmony_ci int64_t fdblks_delta = 0; 38562306a36Sopenharmony_ci uint64_t request; 38662306a36Sopenharmony_ci int64_t free; 38762306a36Sopenharmony_ci int error = 0; 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci /* If inval is null, report current values and return */ 39062306a36Sopenharmony_ci if (inval == (uint64_t *)NULL) { 39162306a36Sopenharmony_ci if (!outval) 39262306a36Sopenharmony_ci return -EINVAL; 39362306a36Sopenharmony_ci outval->resblks = mp->m_resblks; 39462306a36Sopenharmony_ci outval->resblks_avail = mp->m_resblks_avail; 39562306a36Sopenharmony_ci return 0; 39662306a36Sopenharmony_ci } 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci request = *inval; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci /* 40162306a36Sopenharmony_ci * With per-cpu counters, this becomes an interesting problem. we need 40262306a36Sopenharmony_ci * to work out if we are freeing or allocation blocks first, then we can 40362306a36Sopenharmony_ci * do the modification as necessary. 40462306a36Sopenharmony_ci * 40562306a36Sopenharmony_ci * We do this under the m_sb_lock so that if we are near ENOSPC, we will 40662306a36Sopenharmony_ci * hold out any changes while we work out what to do. This means that 40762306a36Sopenharmony_ci * the amount of free space can change while we do this, so we need to 40862306a36Sopenharmony_ci * retry if we end up trying to reserve more space than is available. 40962306a36Sopenharmony_ci */ 41062306a36Sopenharmony_ci spin_lock(&mp->m_sb_lock); 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci /* 41362306a36Sopenharmony_ci * If our previous reservation was larger than the current value, 41462306a36Sopenharmony_ci * then move any unused blocks back to the free pool. Modify the resblks 41562306a36Sopenharmony_ci * counters directly since we shouldn't have any problems unreserving 41662306a36Sopenharmony_ci * space. 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_ci if (mp->m_resblks > request) { 41962306a36Sopenharmony_ci lcounter = mp->m_resblks_avail - request; 42062306a36Sopenharmony_ci if (lcounter > 0) { /* release unused blocks */ 42162306a36Sopenharmony_ci fdblks_delta = lcounter; 42262306a36Sopenharmony_ci mp->m_resblks_avail -= lcounter; 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci mp->m_resblks = request; 42562306a36Sopenharmony_ci if (fdblks_delta) { 42662306a36Sopenharmony_ci spin_unlock(&mp->m_sb_lock); 42762306a36Sopenharmony_ci error = xfs_mod_fdblocks(mp, fdblks_delta, 0); 42862306a36Sopenharmony_ci spin_lock(&mp->m_sb_lock); 42962306a36Sopenharmony_ci } 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci goto out; 43262306a36Sopenharmony_ci } 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* 43562306a36Sopenharmony_ci * If the request is larger than the current reservation, reserve the 43662306a36Sopenharmony_ci * blocks before we update the reserve counters. Sample m_fdblocks and 43762306a36Sopenharmony_ci * perform a partial reservation if the request exceeds free space. 43862306a36Sopenharmony_ci * 43962306a36Sopenharmony_ci * The code below estimates how many blocks it can request from 44062306a36Sopenharmony_ci * fdblocks to stash in the reserve pool. This is a classic TOCTOU 44162306a36Sopenharmony_ci * race since fdblocks updates are not always coordinated via 44262306a36Sopenharmony_ci * m_sb_lock. Set the reserve size even if there's not enough free 44362306a36Sopenharmony_ci * space to fill it because mod_fdblocks will refill an undersized 44462306a36Sopenharmony_ci * reserve when it can. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci free = percpu_counter_sum(&mp->m_fdblocks) - 44762306a36Sopenharmony_ci xfs_fdblocks_unavailable(mp); 44862306a36Sopenharmony_ci delta = request - mp->m_resblks; 44962306a36Sopenharmony_ci mp->m_resblks = request; 45062306a36Sopenharmony_ci if (delta > 0 && free > 0) { 45162306a36Sopenharmony_ci /* 45262306a36Sopenharmony_ci * We'll either succeed in getting space from the free block 45362306a36Sopenharmony_ci * count or we'll get an ENOSPC. Don't set the reserved flag 45462306a36Sopenharmony_ci * here - we don't want to reserve the extra reserve blocks 45562306a36Sopenharmony_ci * from the reserve. 45662306a36Sopenharmony_ci * 45762306a36Sopenharmony_ci * The desired reserve size can change after we drop the lock. 45862306a36Sopenharmony_ci * Use mod_fdblocks to put the space into the reserve or into 45962306a36Sopenharmony_ci * fdblocks as appropriate. 46062306a36Sopenharmony_ci */ 46162306a36Sopenharmony_ci fdblks_delta = min(free, delta); 46262306a36Sopenharmony_ci spin_unlock(&mp->m_sb_lock); 46362306a36Sopenharmony_ci error = xfs_mod_fdblocks(mp, -fdblks_delta, 0); 46462306a36Sopenharmony_ci if (!error) 46562306a36Sopenharmony_ci xfs_mod_fdblocks(mp, fdblks_delta, 0); 46662306a36Sopenharmony_ci spin_lock(&mp->m_sb_lock); 46762306a36Sopenharmony_ci } 46862306a36Sopenharmony_ciout: 46962306a36Sopenharmony_ci if (outval) { 47062306a36Sopenharmony_ci outval->resblks = mp->m_resblks; 47162306a36Sopenharmony_ci outval->resblks_avail = mp->m_resblks_avail; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci spin_unlock(&mp->m_sb_lock); 47562306a36Sopenharmony_ci return error; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ciint 47962306a36Sopenharmony_cixfs_fs_goingdown( 48062306a36Sopenharmony_ci xfs_mount_t *mp, 48162306a36Sopenharmony_ci uint32_t inflags) 48262306a36Sopenharmony_ci{ 48362306a36Sopenharmony_ci switch (inflags) { 48462306a36Sopenharmony_ci case XFS_FSOP_GOING_FLAGS_DEFAULT: { 48562306a36Sopenharmony_ci if (!freeze_bdev(mp->m_super->s_bdev)) { 48662306a36Sopenharmony_ci xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 48762306a36Sopenharmony_ci thaw_bdev(mp->m_super->s_bdev); 48862306a36Sopenharmony_ci } 48962306a36Sopenharmony_ci break; 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 49262306a36Sopenharmony_ci xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 49362306a36Sopenharmony_ci break; 49462306a36Sopenharmony_ci case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH: 49562306a36Sopenharmony_ci xfs_force_shutdown(mp, 49662306a36Sopenharmony_ci SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); 49762306a36Sopenharmony_ci break; 49862306a36Sopenharmony_ci default: 49962306a36Sopenharmony_ci return -EINVAL; 50062306a36Sopenharmony_ci } 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci return 0; 50362306a36Sopenharmony_ci} 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci/* 50662306a36Sopenharmony_ci * Force a shutdown of the filesystem instantly while keeping the filesystem 50762306a36Sopenharmony_ci * consistent. We don't do an unmount here; just shutdown the shop, make sure 50862306a36Sopenharmony_ci * that absolutely nothing persistent happens to this filesystem after this 50962306a36Sopenharmony_ci * point. 51062306a36Sopenharmony_ci * 51162306a36Sopenharmony_ci * The shutdown state change is atomic, resulting in the first and only the 51262306a36Sopenharmony_ci * first shutdown call processing the shutdown. This means we only shutdown the 51362306a36Sopenharmony_ci * log once as it requires, and we don't spam the logs when multiple concurrent 51462306a36Sopenharmony_ci * shutdowns race to set the shutdown flags. 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_civoid 51762306a36Sopenharmony_cixfs_do_force_shutdown( 51862306a36Sopenharmony_ci struct xfs_mount *mp, 51962306a36Sopenharmony_ci uint32_t flags, 52062306a36Sopenharmony_ci char *fname, 52162306a36Sopenharmony_ci int lnnum) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci int tag; 52462306a36Sopenharmony_ci const char *why; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) { 52862306a36Sopenharmony_ci xlog_shutdown_wait(mp->m_log); 52962306a36Sopenharmony_ci return; 53062306a36Sopenharmony_ci } 53162306a36Sopenharmony_ci if (mp->m_sb_bp) 53262306a36Sopenharmony_ci mp->m_sb_bp->b_flags |= XBF_DONE; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci if (flags & SHUTDOWN_FORCE_UMOUNT) 53562306a36Sopenharmony_ci xfs_alert(mp, "User initiated shutdown received."); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci if (xlog_force_shutdown(mp->m_log, flags)) { 53862306a36Sopenharmony_ci tag = XFS_PTAG_SHUTDOWN_LOGERROR; 53962306a36Sopenharmony_ci why = "Log I/O Error"; 54062306a36Sopenharmony_ci } else if (flags & SHUTDOWN_CORRUPT_INCORE) { 54162306a36Sopenharmony_ci tag = XFS_PTAG_SHUTDOWN_CORRUPT; 54262306a36Sopenharmony_ci why = "Corruption of in-memory data"; 54362306a36Sopenharmony_ci } else if (flags & SHUTDOWN_CORRUPT_ONDISK) { 54462306a36Sopenharmony_ci tag = XFS_PTAG_SHUTDOWN_CORRUPT; 54562306a36Sopenharmony_ci why = "Corruption of on-disk metadata"; 54662306a36Sopenharmony_ci } else if (flags & SHUTDOWN_DEVICE_REMOVED) { 54762306a36Sopenharmony_ci tag = XFS_PTAG_SHUTDOWN_IOERROR; 54862306a36Sopenharmony_ci why = "Block device removal"; 54962306a36Sopenharmony_ci } else { 55062306a36Sopenharmony_ci tag = XFS_PTAG_SHUTDOWN_IOERROR; 55162306a36Sopenharmony_ci why = "Metadata I/O Error"; 55262306a36Sopenharmony_ci } 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum); 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci xfs_alert_tag(mp, tag, 55762306a36Sopenharmony_ci"%s (0x%x) detected at %pS (%s:%d). Shutting down filesystem.", 55862306a36Sopenharmony_ci why, flags, __return_address, fname, lnnum); 55962306a36Sopenharmony_ci xfs_alert(mp, 56062306a36Sopenharmony_ci "Please unmount the filesystem and rectify the problem(s)"); 56162306a36Sopenharmony_ci if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 56262306a36Sopenharmony_ci xfs_stack_trace(); 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci/* 56662306a36Sopenharmony_ci * Reserve free space for per-AG metadata. 56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_ciint 56962306a36Sopenharmony_cixfs_fs_reserve_ag_blocks( 57062306a36Sopenharmony_ci struct xfs_mount *mp) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci xfs_agnumber_t agno; 57362306a36Sopenharmony_ci struct xfs_perag *pag; 57462306a36Sopenharmony_ci int error = 0; 57562306a36Sopenharmony_ci int err2; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci mp->m_finobt_nores = false; 57862306a36Sopenharmony_ci for_each_perag(mp, agno, pag) { 57962306a36Sopenharmony_ci err2 = xfs_ag_resv_init(pag, NULL); 58062306a36Sopenharmony_ci if (err2 && !error) 58162306a36Sopenharmony_ci error = err2; 58262306a36Sopenharmony_ci } 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if (error && error != -ENOSPC) { 58562306a36Sopenharmony_ci xfs_warn(mp, 58662306a36Sopenharmony_ci "Error %d reserving per-AG metadata reserve pool.", error); 58762306a36Sopenharmony_ci xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 58862306a36Sopenharmony_ci } 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci return error; 59162306a36Sopenharmony_ci} 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci/* 59462306a36Sopenharmony_ci * Free space reserved for per-AG metadata. 59562306a36Sopenharmony_ci */ 59662306a36Sopenharmony_ciint 59762306a36Sopenharmony_cixfs_fs_unreserve_ag_blocks( 59862306a36Sopenharmony_ci struct xfs_mount *mp) 59962306a36Sopenharmony_ci{ 60062306a36Sopenharmony_ci xfs_agnumber_t agno; 60162306a36Sopenharmony_ci struct xfs_perag *pag; 60262306a36Sopenharmony_ci int error = 0; 60362306a36Sopenharmony_ci int err2; 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci for_each_perag(mp, agno, pag) { 60662306a36Sopenharmony_ci err2 = xfs_ag_resv_free(pag); 60762306a36Sopenharmony_ci if (err2 && !error) 60862306a36Sopenharmony_ci error = err2; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci if (error) 61262306a36Sopenharmony_ci xfs_warn(mp, 61362306a36Sopenharmony_ci "Error %d freeing per-AG metadata reserve pool.", error); 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci return error; 61662306a36Sopenharmony_ci} 617