162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc.
462306a36Sopenharmony_ci * All Rights Reserved.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_sb.h"
1362306a36Sopenharmony_ci#include "xfs_mount.h"
1462306a36Sopenharmony_ci#include "xfs_trans.h"
1562306a36Sopenharmony_ci#include "xfs_error.h"
1662306a36Sopenharmony_ci#include "xfs_alloc.h"
1762306a36Sopenharmony_ci#include "xfs_fsops.h"
1862306a36Sopenharmony_ci#include "xfs_trans_space.h"
1962306a36Sopenharmony_ci#include "xfs_log.h"
2062306a36Sopenharmony_ci#include "xfs_log_priv.h"
2162306a36Sopenharmony_ci#include "xfs_ag.h"
2262306a36Sopenharmony_ci#include "xfs_ag_resv.h"
2362306a36Sopenharmony_ci#include "xfs_trace.h"
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci/*
2662306a36Sopenharmony_ci * Write new AG headers to disk. Non-transactional, but need to be
2762306a36Sopenharmony_ci * written and completed prior to the growfs transaction being logged.
2862306a36Sopenharmony_ci * To do this, we use a delayed write buffer list and wait for
2962306a36Sopenharmony_ci * submission and IO completion of the list as a whole. This allows the
3062306a36Sopenharmony_ci * IO subsystem to merge all the AG headers in a single AG into a single
3162306a36Sopenharmony_ci * IO and hide most of the latency of the IO from us.
3262306a36Sopenharmony_ci *
3362306a36Sopenharmony_ci * This also means that if we get an error whilst building the buffer
3462306a36Sopenharmony_ci * list to write, we can cancel the entire list without having written
3562306a36Sopenharmony_ci * anything.
3662306a36Sopenharmony_ci */
3762306a36Sopenharmony_cistatic int
3862306a36Sopenharmony_cixfs_resizefs_init_new_ags(
3962306a36Sopenharmony_ci	struct xfs_trans	*tp,
4062306a36Sopenharmony_ci	struct aghdr_init_data	*id,
4162306a36Sopenharmony_ci	xfs_agnumber_t		oagcount,
4262306a36Sopenharmony_ci	xfs_agnumber_t		nagcount,
4362306a36Sopenharmony_ci	xfs_rfsblock_t		delta,
4462306a36Sopenharmony_ci	struct xfs_perag	*last_pag,
4562306a36Sopenharmony_ci	bool			*lastag_extended)
4662306a36Sopenharmony_ci{
4762306a36Sopenharmony_ci	struct xfs_mount	*mp = tp->t_mountp;
4862306a36Sopenharmony_ci	xfs_rfsblock_t		nb = mp->m_sb.sb_dblocks + delta;
4962306a36Sopenharmony_ci	int			error;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	*lastag_extended = false;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	INIT_LIST_HEAD(&id->buffer_list);
5462306a36Sopenharmony_ci	for (id->agno = nagcount - 1;
5562306a36Sopenharmony_ci	     id->agno >= oagcount;
5662306a36Sopenharmony_ci	     id->agno--, delta -= id->agsize) {
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci		if (id->agno == nagcount - 1)
5962306a36Sopenharmony_ci			id->agsize = nb - (id->agno *
6062306a36Sopenharmony_ci					(xfs_rfsblock_t)mp->m_sb.sb_agblocks);
6162306a36Sopenharmony_ci		else
6262306a36Sopenharmony_ci			id->agsize = mp->m_sb.sb_agblocks;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci		error = xfs_ag_init_headers(mp, id);
6562306a36Sopenharmony_ci		if (error) {
6662306a36Sopenharmony_ci			xfs_buf_delwri_cancel(&id->buffer_list);
6762306a36Sopenharmony_ci			return error;
6862306a36Sopenharmony_ci		}
6962306a36Sopenharmony_ci	}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	error = xfs_buf_delwri_submit(&id->buffer_list);
7262306a36Sopenharmony_ci	if (error)
7362306a36Sopenharmony_ci		return error;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	if (delta) {
7662306a36Sopenharmony_ci		*lastag_extended = true;
7762306a36Sopenharmony_ci		error = xfs_ag_extend_space(last_pag, tp, delta);
7862306a36Sopenharmony_ci	}
7962306a36Sopenharmony_ci	return error;
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci/*
8362306a36Sopenharmony_ci * growfs operations
8462306a36Sopenharmony_ci */
8562306a36Sopenharmony_cistatic int
8662306a36Sopenharmony_cixfs_growfs_data_private(
8762306a36Sopenharmony_ci	struct xfs_mount	*mp,		/* mount point for filesystem */
8862306a36Sopenharmony_ci	struct xfs_growfs_data	*in)		/* growfs data input struct */
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	struct xfs_buf		*bp;
9162306a36Sopenharmony_ci	int			error;
9262306a36Sopenharmony_ci	xfs_agnumber_t		nagcount;
9362306a36Sopenharmony_ci	xfs_agnumber_t		nagimax = 0;
9462306a36Sopenharmony_ci	xfs_rfsblock_t		nb, nb_div, nb_mod;
9562306a36Sopenharmony_ci	int64_t			delta;
9662306a36Sopenharmony_ci	bool			lastag_extended = false;
9762306a36Sopenharmony_ci	xfs_agnumber_t		oagcount;
9862306a36Sopenharmony_ci	struct xfs_trans	*tp;
9962306a36Sopenharmony_ci	struct aghdr_init_data	id = {};
10062306a36Sopenharmony_ci	struct xfs_perag	*last_pag;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	nb = in->newblocks;
10362306a36Sopenharmony_ci	error = xfs_sb_validate_fsb_count(&mp->m_sb, nb);
10462306a36Sopenharmony_ci	if (error)
10562306a36Sopenharmony_ci		return error;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	if (nb > mp->m_sb.sb_dblocks) {
10862306a36Sopenharmony_ci		error = xfs_buf_read_uncached(mp->m_ddev_targp,
10962306a36Sopenharmony_ci				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
11062306a36Sopenharmony_ci				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
11162306a36Sopenharmony_ci		if (error)
11262306a36Sopenharmony_ci			return error;
11362306a36Sopenharmony_ci		xfs_buf_relse(bp);
11462306a36Sopenharmony_ci	}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	nb_div = nb;
11762306a36Sopenharmony_ci	nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks);
11862306a36Sopenharmony_ci	if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS)
11962306a36Sopenharmony_ci		nb_div++;
12062306a36Sopenharmony_ci	else if (nb_mod)
12162306a36Sopenharmony_ci		nb = nb_div * mp->m_sb.sb_agblocks;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	if (nb_div > XFS_MAX_AGNUMBER + 1) {
12462306a36Sopenharmony_ci		nb_div = XFS_MAX_AGNUMBER + 1;
12562306a36Sopenharmony_ci		nb = nb_div * mp->m_sb.sb_agblocks;
12662306a36Sopenharmony_ci	}
12762306a36Sopenharmony_ci	nagcount = nb_div;
12862306a36Sopenharmony_ci	delta = nb - mp->m_sb.sb_dblocks;
12962306a36Sopenharmony_ci	/*
13062306a36Sopenharmony_ci	 * Reject filesystems with a single AG because they are not
13162306a36Sopenharmony_ci	 * supported, and reject a shrink operation that would cause a
13262306a36Sopenharmony_ci	 * filesystem to become unsupported.
13362306a36Sopenharmony_ci	 */
13462306a36Sopenharmony_ci	if (delta < 0 && nagcount < 2)
13562306a36Sopenharmony_ci		return -EINVAL;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	oagcount = mp->m_sb.sb_agcount;
13862306a36Sopenharmony_ci	/* allocate the new per-ag structures */
13962306a36Sopenharmony_ci	if (nagcount > oagcount) {
14062306a36Sopenharmony_ci		error = xfs_initialize_perag(mp, nagcount, nb, &nagimax);
14162306a36Sopenharmony_ci		if (error)
14262306a36Sopenharmony_ci			return error;
14362306a36Sopenharmony_ci	} else if (nagcount < oagcount) {
14462306a36Sopenharmony_ci		/* TODO: shrinking the entire AGs hasn't yet completed */
14562306a36Sopenharmony_ci		return -EINVAL;
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	if (delta > 0)
14962306a36Sopenharmony_ci		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
15062306a36Sopenharmony_ci				XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
15162306a36Sopenharmony_ci				&tp);
15262306a36Sopenharmony_ci	else
15362306a36Sopenharmony_ci		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, -delta, 0,
15462306a36Sopenharmony_ci				0, &tp);
15562306a36Sopenharmony_ci	if (error)
15662306a36Sopenharmony_ci		return error;
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	last_pag = xfs_perag_get(mp, oagcount - 1);
15962306a36Sopenharmony_ci	if (delta > 0) {
16062306a36Sopenharmony_ci		error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount,
16162306a36Sopenharmony_ci				delta, last_pag, &lastag_extended);
16262306a36Sopenharmony_ci	} else {
16362306a36Sopenharmony_ci		xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK,
16462306a36Sopenharmony_ci	"EXPERIMENTAL online shrink feature in use. Use at your own risk!");
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci		error = xfs_ag_shrink_space(last_pag, &tp, -delta);
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci	xfs_perag_put(last_pag);
16962306a36Sopenharmony_ci	if (error)
17062306a36Sopenharmony_ci		goto out_trans_cancel;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	/*
17362306a36Sopenharmony_ci	 * Update changed superblock fields transactionally. These are not
17462306a36Sopenharmony_ci	 * seen by the rest of the world until the transaction commit applies
17562306a36Sopenharmony_ci	 * them atomically to the superblock.
17662306a36Sopenharmony_ci	 */
17762306a36Sopenharmony_ci	if (nagcount > oagcount)
17862306a36Sopenharmony_ci		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
17962306a36Sopenharmony_ci	if (delta)
18062306a36Sopenharmony_ci		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, delta);
18162306a36Sopenharmony_ci	if (id.nfree)
18262306a36Sopenharmony_ci		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	/*
18562306a36Sopenharmony_ci	 * Sync sb counters now to reflect the updated values. This is
18662306a36Sopenharmony_ci	 * particularly important for shrink because the write verifier
18762306a36Sopenharmony_ci	 * will fail if sb_fdblocks is ever larger than sb_dblocks.
18862306a36Sopenharmony_ci	 */
18962306a36Sopenharmony_ci	if (xfs_has_lazysbcount(mp))
19062306a36Sopenharmony_ci		xfs_log_sb(tp);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	xfs_trans_set_sync(tp);
19362306a36Sopenharmony_ci	error = xfs_trans_commit(tp);
19462306a36Sopenharmony_ci	if (error)
19562306a36Sopenharmony_ci		return error;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	/* New allocation groups fully initialized, so update mount struct */
19862306a36Sopenharmony_ci	if (nagimax)
19962306a36Sopenharmony_ci		mp->m_maxagi = nagimax;
20062306a36Sopenharmony_ci	xfs_set_low_space_thresholds(mp);
20162306a36Sopenharmony_ci	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	if (delta > 0) {
20462306a36Sopenharmony_ci		/*
20562306a36Sopenharmony_ci		 * If we expanded the last AG, free the per-AG reservation
20662306a36Sopenharmony_ci		 * so we can reinitialize it with the new size.
20762306a36Sopenharmony_ci		 */
20862306a36Sopenharmony_ci		if (lastag_extended) {
20962306a36Sopenharmony_ci			struct xfs_perag	*pag;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci			pag = xfs_perag_get(mp, id.agno);
21262306a36Sopenharmony_ci			error = xfs_ag_resv_free(pag);
21362306a36Sopenharmony_ci			xfs_perag_put(pag);
21462306a36Sopenharmony_ci			if (error)
21562306a36Sopenharmony_ci				return error;
21662306a36Sopenharmony_ci		}
21762306a36Sopenharmony_ci		/*
21862306a36Sopenharmony_ci		 * Reserve AG metadata blocks. ENOSPC here does not mean there
21962306a36Sopenharmony_ci		 * was a growfs failure, just that there still isn't space for
22062306a36Sopenharmony_ci		 * new user data after the grow has been run.
22162306a36Sopenharmony_ci		 */
22262306a36Sopenharmony_ci		error = xfs_fs_reserve_ag_blocks(mp);
22362306a36Sopenharmony_ci		if (error == -ENOSPC)
22462306a36Sopenharmony_ci			error = 0;
22562306a36Sopenharmony_ci	}
22662306a36Sopenharmony_ci	return error;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ciout_trans_cancel:
22962306a36Sopenharmony_ci	xfs_trans_cancel(tp);
23062306a36Sopenharmony_ci	return error;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic int
23462306a36Sopenharmony_cixfs_growfs_log_private(
23562306a36Sopenharmony_ci	struct xfs_mount	*mp,	/* mount point for filesystem */
23662306a36Sopenharmony_ci	struct xfs_growfs_log	*in)	/* growfs log input struct */
23762306a36Sopenharmony_ci{
23862306a36Sopenharmony_ci	xfs_extlen_t		nb;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	nb = in->newblocks;
24162306a36Sopenharmony_ci	if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
24262306a36Sopenharmony_ci		return -EINVAL;
24362306a36Sopenharmony_ci	if (nb == mp->m_sb.sb_logblocks &&
24462306a36Sopenharmony_ci	    in->isint == (mp->m_sb.sb_logstart != 0))
24562306a36Sopenharmony_ci		return -EINVAL;
24662306a36Sopenharmony_ci	/*
24762306a36Sopenharmony_ci	 * Moving the log is hard, need new interfaces to sync
24862306a36Sopenharmony_ci	 * the log first, hold off all activity while moving it.
24962306a36Sopenharmony_ci	 * Can have shorter or longer log in the same space,
25062306a36Sopenharmony_ci	 * or transform internal to external log or vice versa.
25162306a36Sopenharmony_ci	 */
25262306a36Sopenharmony_ci	return -ENOSYS;
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_cistatic int
25662306a36Sopenharmony_cixfs_growfs_imaxpct(
25762306a36Sopenharmony_ci	struct xfs_mount	*mp,
25862306a36Sopenharmony_ci	__u32			imaxpct)
25962306a36Sopenharmony_ci{
26062306a36Sopenharmony_ci	struct xfs_trans	*tp;
26162306a36Sopenharmony_ci	int			dpct;
26262306a36Sopenharmony_ci	int			error;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	if (imaxpct > 100)
26562306a36Sopenharmony_ci		return -EINVAL;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
26862306a36Sopenharmony_ci			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
26962306a36Sopenharmony_ci	if (error)
27062306a36Sopenharmony_ci		return error;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	dpct = imaxpct - mp->m_sb.sb_imax_pct;
27362306a36Sopenharmony_ci	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
27462306a36Sopenharmony_ci	xfs_trans_set_sync(tp);
27562306a36Sopenharmony_ci	return xfs_trans_commit(tp);
27662306a36Sopenharmony_ci}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci/*
27962306a36Sopenharmony_ci * protected versions of growfs function acquire and release locks on the mount
28062306a36Sopenharmony_ci * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
28162306a36Sopenharmony_ci * XFS_IOC_FSGROWFSRT
28262306a36Sopenharmony_ci */
28362306a36Sopenharmony_ciint
28462306a36Sopenharmony_cixfs_growfs_data(
28562306a36Sopenharmony_ci	struct xfs_mount	*mp,
28662306a36Sopenharmony_ci	struct xfs_growfs_data	*in)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	int			error = 0;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
29162306a36Sopenharmony_ci		return -EPERM;
29262306a36Sopenharmony_ci	if (!mutex_trylock(&mp->m_growlock))
29362306a36Sopenharmony_ci		return -EWOULDBLOCK;
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	/* update imaxpct separately to the physical grow of the filesystem */
29662306a36Sopenharmony_ci	if (in->imaxpct != mp->m_sb.sb_imax_pct) {
29762306a36Sopenharmony_ci		error = xfs_growfs_imaxpct(mp, in->imaxpct);
29862306a36Sopenharmony_ci		if (error)
29962306a36Sopenharmony_ci			goto out_error;
30062306a36Sopenharmony_ci	}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	if (in->newblocks != mp->m_sb.sb_dblocks) {
30362306a36Sopenharmony_ci		error = xfs_growfs_data_private(mp, in);
30462306a36Sopenharmony_ci		if (error)
30562306a36Sopenharmony_ci			goto out_error;
30662306a36Sopenharmony_ci	}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	/* Post growfs calculations needed to reflect new state in operations */
30962306a36Sopenharmony_ci	if (mp->m_sb.sb_imax_pct) {
31062306a36Sopenharmony_ci		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
31162306a36Sopenharmony_ci		do_div(icount, 100);
31262306a36Sopenharmony_ci		M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
31362306a36Sopenharmony_ci	} else
31462306a36Sopenharmony_ci		M_IGEO(mp)->maxicount = 0;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	/* Update secondary superblocks now the physical grow has completed */
31762306a36Sopenharmony_ci	error = xfs_update_secondary_sbs(mp);
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ciout_error:
32062306a36Sopenharmony_ci	/*
32162306a36Sopenharmony_ci	 * Increment the generation unconditionally, the error could be from
32262306a36Sopenharmony_ci	 * updating the secondary superblocks, in which case the new size
32362306a36Sopenharmony_ci	 * is live already.
32462306a36Sopenharmony_ci	 */
32562306a36Sopenharmony_ci	mp->m_generation++;
32662306a36Sopenharmony_ci	mutex_unlock(&mp->m_growlock);
32762306a36Sopenharmony_ci	return error;
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ciint
33162306a36Sopenharmony_cixfs_growfs_log(
33262306a36Sopenharmony_ci	xfs_mount_t		*mp,
33362306a36Sopenharmony_ci	struct xfs_growfs_log	*in)
33462306a36Sopenharmony_ci{
33562306a36Sopenharmony_ci	int error;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
33862306a36Sopenharmony_ci		return -EPERM;
33962306a36Sopenharmony_ci	if (!mutex_trylock(&mp->m_growlock))
34062306a36Sopenharmony_ci		return -EWOULDBLOCK;
34162306a36Sopenharmony_ci	error = xfs_growfs_log_private(mp, in);
34262306a36Sopenharmony_ci	mutex_unlock(&mp->m_growlock);
34362306a36Sopenharmony_ci	return error;
34462306a36Sopenharmony_ci}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci/*
34762306a36Sopenharmony_ci * exported through ioctl XFS_IOC_FSCOUNTS
34862306a36Sopenharmony_ci */
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_civoid
35162306a36Sopenharmony_cixfs_fs_counts(
35262306a36Sopenharmony_ci	xfs_mount_t		*mp,
35362306a36Sopenharmony_ci	xfs_fsop_counts_t	*cnt)
35462306a36Sopenharmony_ci{
35562306a36Sopenharmony_ci	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
35662306a36Sopenharmony_ci	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
35762306a36Sopenharmony_ci	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
35862306a36Sopenharmony_ci						xfs_fdblocks_unavailable(mp);
35962306a36Sopenharmony_ci	cnt->freertx = percpu_counter_read_positive(&mp->m_frextents);
36062306a36Sopenharmony_ci}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci/*
36362306a36Sopenharmony_ci * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
36462306a36Sopenharmony_ci *
36562306a36Sopenharmony_ci * xfs_reserve_blocks is called to set m_resblks
36662306a36Sopenharmony_ci * in the in-core mount table. The number of unused reserved blocks
36762306a36Sopenharmony_ci * is kept in m_resblks_avail.
36862306a36Sopenharmony_ci *
36962306a36Sopenharmony_ci * Reserve the requested number of blocks if available. Otherwise return
37062306a36Sopenharmony_ci * as many as possible to satisfy the request. The actual number
37162306a36Sopenharmony_ci * reserved are returned in outval
37262306a36Sopenharmony_ci *
37362306a36Sopenharmony_ci * A null inval pointer indicates that only the current reserved blocks
37462306a36Sopenharmony_ci * available  should  be returned no settings are changed.
37562306a36Sopenharmony_ci */
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ciint
37862306a36Sopenharmony_cixfs_reserve_blocks(
37962306a36Sopenharmony_ci	xfs_mount_t             *mp,
38062306a36Sopenharmony_ci	uint64_t              *inval,
38162306a36Sopenharmony_ci	xfs_fsop_resblks_t      *outval)
38262306a36Sopenharmony_ci{
38362306a36Sopenharmony_ci	int64_t			lcounter, delta;
38462306a36Sopenharmony_ci	int64_t			fdblks_delta = 0;
38562306a36Sopenharmony_ci	uint64_t		request;
38662306a36Sopenharmony_ci	int64_t			free;
38762306a36Sopenharmony_ci	int			error = 0;
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_ci	/* If inval is null, report current values and return */
39062306a36Sopenharmony_ci	if (inval == (uint64_t *)NULL) {
39162306a36Sopenharmony_ci		if (!outval)
39262306a36Sopenharmony_ci			return -EINVAL;
39362306a36Sopenharmony_ci		outval->resblks = mp->m_resblks;
39462306a36Sopenharmony_ci		outval->resblks_avail = mp->m_resblks_avail;
39562306a36Sopenharmony_ci		return 0;
39662306a36Sopenharmony_ci	}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	request = *inval;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	/*
40162306a36Sopenharmony_ci	 * With per-cpu counters, this becomes an interesting problem. we need
40262306a36Sopenharmony_ci	 * to work out if we are freeing or allocation blocks first, then we can
40362306a36Sopenharmony_ci	 * do the modification as necessary.
40462306a36Sopenharmony_ci	 *
40562306a36Sopenharmony_ci	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
40662306a36Sopenharmony_ci	 * hold out any changes while we work out what to do. This means that
40762306a36Sopenharmony_ci	 * the amount of free space can change while we do this, so we need to
40862306a36Sopenharmony_ci	 * retry if we end up trying to reserve more space than is available.
40962306a36Sopenharmony_ci	 */
41062306a36Sopenharmony_ci	spin_lock(&mp->m_sb_lock);
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	/*
41362306a36Sopenharmony_ci	 * If our previous reservation was larger than the current value,
41462306a36Sopenharmony_ci	 * then move any unused blocks back to the free pool. Modify the resblks
41562306a36Sopenharmony_ci	 * counters directly since we shouldn't have any problems unreserving
41662306a36Sopenharmony_ci	 * space.
41762306a36Sopenharmony_ci	 */
41862306a36Sopenharmony_ci	if (mp->m_resblks > request) {
41962306a36Sopenharmony_ci		lcounter = mp->m_resblks_avail - request;
42062306a36Sopenharmony_ci		if (lcounter  > 0) {		/* release unused blocks */
42162306a36Sopenharmony_ci			fdblks_delta = lcounter;
42262306a36Sopenharmony_ci			mp->m_resblks_avail -= lcounter;
42362306a36Sopenharmony_ci		}
42462306a36Sopenharmony_ci		mp->m_resblks = request;
42562306a36Sopenharmony_ci		if (fdblks_delta) {
42662306a36Sopenharmony_ci			spin_unlock(&mp->m_sb_lock);
42762306a36Sopenharmony_ci			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
42862306a36Sopenharmony_ci			spin_lock(&mp->m_sb_lock);
42962306a36Sopenharmony_ci		}
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci		goto out;
43262306a36Sopenharmony_ci	}
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	/*
43562306a36Sopenharmony_ci	 * If the request is larger than the current reservation, reserve the
43662306a36Sopenharmony_ci	 * blocks before we update the reserve counters. Sample m_fdblocks and
43762306a36Sopenharmony_ci	 * perform a partial reservation if the request exceeds free space.
43862306a36Sopenharmony_ci	 *
43962306a36Sopenharmony_ci	 * The code below estimates how many blocks it can request from
44062306a36Sopenharmony_ci	 * fdblocks to stash in the reserve pool.  This is a classic TOCTOU
44162306a36Sopenharmony_ci	 * race since fdblocks updates are not always coordinated via
44262306a36Sopenharmony_ci	 * m_sb_lock.  Set the reserve size even if there's not enough free
44362306a36Sopenharmony_ci	 * space to fill it because mod_fdblocks will refill an undersized
44462306a36Sopenharmony_ci	 * reserve when it can.
44562306a36Sopenharmony_ci	 */
44662306a36Sopenharmony_ci	free = percpu_counter_sum(&mp->m_fdblocks) -
44762306a36Sopenharmony_ci						xfs_fdblocks_unavailable(mp);
44862306a36Sopenharmony_ci	delta = request - mp->m_resblks;
44962306a36Sopenharmony_ci	mp->m_resblks = request;
45062306a36Sopenharmony_ci	if (delta > 0 && free > 0) {
45162306a36Sopenharmony_ci		/*
45262306a36Sopenharmony_ci		 * We'll either succeed in getting space from the free block
45362306a36Sopenharmony_ci		 * count or we'll get an ENOSPC.  Don't set the reserved flag
45462306a36Sopenharmony_ci		 * here - we don't want to reserve the extra reserve blocks
45562306a36Sopenharmony_ci		 * from the reserve.
45662306a36Sopenharmony_ci		 *
45762306a36Sopenharmony_ci		 * The desired reserve size can change after we drop the lock.
45862306a36Sopenharmony_ci		 * Use mod_fdblocks to put the space into the reserve or into
45962306a36Sopenharmony_ci		 * fdblocks as appropriate.
46062306a36Sopenharmony_ci		 */
46162306a36Sopenharmony_ci		fdblks_delta = min(free, delta);
46262306a36Sopenharmony_ci		spin_unlock(&mp->m_sb_lock);
46362306a36Sopenharmony_ci		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
46462306a36Sopenharmony_ci		if (!error)
46562306a36Sopenharmony_ci			xfs_mod_fdblocks(mp, fdblks_delta, 0);
46662306a36Sopenharmony_ci		spin_lock(&mp->m_sb_lock);
46762306a36Sopenharmony_ci	}
46862306a36Sopenharmony_ciout:
46962306a36Sopenharmony_ci	if (outval) {
47062306a36Sopenharmony_ci		outval->resblks = mp->m_resblks;
47162306a36Sopenharmony_ci		outval->resblks_avail = mp->m_resblks_avail;
47262306a36Sopenharmony_ci	}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	spin_unlock(&mp->m_sb_lock);
47562306a36Sopenharmony_ci	return error;
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ciint
47962306a36Sopenharmony_cixfs_fs_goingdown(
48062306a36Sopenharmony_ci	xfs_mount_t	*mp,
48162306a36Sopenharmony_ci	uint32_t	inflags)
48262306a36Sopenharmony_ci{
48362306a36Sopenharmony_ci	switch (inflags) {
48462306a36Sopenharmony_ci	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
48562306a36Sopenharmony_ci		if (!freeze_bdev(mp->m_super->s_bdev)) {
48662306a36Sopenharmony_ci			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
48762306a36Sopenharmony_ci			thaw_bdev(mp->m_super->s_bdev);
48862306a36Sopenharmony_ci		}
48962306a36Sopenharmony_ci		break;
49062306a36Sopenharmony_ci	}
49162306a36Sopenharmony_ci	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
49262306a36Sopenharmony_ci		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
49362306a36Sopenharmony_ci		break;
49462306a36Sopenharmony_ci	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
49562306a36Sopenharmony_ci		xfs_force_shutdown(mp,
49662306a36Sopenharmony_ci				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
49762306a36Sopenharmony_ci		break;
49862306a36Sopenharmony_ci	default:
49962306a36Sopenharmony_ci		return -EINVAL;
50062306a36Sopenharmony_ci	}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci	return 0;
50362306a36Sopenharmony_ci}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci/*
50662306a36Sopenharmony_ci * Force a shutdown of the filesystem instantly while keeping the filesystem
50762306a36Sopenharmony_ci * consistent. We don't do an unmount here; just shutdown the shop, make sure
50862306a36Sopenharmony_ci * that absolutely nothing persistent happens to this filesystem after this
50962306a36Sopenharmony_ci * point.
51062306a36Sopenharmony_ci *
51162306a36Sopenharmony_ci * The shutdown state change is atomic, resulting in the first and only the
51262306a36Sopenharmony_ci * first shutdown call processing the shutdown. This means we only shutdown the
51362306a36Sopenharmony_ci * log once as it requires, and we don't spam the logs when multiple concurrent
51462306a36Sopenharmony_ci * shutdowns race to set the shutdown flags.
51562306a36Sopenharmony_ci */
51662306a36Sopenharmony_civoid
51762306a36Sopenharmony_cixfs_do_force_shutdown(
51862306a36Sopenharmony_ci	struct xfs_mount *mp,
51962306a36Sopenharmony_ci	uint32_t	flags,
52062306a36Sopenharmony_ci	char		*fname,
52162306a36Sopenharmony_ci	int		lnnum)
52262306a36Sopenharmony_ci{
52362306a36Sopenharmony_ci	int		tag;
52462306a36Sopenharmony_ci	const char	*why;
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) {
52862306a36Sopenharmony_ci		xlog_shutdown_wait(mp->m_log);
52962306a36Sopenharmony_ci		return;
53062306a36Sopenharmony_ci	}
53162306a36Sopenharmony_ci	if (mp->m_sb_bp)
53262306a36Sopenharmony_ci		mp->m_sb_bp->b_flags |= XBF_DONE;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	if (flags & SHUTDOWN_FORCE_UMOUNT)
53562306a36Sopenharmony_ci		xfs_alert(mp, "User initiated shutdown received.");
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	if (xlog_force_shutdown(mp->m_log, flags)) {
53862306a36Sopenharmony_ci		tag = XFS_PTAG_SHUTDOWN_LOGERROR;
53962306a36Sopenharmony_ci		why = "Log I/O Error";
54062306a36Sopenharmony_ci	} else if (flags & SHUTDOWN_CORRUPT_INCORE) {
54162306a36Sopenharmony_ci		tag = XFS_PTAG_SHUTDOWN_CORRUPT;
54262306a36Sopenharmony_ci		why = "Corruption of in-memory data";
54362306a36Sopenharmony_ci	} else if (flags & SHUTDOWN_CORRUPT_ONDISK) {
54462306a36Sopenharmony_ci		tag = XFS_PTAG_SHUTDOWN_CORRUPT;
54562306a36Sopenharmony_ci		why = "Corruption of on-disk metadata";
54662306a36Sopenharmony_ci	} else if (flags & SHUTDOWN_DEVICE_REMOVED) {
54762306a36Sopenharmony_ci		tag = XFS_PTAG_SHUTDOWN_IOERROR;
54862306a36Sopenharmony_ci		why = "Block device removal";
54962306a36Sopenharmony_ci	} else {
55062306a36Sopenharmony_ci		tag = XFS_PTAG_SHUTDOWN_IOERROR;
55162306a36Sopenharmony_ci		why = "Metadata I/O Error";
55262306a36Sopenharmony_ci	}
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum);
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	xfs_alert_tag(mp, tag,
55762306a36Sopenharmony_ci"%s (0x%x) detected at %pS (%s:%d).  Shutting down filesystem.",
55862306a36Sopenharmony_ci			why, flags, __return_address, fname, lnnum);
55962306a36Sopenharmony_ci	xfs_alert(mp,
56062306a36Sopenharmony_ci		"Please unmount the filesystem and rectify the problem(s)");
56162306a36Sopenharmony_ci	if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
56262306a36Sopenharmony_ci		xfs_stack_trace();
56362306a36Sopenharmony_ci}
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci/*
56662306a36Sopenharmony_ci * Reserve free space for per-AG metadata.
56762306a36Sopenharmony_ci */
56862306a36Sopenharmony_ciint
56962306a36Sopenharmony_cixfs_fs_reserve_ag_blocks(
57062306a36Sopenharmony_ci	struct xfs_mount	*mp)
57162306a36Sopenharmony_ci{
57262306a36Sopenharmony_ci	xfs_agnumber_t		agno;
57362306a36Sopenharmony_ci	struct xfs_perag	*pag;
57462306a36Sopenharmony_ci	int			error = 0;
57562306a36Sopenharmony_ci	int			err2;
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	mp->m_finobt_nores = false;
57862306a36Sopenharmony_ci	for_each_perag(mp, agno, pag) {
57962306a36Sopenharmony_ci		err2 = xfs_ag_resv_init(pag, NULL);
58062306a36Sopenharmony_ci		if (err2 && !error)
58162306a36Sopenharmony_ci			error = err2;
58262306a36Sopenharmony_ci	}
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	if (error && error != -ENOSPC) {
58562306a36Sopenharmony_ci		xfs_warn(mp,
58662306a36Sopenharmony_ci	"Error %d reserving per-AG metadata reserve pool.", error);
58762306a36Sopenharmony_ci		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
58862306a36Sopenharmony_ci	}
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	return error;
59162306a36Sopenharmony_ci}
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci/*
59462306a36Sopenharmony_ci * Free space reserved for per-AG metadata.
59562306a36Sopenharmony_ci */
59662306a36Sopenharmony_ciint
59762306a36Sopenharmony_cixfs_fs_unreserve_ag_blocks(
59862306a36Sopenharmony_ci	struct xfs_mount	*mp)
59962306a36Sopenharmony_ci{
60062306a36Sopenharmony_ci	xfs_agnumber_t		agno;
60162306a36Sopenharmony_ci	struct xfs_perag	*pag;
60262306a36Sopenharmony_ci	int			error = 0;
60362306a36Sopenharmony_ci	int			err2;
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	for_each_perag(mp, agno, pag) {
60662306a36Sopenharmony_ci		err2 = xfs_ag_resv_free(pag);
60762306a36Sopenharmony_ci		if (err2 && !error)
60862306a36Sopenharmony_ci			error = err2;
60962306a36Sopenharmony_ci	}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	if (error)
61262306a36Sopenharmony_ci		xfs_warn(mp,
61362306a36Sopenharmony_ci	"Error %d freeing per-AG metadata reserve pool.", error);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	return error;
61662306a36Sopenharmony_ci}
617