1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2022 Fujitsu.  All Rights Reserved.
4 */
5
6#include "xfs.h"
7#include "xfs_shared.h"
8#include "xfs_format.h"
9#include "xfs_log_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_alloc.h"
13#include "xfs_bit.h"
14#include "xfs_btree.h"
15#include "xfs_inode.h"
16#include "xfs_icache.h"
17#include "xfs_rmap.h"
18#include "xfs_rmap_btree.h"
19#include "xfs_rtalloc.h"
20#include "xfs_trans.h"
21#include "xfs_ag.h"
22
23#include <linux/mm.h>
24#include <linux/dax.h>
25
/*
 * State shared between xfs_dax_notify_ddev_failure() and the rmap query
 * callback xfs_dax_failure_fn() while one AG is being walked.
 */
struct xfs_failure_info {
	/* First AG block of the failed range inside the AG being queried. */
	xfs_agblock_t		startblock;
	/* Number of failed blocks inside the AG being queried. */
	xfs_extlen_t		blockcount;
	/* Memory-failure flags handed on to mf_dax_kill_procs(). */
	int			mf_flags;
	/*
	 * Set by the callback when the failure touches something other than
	 * in-core file data (non-inode owner, attr fork, bmbt block, or an
	 * inode lookup error); the caller then shuts the filesystem down.
	 */
	bool			want_shutdown;
};
32
33static pgoff_t
34xfs_failure_pgoff(
35	struct xfs_mount		*mp,
36	const struct xfs_rmap_irec	*rec,
37	const struct xfs_failure_info	*notify)
38{
39	loff_t				pos = XFS_FSB_TO_B(mp, rec->rm_offset);
40
41	if (notify->startblock > rec->rm_startblock)
42		pos += XFS_FSB_TO_B(mp,
43				notify->startblock - rec->rm_startblock);
44	return pos >> PAGE_SHIFT;
45}
46
47static unsigned long
48xfs_failure_pgcnt(
49	struct xfs_mount		*mp,
50	const struct xfs_rmap_irec	*rec,
51	const struct xfs_failure_info	*notify)
52{
53	xfs_agblock_t			end_rec;
54	xfs_agblock_t			end_notify;
55	xfs_agblock_t			start_cross;
56	xfs_agblock_t			end_cross;
57
58	start_cross = max(rec->rm_startblock, notify->startblock);
59
60	end_rec = rec->rm_startblock + rec->rm_blockcount;
61	end_notify = notify->startblock + notify->blockcount;
62	end_cross = min(end_rec, end_notify);
63
64	return XFS_FSB_TO_B(mp, end_cross - start_cross) >> PAGE_SHIFT;
65}
66
67static int
68xfs_dax_failure_fn(
69	struct xfs_btree_cur		*cur,
70	const struct xfs_rmap_irec	*rec,
71	void				*data)
72{
73	struct xfs_mount		*mp = cur->bc_mp;
74	struct xfs_inode		*ip;
75	struct xfs_failure_info		*notify = data;
76	int				error = 0;
77
78	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
79	    (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
80		notify->want_shutdown = true;
81		return 0;
82	}
83
84	/* Get files that incore, filter out others that are not in use. */
85	error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE,
86			 0, &ip);
87	/* Continue the rmap query if the inode isn't incore */
88	if (error == -ENODATA)
89		return 0;
90	if (error) {
91		notify->want_shutdown = true;
92		return 0;
93	}
94
95	error = mf_dax_kill_procs(VFS_I(ip)->i_mapping,
96				  xfs_failure_pgoff(mp, rec, notify),
97				  xfs_failure_pgcnt(mp, rec, notify),
98				  notify->mf_flags);
99	xfs_irele(ip);
100	return error;
101}
102
103static int
104xfs_dax_notify_ddev_failure(
105	struct xfs_mount	*mp,
106	xfs_daddr_t		daddr,
107	xfs_daddr_t		bblen,
108	int			mf_flags)
109{
110	struct xfs_failure_info	notify = { .mf_flags = mf_flags };
111	struct xfs_trans	*tp = NULL;
112	struct xfs_btree_cur	*cur = NULL;
113	struct xfs_buf		*agf_bp = NULL;
114	int			error = 0;
115	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, daddr);
116	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);
117	xfs_fsblock_t		end_fsbno = XFS_DADDR_TO_FSB(mp,
118							     daddr + bblen - 1);
119	xfs_agnumber_t		end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
120
121	error = xfs_trans_alloc_empty(mp, &tp);
122	if (error)
123		return error;
124
125	for (; agno <= end_agno; agno++) {
126		struct xfs_rmap_irec	ri_low = { };
127		struct xfs_rmap_irec	ri_high;
128		struct xfs_agf		*agf;
129		struct xfs_perag	*pag;
130		xfs_agblock_t		range_agend;
131
132		pag = xfs_perag_get(mp, agno);
133		error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
134		if (error) {
135			xfs_perag_put(pag);
136			break;
137		}
138
139		cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
140
141		/*
142		 * Set the rmap range from ri_low to ri_high, which represents
143		 * a [start, end] where we looking for the files or metadata.
144		 */
145		memset(&ri_high, 0xFF, sizeof(ri_high));
146		ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno);
147		if (agno == end_agno)
148			ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno);
149
150		agf = agf_bp->b_addr;
151		range_agend = min(be32_to_cpu(agf->agf_length) - 1,
152				ri_high.rm_startblock);
153		notify.startblock = ri_low.rm_startblock;
154		notify.blockcount = range_agend + 1 - ri_low.rm_startblock;
155
156		error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
157				xfs_dax_failure_fn, &notify);
158		xfs_btree_del_cursor(cur, error);
159		xfs_trans_brelse(tp, agf_bp);
160		xfs_perag_put(pag);
161		if (error)
162			break;
163
164		fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0);
165	}
166
167	xfs_trans_cancel(tp);
168	if (error || notify.want_shutdown) {
169		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
170		if (!error)
171			error = -EFSCORRUPTED;
172	}
173	return error;
174}
175
176static int
177xfs_dax_notify_failure(
178	struct dax_device	*dax_dev,
179	u64			offset,
180	u64			len,
181	int			mf_flags)
182{
183	struct xfs_mount	*mp = dax_holder(dax_dev);
184	u64			ddev_start;
185	u64			ddev_end;
186
187	if (!(mp->m_super->s_flags & SB_BORN)) {
188		xfs_warn(mp, "filesystem is not ready for notify_failure()!");
189		return -EIO;
190	}
191
192	if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
193		xfs_debug(mp,
194			 "notify_failure() not supported on realtime device!");
195		return -EOPNOTSUPP;
196	}
197
198	if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
199	    mp->m_logdev_targp != mp->m_ddev_targp) {
200		xfs_err(mp, "ondisk log corrupt, shutting down fs!");
201		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
202		return -EFSCORRUPTED;
203	}
204
205	if (!xfs_has_rmapbt(mp)) {
206		xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
207		return -EOPNOTSUPP;
208	}
209
210	ddev_start = mp->m_ddev_targp->bt_dax_part_off;
211	ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
212
213	/* Ignore the range out of filesystem area */
214	if (offset + len - 1 < ddev_start)
215		return -ENXIO;
216	if (offset > ddev_end)
217		return -ENXIO;
218
219	/* Calculate the real range when it touches the boundary */
220	if (offset > ddev_start)
221		offset -= ddev_start;
222	else {
223		len -= ddev_start - offset;
224		offset = 0;
225	}
226	if (offset + len - 1 > ddev_end)
227		len = ddev_end - offset + 1;
228
229	return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
230			mf_flags);
231}
232
/*
 * DAX holder callbacks provided by XFS.  Only ->notify_failure is set here,
 * pointing at xfs_dax_notify_failure().
 */
const struct dax_holder_operations xfs_dax_holder_operations = {
	.notify_failure		= xfs_dax_notify_failure,
};
236