xref: /kernel/linux/linux-5.10/fs/xfs/libxfs/xfs_bmap.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_dir2.h"
17#include "xfs_inode.h"
18#include "xfs_btree.h"
19#include "xfs_trans.h"
20#include "xfs_alloc.h"
21#include "xfs_bmap.h"
22#include "xfs_bmap_util.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_rtalloc.h"
25#include "xfs_errortag.h"
26#include "xfs_error.h"
27#include "xfs_quota.h"
28#include "xfs_trans_space.h"
29#include "xfs_buf_item.h"
30#include "xfs_trace.h"
31#include "xfs_attr_leaf.h"
32#include "xfs_filestream.h"
33#include "xfs_rmap.h"
34#include "xfs_ag_resv.h"
35#include "xfs_refcount.h"
36#include "xfs_icache.h"
37#include "xfs_iomap.h"
38
39
40kmem_zone_t		*xfs_bmap_free_item_zone;
41
42/*
43 * Miscellaneous helper functions
44 */
45
46/*
47 * Compute and fill in the value of the maximum depth of a bmap btree
48 * in this filesystem.  Done once, during mount.
49 */
50void
51xfs_bmap_compute_maxlevels(
52	xfs_mount_t	*mp,		/* file system mount structure */
53	int		whichfork)	/* data or attr fork */
54{
55	int		level;		/* btree level */
56	uint		maxblocks;	/* max blocks at this level */
57	uint		maxleafents;	/* max leaf entries possible */
58	int		maxrootrecs;	/* max records in root block */
59	int		minleafrecs;	/* min records in leaf block */
60	int		minnoderecs;	/* min records in node block */
61	int		sz;		/* root block size */
62
63	/*
64	 * The maximum number of extents in a file, hence the maximum number of
65	 * leaf entries, is controlled by the size of the on-disk extent count,
66	 * either a signed 32-bit number for the data fork, or a signed 16-bit
67	 * number for the attr fork.
68	 *
69	 * Note that we can no longer assume that if we are in ATTR1 that
70	 * the fork offset of all the inodes will be
71	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
72	 * with ATTR2 and then mounted back with ATTR1, keeping the
73	 * di_forkoff's fixed but probably at various positions. Therefore,
74	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
75	 * of a minimum size available.
76	 */
77	if (whichfork == XFS_DATA_FORK) {
78		maxleafents = MAXEXTNUM;
79		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
80	} else {
81		maxleafents = MAXAEXTNUM;
82		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
83	}
84	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
85	minleafrecs = mp->m_bmap_dmnr[0];
86	minnoderecs = mp->m_bmap_dmnr[1];
87	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
88	for (level = 1; maxblocks > 1; level++) {
89		if (maxblocks <= maxrootrecs)
90			maxblocks = 1;
91		else
92			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
93	}
94	mp->m_bm_maxlevels[whichfork] = level;
95}
96
97STATIC int				/* error */
98xfs_bmbt_lookup_eq(
99	struct xfs_btree_cur	*cur,
100	struct xfs_bmbt_irec	*irec,
101	int			*stat)	/* success/failure */
102{
103	cur->bc_rec.b = *irec;
104	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
105}
106
107STATIC int				/* error */
108xfs_bmbt_lookup_first(
109	struct xfs_btree_cur	*cur,
110	int			*stat)	/* success/failure */
111{
112	cur->bc_rec.b.br_startoff = 0;
113	cur->bc_rec.b.br_startblock = 0;
114	cur->bc_rec.b.br_blockcount = 0;
115	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
116}
117
118/*
119 * Check if the inode needs to be converted to btree format.
120 */
121static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
122{
123	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
124
125	return whichfork != XFS_COW_FORK &&
126		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
127		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
128}
129
130/*
131 * Check if the inode should be converted to extent format.
132 */
133static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
134{
135	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
136
137	return whichfork != XFS_COW_FORK &&
138		ifp->if_format == XFS_DINODE_FMT_BTREE &&
139		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
140}
141
142/*
143 * Update the record referred to by cur to the value given by irec
144 * This either works (return 0) or gets an EFSCORRUPTED error.
145 */
146STATIC int
147xfs_bmbt_update(
148	struct xfs_btree_cur	*cur,
149	struct xfs_bmbt_irec	*irec)
150{
151	union xfs_btree_rec	rec;
152
153	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
154	return xfs_btree_update(cur, &rec);
155}
156
157/*
158 * Compute the worst-case number of indirect blocks that will be used
159 * for ip's delayed extent of length "len".
160 */
161STATIC xfs_filblks_t
162xfs_bmap_worst_indlen(
163	xfs_inode_t	*ip,		/* incore inode pointer */
164	xfs_filblks_t	len)		/* delayed extent length */
165{
166	int		level;		/* btree level number */
167	int		maxrecs;	/* maximum record count at this level */
168	xfs_mount_t	*mp;		/* mount structure */
169	xfs_filblks_t	rval;		/* return value */
170
171	mp = ip->i_mount;
172	maxrecs = mp->m_bmap_dmxr[0];
173	for (level = 0, rval = 0;
174	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
175	     level++) {
176		len += maxrecs - 1;
177		do_div(len, maxrecs);
178		rval += len;
179		if (len == 1)
180			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
181				level - 1;
182		if (level == 0)
183			maxrecs = mp->m_bmap_dmxr[1];
184	}
185	return rval;
186}
187
188/*
189 * Calculate the default attribute fork offset for newly created inodes.
190 */
191uint
192xfs_default_attroffset(
193	struct xfs_inode	*ip)
194{
195	struct xfs_mount	*mp = ip->i_mount;
196	uint			offset;
197
198	if (mp->m_sb.sb_inodesize == 256)
199		offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
200	else
201		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
202
203	ASSERT(offset < XFS_LITINO(mp));
204	return offset;
205}
206
207/*
208 * Helper routine to reset inode di_forkoff field when switching
209 * attribute fork from local to extent format - we reset it where
210 * possible to make space available for inline data fork extents.
211 */
212STATIC void
213xfs_bmap_forkoff_reset(
214	xfs_inode_t	*ip,
215	int		whichfork)
216{
217	if (whichfork == XFS_ATTR_FORK &&
218	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
219	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
220		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
221
222		if (dfl_forkoff > ip->i_d.di_forkoff)
223			ip->i_d.di_forkoff = dfl_forkoff;
224	}
225}
226
227#ifdef DEBUG
228STATIC struct xfs_buf *
229xfs_bmap_get_bp(
230	struct xfs_btree_cur	*cur,
231	xfs_fsblock_t		bno)
232{
233	struct xfs_log_item	*lip;
234	int			i;
235
236	if (!cur)
237		return NULL;
238
239	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
240		if (!cur->bc_bufs[i])
241			break;
242		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
243			return cur->bc_bufs[i];
244	}
245
246	/* Chase down all the log items to see if the bp is there */
247	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
248		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
249
250		if (bip->bli_item.li_type == XFS_LI_BUF &&
251		    XFS_BUF_ADDR(bip->bli_buf) == bno)
252			return bip->bli_buf;
253	}
254
255	return NULL;
256}
257
258STATIC void
259xfs_check_block(
260	struct xfs_btree_block	*block,
261	xfs_mount_t		*mp,
262	int			root,
263	short			sz)
264{
265	int			i, j, dmxr;
266	__be64			*pp, *thispa;	/* pointer to block address */
267	xfs_bmbt_key_t		*prevp, *keyp;
268
269	ASSERT(be16_to_cpu(block->bb_level) > 0);
270
271	prevp = NULL;
272	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
273		dmxr = mp->m_bmap_dmxr[0];
274		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
275
276		if (prevp) {
277			ASSERT(be64_to_cpu(prevp->br_startoff) <
278			       be64_to_cpu(keyp->br_startoff));
279		}
280		prevp = keyp;
281
282		/*
283		 * Compare the block numbers to see if there are dups.
284		 */
285		if (root)
286			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
287		else
288			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
289
290		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
291			if (root)
292				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
293			else
294				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
295			if (*thispa == *pp) {
296				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
297					__func__, j, i,
298					(unsigned long long)be64_to_cpu(*thispa));
299				xfs_err(mp, "%s: ptrs are equal in node\n",
300					__func__);
301				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
302			}
303		}
304	}
305}
306
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such large
 * bmapbt constructs that is going to catch most corruptions.
 *
 * Only built under DEBUG.  Nothing is returned: an ordering violation trips
 * an ASSERT, while a failed block read or an invalid child pointer logs a
 * message and forces a filesystem shutdown.  Forks that are not in btree
 * format are ignored.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	int			level;	/* btree level, for checking */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;	/* set if we read bp ourselves */

	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
		return;

	/*
	 * skip large extent count inodes
	 *
	 * NOTE(review): this tests the data fork's extent count even when
	 * whichfork selects a different fork - confirm whether
	 * ifp->if_nextents was intended.
	 */
	if (ip->i_df.if_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/*
		 * See if buf is in cur first.  bp_release is only set when
		 * the buffer had to be read here, in which case this function
		 * is responsible for releasing it again.
		 */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		/*
		 * Leaf reached: leave the loop with bp (and bp_release) still
		 * set so the leaf walk below can use and release it.
		 */
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 * i counts the records seen in the leaves already walked.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Remember the right sibling's block number before this
		 * block's buffer is released; NULLFSBLOCK ends the walk.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		/* Each extent must end at or before the start of the next. */
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		/* Copy the last record out: the buffer may be released next. */
		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		/* Same lookup-then-read logic as in the descent above. */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

	/* A buffer may still be held here; drop it if we read it ourselves. */
error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
	/* Reached directly when a read failed, so there is no buffer to drop. */
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return;
}
474
/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 *
 * bno, len:  file offset range the caller asked about
 * flags:     XFS_BMAPI_* flags from the caller; only XFS_BMAPI_ENTIRE is
 *            examined here
 * mval:      array of mappings being returned
 * nmap:      number of mappings the caller had room for
 * ret_nmap:  number of mappings actually being returned
 *
 * Only built under DEBUG; every check is an ASSERT and nothing is returned.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;		/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			/* Mapping must lie entirely inside [bno, bno + len). */
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			/* Mapping only has to overlap [bno, bno + len). */
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		/* Consecutive mappings must be contiguous in file offset. */
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		/* Delayed-allocation and hole mappings must not be returned. */
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}
515
516#else
/*
 * Without DEBUG the checking helpers compile away to empty statements, so
 * callers can invoke them unconditionally.
 */
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
519#endif /* DEBUG */
520
521/*
522 * bmap free list manipulation functions
523 */
524
525/*
526 * Add the extent to the list of extents to be free at transaction end.
527 * The list is maintained sorted (by block number).
528 */
529void
530__xfs_bmap_add_free(
531	struct xfs_trans		*tp,
532	xfs_fsblock_t			bno,
533	xfs_filblks_t			len,
534	const struct xfs_owner_info	*oinfo,
535	bool				skip_discard)
536{
537	struct xfs_extent_free_item	*new;		/* new element */
538#ifdef DEBUG
539	struct xfs_mount		*mp = tp->t_mountp;
540	xfs_agnumber_t			agno;
541	xfs_agblock_t			agbno;
542
543	ASSERT(bno != NULLFSBLOCK);
544	ASSERT(len > 0);
545	ASSERT(len <= MAXEXTLEN);
546	ASSERT(!isnullstartblock(bno));
547	agno = XFS_FSB_TO_AGNO(mp, bno);
548	agbno = XFS_FSB_TO_AGBNO(mp, bno);
549	ASSERT(agno < mp->m_sb.sb_agcount);
550	ASSERT(agbno < mp->m_sb.sb_agblocks);
551	ASSERT(len < mp->m_sb.sb_agblocks);
552	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
553#endif
554	ASSERT(xfs_bmap_free_item_zone != NULL);
555
556	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
557			       GFP_KERNEL | __GFP_NOFAIL);
558	new->xefi_startblock = bno;
559	new->xefi_blockcount = (xfs_extlen_t)len;
560	if (oinfo)
561		new->xefi_oinfo = *oinfo;
562	else
563		new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
564	new->xefi_skip_discard = skip_discard;
565	trace_xfs_bmap_free_defer(tp->t_mountp,
566			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
567			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
568	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
569}
570
571/*
572 * Inode fork format manipulation functions
573 */
574
/*
 * Convert the inode format to extent format if it currently is in btree format,
 * but the extent list is small enough that it fits into the extent format.
 *
 * Since the extents are already in-core, all we have to do is give up the space
 * for the btree root and pitch the leaf block.
 *
 * Returns 0 without touching anything when xfs_bmap_wants_extents() says no
 * conversion is called for.  On a successful conversion the flags
 * XFS_ILOG_CORE and xfs_ilog_fext(whichfork) are OR-ed into *logflagsp.
 * Otherwise a negative errno is returned.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	struct xfs_trans	*tp,	/* transaction pointer */
	struct xfs_inode	*ip,	/* incore inode pointer */
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*rblock = ifp->if_broot;
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	__be64			*pp;	/* ptr to block address */
	struct xfs_owner_info	oinfo;

	/* check if we actually need the extent format first: */
	if (!xfs_bmap_wants_extents(ip, whichfork))
		return 0;

	/* The tree is expected to be a root with exactly one leaf child. */
	ASSERT(cur);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);

	/* The root's only pointer gives the block number of the child. */
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
#ifdef DEBUG
	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
		return -EFSCORRUPTED;
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	/* Queue the child block to be freed and drop it from the accounting. */
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	/* Don't leave the cursor pointing at the invalidated buffer. */
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	/* Give up the incore root; the asserts expect it to be gone afterwards. */
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
639
/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 *
 * *curp must be NULL on entry.  On success it receives the new btree cursor,
 * *logflagsp is set to XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork), and 0 is
 * returned.  On failure the cursor is deleted, the incore root is released,
 * the fork is put back into extents format, *logflagsp is left at 0 and a
 * negative errno is returned.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	struct xfs_buf		*abp;		/* buffer for ablock */
	struct xfs_alloc_arg	args;		/* allocation arguments */
	struct xfs_bmbt_rec	*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	struct xfs_ifork	*ifp;		/* inode fork pointer */
	struct xfs_bmbt_key	*kp;		/* root block key pointer */
	struct xfs_mount	*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;	/* records copied to the child */

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore. This needs to be undone if we fail
	 * to expand the root.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	ifp->if_format = XFS_DINODE_FMT_BTREE;
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
	/*
	 * Pick the allocation target: start from the inode's own block if the
	 * transaction has not allocated anything yet, otherwise work from the
	 * transaction's first block (START_BNO in low-space mode, NEAR_BNO
	 * otherwise).
	 */
	if (tp->t_firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = tp->t_firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = tp->t_firstblock;
	}
	/* Exactly one block is wanted for the child. */
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto out_root_realloc;

	/* The allocator returned no error but also found no block. */
	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		error = -ENOSPC;
		goto out_root_realloc;
	}

	/*
	 * Allocation is not expected to fail, the space was reserved (hence
	 * the warning above if it does).
	 */
	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
	tp->t_firstblock = args.fsbno;
	cur->bc_ino.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, args.fsbno),
			mp->m_bsize, 0, &abp);
	if (error)
		goto out_unreserve_dquot;

	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
				XFS_BTREE_LONG_PTRS);

	/*
	 * Copy every incore extent into the child, skipping those whose
	 * start block is a "null start block" (isnullstartblock()).
	 */
	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == ifp->if_nextents);
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;

	/*
	 * NOTE(review): this undoes the dquot block count change, but the
	 * matching ip->i_d.di_nblocks++ above is not reverted - confirm that
	 * this is intentional.
	 */
out_unreserve_dquot:
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	/* Undo the root setup and put the fork back into extents format. */
out_root_realloc:
	xfs_iroot_realloc(ip, -1, whichfork);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	ASSERT(ifp->if_broot == NULL);
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);

	return error;
}
787
788/*
789 * Convert a local file to an extents file.
790 * This code is out of bounds for data forks of regular files,
791 * since the file data needs to get logged so things will stay consistent.
792 * (The bmap-level manipulations are ok, though).
793 */
794void
795xfs_bmap_local_to_extents_empty(
796	struct xfs_trans	*tp,
797	struct xfs_inode	*ip,
798	int			whichfork)
799{
800	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
801
802	ASSERT(whichfork != XFS_COW_FORK);
803	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
804	ASSERT(ifp->if_bytes == 0);
805	ASSERT(ifp->if_nextents == 0);
806
807	xfs_bmap_forkoff_reset(ip, whichfork);
808	ifp->if_flags &= ~XFS_IFINLINE;
809	ifp->if_flags |= XFS_IFEXTENTS;
810	ifp->if_u1.if_root = NULL;
811	ifp->if_height = 0;
812	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
813	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
814}
815
816
/*
 * Convert a local-format fork into an extents-format fork with a single
 * one-block extent at file offset 0.
 *
 * An empty fork is simply switched over by
 * xfs_bmap_local_to_extents_empty().  Otherwise one block is allocated,
 * init_fn is called to fill its buffer from the inline data and log it, the
 * inline data is released, and the new extent is inserted into the fork.
 *
 * *logflagsp is always written: XFS_ILOG_CORE for the empty case,
 * XFS_ILOG_CORE | xfs_ilog_fext(whichfork) after a full conversion, and 0
 * when the allocation or the buffer lookup fails.  Returns 0 or a negative
 * errno.
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	struct xfs_ifork *ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	/* Nothing stored inline: no block is needed, just switch formats. */
	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (tp->t_firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = tp->t_firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	tp->t_firstblock = args.fsbno;
	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
			args.mp->m_bsize, 0, &bp);
	if (error)
		goto done;

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
	flags |= XFS_ILOG_CORE;

	/*
	 * NOTE(review): xfs_bmap_local_to_extents_empty() has already cleared
	 * these two fields, so the assignments below look redundant.
	 */
	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;

	/* One written extent covering file block 0 with the new block. */
	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	ifp->if_nextents = 1;
	/* Note: the block count is set to 1 outright, not incremented. */
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
920
921/*
922 * Called from xfs_bmap_add_attrfork to handle btree format files.
923 */
924STATIC int					/* error */
925xfs_bmap_add_attrfork_btree(
926	xfs_trans_t		*tp,		/* transaction pointer */
927	xfs_inode_t		*ip,		/* incore inode pointer */
928	int			*flags)		/* inode logging flags */
929{
930	xfs_btree_cur_t		*cur;		/* btree cursor */
931	int			error;		/* error return value */
932	xfs_mount_t		*mp;		/* file system mount struct */
933	int			stat;		/* newroot status */
934
935	mp = ip->i_mount;
936	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
937		*flags |= XFS_ILOG_DBROOT;
938	else {
939		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
940		error = xfs_bmbt_lookup_first(cur, &stat);
941		if (error)
942			goto error0;
943		/* must be at least one entry */
944		if (XFS_IS_CORRUPT(mp, stat != 1)) {
945			error = -EFSCORRUPTED;
946			goto error0;
947		}
948		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
949			goto error0;
950		if (stat == 0) {
951			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
952			return -ENOSPC;
953		}
954		cur->bc_ino.allocated = 0;
955		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
956	}
957	return 0;
958error0:
959	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
960	return error;
961}
962
963/*
964 * Called from xfs_bmap_add_attrfork to handle extents format files.
965 */
966STATIC int					/* error */
967xfs_bmap_add_attrfork_extents(
968	struct xfs_trans	*tp,		/* transaction pointer */
969	struct xfs_inode	*ip,		/* incore inode pointer */
970	int			*flags)		/* inode logging flags */
971{
972	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
973	int			error;		/* error return value */
974
975	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
976	    XFS_IFORK_DSIZE(ip))
977		return 0;
978	cur = NULL;
979	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
980					  XFS_DATA_FORK);
981	if (cur) {
982		cur->bc_ino.allocated = 0;
983		xfs_btree_del_cursor(cur, error);
984	}
985	return error;
986}
987
988/*
989 * Called from xfs_bmap_add_attrfork to handle local format files. Each
990 * different data fork content type needs a different callout to do the
991 * conversion. Some are basic and only require special block initialisation
992 * callouts for the data formating, others (directories) are so specialised they
993 * handle everything themselves.
994 *
995 * XXX (dgc): investigate whether directory conversion can use the generic
996 * formatting callout. It should be possible - it's just a very complex
997 * formatter.
998 */
999STATIC int					/* error */
1000xfs_bmap_add_attrfork_local(
1001	struct xfs_trans	*tp,		/* transaction pointer */
1002	struct xfs_inode	*ip,		/* incore inode pointer */
1003	int			*flags)		/* inode logging flags */
1004{
1005	struct xfs_da_args	dargs;		/* args for dir/attr code */
1006
1007	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1008		return 0;
1009
1010	if (S_ISDIR(VFS_I(ip)->i_mode)) {
1011		memset(&dargs, 0, sizeof(dargs));
1012		dargs.geo = ip->i_mount->m_dir_geo;
1013		dargs.dp = ip;
1014		dargs.total = dargs.geo->fsbcount;
1015		dargs.whichfork = XFS_DATA_FORK;
1016		dargs.trans = tp;
1017		return xfs_dir2_sf_to_block(&dargs);
1018	}
1019
1020	if (S_ISLNK(VFS_I(ip)->i_mode))
1021		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1022						 XFS_DATA_FORK,
1023						 xfs_symlink_local_to_remote);
1024
1025	/* should only be called for types that support local format data */
1026	ASSERT(0);
1027	return -EFSCORRUPTED;
1028}
1029
1030/* Set an inode attr fork off based on the format */
1031int
1032xfs_bmap_set_attrforkoff(
1033	struct xfs_inode	*ip,
1034	int			size,
1035	int			*version)
1036{
1037	switch (ip->i_df.if_format) {
1038	case XFS_DINODE_FMT_DEV:
1039		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1040		break;
1041	case XFS_DINODE_FMT_LOCAL:
1042	case XFS_DINODE_FMT_EXTENTS:
1043	case XFS_DINODE_FMT_BTREE:
1044		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1045		if (!ip->i_d.di_forkoff)
1046			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1047		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1048			*version = 2;
1049		break;
1050	default:
1051		ASSERT(0);
1052		return -EINVAL;
1053	}
1054
1055	return 0;
1056}
1057
1058/*
1059 * Convert inode from non-attributed to attributed.
1060 * Must not be in a transaction, ip must not be locked.
1061 */
1062int						/* error code */
1063xfs_bmap_add_attrfork(
1064	xfs_inode_t		*ip,		/* incore inode pointer */
1065	int			size,		/* space new attribute needs */
1066	int			rsvd)		/* xact may use reserved blks */
1067{
1068	xfs_mount_t		*mp;		/* mount structure */
1069	xfs_trans_t		*tp;		/* transaction pointer */
1070	int			blks;		/* space reservation */
1071	int			version = 1;	/* superblock attr version */
1072	int			logflags;	/* logging flags */
1073	int			error;		/* error return value */
1074
1075	ASSERT(XFS_IFORK_Q(ip) == 0);
1076
1077	mp = ip->i_mount;
1078	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1079
1080	blks = XFS_ADDAFORK_SPACE_RES(mp);
1081
1082	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1083			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1084	if (error)
1085		return error;
1086
1087	xfs_ilock(ip, XFS_ILOCK_EXCL);
1088	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1089			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1090			XFS_QMOPT_RES_REGBLKS);
1091	if (error)
1092		goto trans_cancel;
1093	if (XFS_IFORK_Q(ip))
1094		goto trans_cancel;
1095
1096	xfs_trans_ijoin(tp, ip, 0);
1097	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1098	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1099	if (error)
1100		goto trans_cancel;
1101	ASSERT(ip->i_afp == NULL);
1102
1103	ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
1104				      GFP_KERNEL | __GFP_NOFAIL);
1105
1106	ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1107	ip->i_afp->if_flags = XFS_IFEXTENTS;
1108	logflags = 0;
1109	switch (ip->i_df.if_format) {
1110	case XFS_DINODE_FMT_LOCAL:
1111		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1112		break;
1113	case XFS_DINODE_FMT_EXTENTS:
1114		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1115		break;
1116	case XFS_DINODE_FMT_BTREE:
1117		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1118		break;
1119	default:
1120		error = 0;
1121		break;
1122	}
1123	if (logflags)
1124		xfs_trans_log_inode(tp, ip, logflags);
1125	if (error)
1126		goto trans_cancel;
1127	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1128	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1129		bool log_sb = false;
1130
1131		spin_lock(&mp->m_sb_lock);
1132		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1133			xfs_sb_version_addattr(&mp->m_sb);
1134			log_sb = true;
1135		}
1136		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1137			xfs_sb_version_addattr2(&mp->m_sb);
1138			log_sb = true;
1139		}
1140		spin_unlock(&mp->m_sb_lock);
1141		if (log_sb)
1142			xfs_log_sb(tp);
1143	}
1144
1145	error = xfs_trans_commit(tp);
1146	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147	return error;
1148
1149trans_cancel:
1150	xfs_trans_cancel(tp);
1151	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1152	return error;
1153}
1154
1155/*
1156 * Internal and external extent tree search functions.
1157 */
1158
/*
 * State carried through the btree walk in xfs_iread_extents() and updated by
 * the per-block callback xfs_iread_bmbt_block().
 */
1159struct xfs_iread_state {
1160	struct xfs_iext_cursor	icur;	/* incore position for the next insert */
1161	xfs_extnum_t		loaded;	/* records copied into the fork so far */
1162};
1163
1164/* Stuff every bmbt record from this block into the incore extent map. */
1165static int
1166xfs_iread_bmbt_block(
1167	struct xfs_btree_cur	*cur,
1168	int			level,
1169	void			*priv)
1170{
1171	struct xfs_iread_state	*ir = priv;
1172	struct xfs_mount	*mp = cur->bc_mp;
1173	struct xfs_inode	*ip = cur->bc_ino.ip;
1174	struct xfs_btree_block	*block;
1175	struct xfs_buf		*bp;
1176	struct xfs_bmbt_rec	*frp;
1177	xfs_extnum_t		num_recs;
1178	xfs_extnum_t		j;
1179	int			whichfork = cur->bc_ino.whichfork;
1180	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1181
1182	block = xfs_btree_get_block(cur, level, &bp);
1183
1184	/* Abort if we find more records than nextents. */
1185	num_recs = xfs_btree_get_numrecs(block);
1186	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1187		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1188				(unsigned long long)ip->i_ino);
1189		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1190				sizeof(*block), __this_address);
1191		return -EFSCORRUPTED;
1192	}
1193
1194	/* Copy records into the incore cache. */
1195	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1196	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1197		struct xfs_bmbt_irec	new;
1198		xfs_failaddr_t		fa;
1199
1200		xfs_bmbt_disk_get_all(frp, &new);
1201		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1202		if (fa) {
1203			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1204					"xfs_iread_extents(2)", frp,
1205					sizeof(*frp), fa);
1206			return -EFSCORRUPTED;
1207		}
1208		xfs_iext_insert(ip, &ir->icur, &new,
1209				xfs_bmap_fork_to_state(whichfork));
1210		trace_xfs_read_extent(ip, &ir->icur,
1211				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1212		xfs_iext_next(ifp, &ir->icur);
1213	}
1214
1215	return 0;
1216}
1217
1218/*
1219 * Read in extents from a btree-format inode.
1220 */
1221int
1222xfs_iread_extents(
1223	struct xfs_trans	*tp,
1224	struct xfs_inode	*ip,
1225	int			whichfork)
1226{
1227	struct xfs_iread_state	ir;
1228	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1229	struct xfs_mount	*mp = ip->i_mount;
1230	struct xfs_btree_cur	*cur;
1231	int			error;
1232
1233	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1234
1235	if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1236		error = -EFSCORRUPTED;
1237		goto out;
1238	}
1239
1240	ir.loaded = 0;
1241	xfs_iext_first(ifp, &ir.icur);
1242	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1243	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1244			XFS_BTREE_VISIT_RECORDS, &ir);
1245	xfs_btree_del_cursor(cur, error);
1246	if (error)
1247		goto out;
1248
1249	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1250		error = -EFSCORRUPTED;
1251		goto out;
1252	}
1253	ASSERT(ir.loaded == xfs_iext_count(ifp));
1254
1255	ifp->if_flags |= XFS_IFEXTENTS;
1256	return 0;
1257out:
1258	xfs_iext_destroy(ifp);
1259	return error;
1260}
1261
1262/*
1263 * Returns the relative block number of the first unused block(s) in the given
1264 * fork with at least "len" logically contiguous blocks free.  This is the
1265 * lowest-address hole if the fork has holes, else the first block past the end
1266 * of fork.  Return 0 if the fork is currently local (in-inode).
1267 */
1268int						/* error */
1269xfs_bmap_first_unused(
1270	struct xfs_trans	*tp,		/* transaction pointer */
1271	struct xfs_inode	*ip,		/* incore inode */
1272	xfs_extlen_t		len,		/* size of hole to find */
1273	xfs_fileoff_t		*first_unused,	/* unused block */
1274	int			whichfork)	/* data or attr fork */
1275{
1276	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1277	struct xfs_bmbt_irec	got;
1278	struct xfs_iext_cursor	icur;
1279	xfs_fileoff_t		lastaddr = 0;
1280	xfs_fileoff_t		lowest, max;
1281	int			error;
1282
1283	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1284		*first_unused = 0;
1285		return 0;
1286	}
1287
1288	ASSERT(xfs_ifork_has_extents(ifp));
1289
1290	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1291		error = xfs_iread_extents(tp, ip, whichfork);
1292		if (error)
1293			return error;
1294	}
1295
1296	lowest = max = *first_unused;
1297	for_each_xfs_iext(ifp, &icur, &got) {
1298		/*
1299		 * See if the hole before this extent will work.
1300		 */
1301		if (got.br_startoff >= lowest + len &&
1302		    got.br_startoff - max >= len)
1303			break;
1304		lastaddr = got.br_startoff + got.br_blockcount;
1305		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1306	}
1307
1308	*first_unused = max;
1309	return 0;
1310}
1311
1312/*
1313 * Returns the file-relative block number of the last block - 1 before
1314 * last_block (input value) in the file.
1315 * This is not based on i_size, it is based on the extent records.
1316 * Returns 0 for local files, as they do not have extent records.
1317 */
1318int						/* error */
1319xfs_bmap_last_before(
1320	struct xfs_trans	*tp,		/* transaction pointer */
1321	struct xfs_inode	*ip,		/* incore inode */
1322	xfs_fileoff_t		*last_block,	/* last block */
1323	int			whichfork)	/* data or attr fork */
1324{
1325	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1326	struct xfs_bmbt_irec	got;
1327	struct xfs_iext_cursor	icur;
1328	int			error;
1329
1330	switch (ifp->if_format) {
1331	case XFS_DINODE_FMT_LOCAL:
1332		*last_block = 0;
1333		return 0;
1334	case XFS_DINODE_FMT_BTREE:
1335	case XFS_DINODE_FMT_EXTENTS:
1336		break;
1337	default:
1338		ASSERT(0);
1339		return -EFSCORRUPTED;
1340	}
1341
1342	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1343		error = xfs_iread_extents(tp, ip, whichfork);
1344		if (error)
1345			return error;
1346	}
1347
1348	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1349		*last_block = 0;
1350	return 0;
1351}
1352
1353int
1354xfs_bmap_last_extent(
1355	struct xfs_trans	*tp,
1356	struct xfs_inode	*ip,
1357	int			whichfork,
1358	struct xfs_bmbt_irec	*rec,
1359	int			*is_empty)
1360{
1361	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1362	struct xfs_iext_cursor	icur;
1363	int			error;
1364
1365	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1366		error = xfs_iread_extents(tp, ip, whichfork);
1367		if (error)
1368			return error;
1369	}
1370
1371	xfs_iext_last(ifp, &icur);
1372	if (!xfs_iext_get_extent(ifp, &icur, rec))
1373		*is_empty = 1;
1374	else
1375		*is_empty = 0;
1376	return 0;
1377}
1378
1379/*
1380 * Check the last inode extent to determine whether this allocation will result
1381 * in blocks being allocated at the end of the file. When we allocate new data
1382 * blocks at the end of the file which do not start at the previous data block,
1383 * we will try to align the new blocks at stripe unit boundaries.
1384 *
1385 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1386 * at, or past the EOF.
1387 */
1388STATIC int
1389xfs_bmap_isaeof(
1390	struct xfs_bmalloca	*bma,
1391	int			whichfork)
1392{
1393	struct xfs_bmbt_irec	rec;
1394	int			is_empty;
1395	int			error;
1396
1397	bma->aeof = false;
1398	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1399				     &is_empty);
1400	if (error)
1401		return error;
1402
1403	if (is_empty) {
1404		bma->aeof = true;
1405		return 0;
1406	}
1407
1408	/*
1409	 * Check if we are allocation or past the last extent, or at least into
1410	 * the last delayed allocated extent.
1411	 */
1412	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1413		(bma->offset >= rec.br_startoff &&
1414		 isnullstartblock(rec.br_startblock));
1415	return 0;
1416}
1417
1418/*
1419 * Returns the file-relative block number of the first block past eof in
1420 * the file.  This is not based on i_size, it is based on the extent records.
1421 * Returns 0 for local files, as they do not have extent records.
1422 */
1423int
1424xfs_bmap_last_offset(
1425	struct xfs_inode	*ip,
1426	xfs_fileoff_t		*last_block,
1427	int			whichfork)
1428{
1429	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1430	struct xfs_bmbt_irec	rec;
1431	int			is_empty;
1432	int			error;
1433
1434	*last_block = 0;
1435
1436	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1437		return 0;
1438
1439	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1440		return -EFSCORRUPTED;
1441
1442	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1443	if (error || is_empty)
1444		return error;
1445
1446	*last_block = rec.br_startoff + rec.br_blockcount;
1447	return 0;
1448}
1449
1450/*
1451 * Returns whether the selected fork of the inode has exactly one
1452 * block or not.  For the data fork we check this matches di_size,
1453 * implying the file's range is 0..bsize-1.
1454 */
1455int					/* 1=>1 block, 0=>otherwise */
1456xfs_bmap_one_block(
1457	struct xfs_inode	*ip,		/* incore inode */
1458	int			whichfork)	/* data or attr fork */
1459{
1460	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1461	int			rval;		/* return value */
1462	struct xfs_bmbt_irec	s;		/* internal version of extent */
1463	struct xfs_iext_cursor icur;
1464
1465#ifndef DEBUG
1466	if (whichfork == XFS_DATA_FORK)
1467		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1468#endif	/* !DEBUG */
1469	if (ifp->if_nextents != 1)
1470		return 0;
1471	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1472		return 0;
1473	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1474	xfs_iext_first(ifp, &icur);
1475	xfs_iext_get_extent(ifp, &icur, &s);
1476	rval = s.br_startoff == 0 && s.br_blockcount == 1;
1477	if (rval && whichfork == XFS_DATA_FORK)
1478		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1479	return rval;
1480}
1481
1482/*
1483 * Extent tree manipulation functions used during allocation.
1484 */
1485
1486/*
1487 * Convert a delayed allocation to a real allocation.
1488 */
/*
 * The real extent to install is bma->got ("new" below) and bma->icur must
 * point at the delayed allocation extent ("PREV") that covers it.  Depending
 * on how much of PREV is replaced (the BMAP_*_FILLING state bits) and on
 * whether the real neighbours can be merged with the new extent (the
 * BMAP_*_CONTIG bits), the incore extent list and, when bma->cur is set, the
 * bmap btree are updated to match.
 *
 * Inode logging flags are accumulated into bma->logflags (except for the COW
 * fork).  The difference between the old and the new delalloc block counts
 * is handed to xfs_mod_delalloc() and xfs_mod_fdblocks() at the end.
 *
 * Returns 0 or a negative errno.
 */
1489STATIC int				/* error */
1490xfs_bmap_add_extent_delay_real(
1491	struct xfs_bmalloca	*bma,
1492	int			whichfork)
1493{
1494	struct xfs_mount	*mp = bma->ip->i_mount;
1495	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1496	struct xfs_bmbt_irec	*new = &bma->got;
1497	int			error;	/* error return value */
1498	int			i;	/* temp state */
1499	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1500	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1501					/* left is 0, right is 1, prev is 2 */
1502	int			rval=0;	/* return value (logging flags) */
1503	int			state = xfs_bmap_fork_to_state(whichfork);
1504	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1505	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1506	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1507	int			tmp_rval;	/* partial logging flags */
1508	struct xfs_bmbt_irec	old;
1509
1510	ASSERT(whichfork != XFS_ATTR_FORK);
1511	ASSERT(!isnullstartblock(new->br_startblock));
1512	ASSERT(!bma->cur ||
1513	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1514
1515	XFS_STATS_INC(mp, xs_add_exlist);
1516
1517#define	LEFT		r[0]
1518#define	RIGHT		r[1]
1519#define	PREV		r[2]
1520
1521	/*
1522	 * Set up a bunch of variables to make the tests simpler.
1523	 */
1524	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1525	new_endoff = new->br_startoff + new->br_blockcount;
1526	ASSERT(isnullstartblock(PREV.br_startblock));
1527	ASSERT(PREV.br_startoff <= new->br_startoff);
1528	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1529
	/*
	 * da_old is the block count encoded in PREV's delalloc startblock;
	 * da_new is recomputed by whichever case below leaves delalloc
	 * extents behind.
	 */
1530	da_old = startblockval(PREV.br_startblock);
1531	da_new = 0;
1532
1533	/*
1534	 * Set flags determining what part of the previous delayed allocation
1535	 * extent is being replaced by a real allocation.
1536	 */
1537	if (PREV.br_startoff == new->br_startoff)
1538		state |= BMAP_LEFT_FILLING;
1539	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1540		state |= BMAP_RIGHT_FILLING;
1541
1542	/*
1543	 * Check and set flags if this segment has a left neighbor.
1544	 * Don't set contiguous if the combined extent would be too large.
1545	 */
1546	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1547		state |= BMAP_LEFT_VALID;
1548		if (isnullstartblock(LEFT.br_startblock))
1549			state |= BMAP_LEFT_DELAY;
1550	}
1551
1552	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1553	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1554	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1555	    LEFT.br_state == new->br_state &&
1556	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1557		state |= BMAP_LEFT_CONTIG;
1558
1559	/*
1560	 * Check and set flags if this segment has a right neighbor.
1561	 * Don't set contiguous if the combined extent would be too large.
1562	 * Also check for all-three-contiguous being too large.
1563	 */
1564	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1565		state |= BMAP_RIGHT_VALID;
1566		if (isnullstartblock(RIGHT.br_startblock))
1567			state |= BMAP_RIGHT_DELAY;
1568	}
1569
1570	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1571	    new_endoff == RIGHT.br_startoff &&
1572	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1573	    new->br_state == RIGHT.br_state &&
1574	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1575	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1576		       BMAP_RIGHT_FILLING)) !=
1577		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1578		       BMAP_RIGHT_FILLING) ||
1579	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1580			<= MAXEXTLEN))
1581		state |= BMAP_RIGHT_CONTIG;
1582
1583	error = 0;
1584	/*
1585	 * Switch out based on the FILLING and CONTIG state bits.
1586	 */
1587	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1588			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1589	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1590	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1591		/*
1592		 * Filling in all of a previously delayed allocation extent.
1593		 * The left and right neighbors are both contiguous with new.
1594		 */
1595		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1596
		/* Drop PREV and RIGHT from the incore list, then grow LEFT. */
1597		xfs_iext_remove(bma->ip, &bma->icur, state);
1598		xfs_iext_remove(bma->ip, &bma->icur, state);
1599		xfs_iext_prev(ifp, &bma->icur);
1600		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		/* The two real neighbours end up as a single record. */
1601		ifp->if_nextents--;
1602
1603		if (bma->cur == NULL)
1604			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1605		else {
1606			rval = XFS_ILOG_CORE;
			/* btree: delete RIGHT, step back and rewrite LEFT */
1607			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1608			if (error)
1609				goto done;
1610			if (XFS_IS_CORRUPT(mp, i != 1)) {
1611				error = -EFSCORRUPTED;
1612				goto done;
1613			}
1614			error = xfs_btree_delete(bma->cur, &i);
1615			if (error)
1616				goto done;
1617			if (XFS_IS_CORRUPT(mp, i != 1)) {
1618				error = -EFSCORRUPTED;
1619				goto done;
1620			}
1621			error = xfs_btree_decrement(bma->cur, 0, &i);
1622			if (error)
1623				goto done;
1624			if (XFS_IS_CORRUPT(mp, i != 1)) {
1625				error = -EFSCORRUPTED;
1626				goto done;
1627			}
1628			error = xfs_bmbt_update(bma->cur, &LEFT);
1629			if (error)
1630				goto done;
1631		}
1632		break;
1633
1634	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1635		/*
1636		 * Filling in all of a previously delayed allocation extent.
1637		 * The left neighbor is contiguous, the right is not.
1638		 */
1639		old = LEFT;
1640		LEFT.br_blockcount += PREV.br_blockcount;
1641
1642		xfs_iext_remove(bma->ip, &bma->icur, state);
1643		xfs_iext_prev(ifp, &bma->icur);
1644		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1645
1646		if (bma->cur == NULL)
1647			rval = XFS_ILOG_DEXT;
1648		else {
1649			rval = 0;
			/* look up the unmodified copy of LEFT, then update it */
1650			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1651			if (error)
1652				goto done;
1653			if (XFS_IS_CORRUPT(mp, i != 1)) {
1654				error = -EFSCORRUPTED;
1655				goto done;
1656			}
1657			error = xfs_bmbt_update(bma->cur, &LEFT);
1658			if (error)
1659				goto done;
1660		}
1661		break;
1662
1663	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664		/*
1665		 * Filling in all of a previously delayed allocation extent.
1666		 * The right neighbor is contiguous, the left is not. Take care
1667		 * with delay -> unwritten extent allocation here because the
1668		 * delalloc record we are overwriting is always written.
1669		 */
1670		PREV.br_startblock = new->br_startblock;
1671		PREV.br_blockcount += RIGHT.br_blockcount;
1672		PREV.br_state = new->br_state;
1673
		/* Remove RIGHT from the incore list and let PREV cover both. */
1674		xfs_iext_next(ifp, &bma->icur);
1675		xfs_iext_remove(bma->ip, &bma->icur, state);
1676		xfs_iext_prev(ifp, &bma->icur);
1677		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1678
1679		if (bma->cur == NULL)
1680			rval = XFS_ILOG_DEXT;
1681		else {
1682			rval = 0;
1683			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1684			if (error)
1685				goto done;
1686			if (XFS_IS_CORRUPT(mp, i != 1)) {
1687				error = -EFSCORRUPTED;
1688				goto done;
1689			}
1690			error = xfs_bmbt_update(bma->cur, &PREV);
1691			if (error)
1692				goto done;
1693		}
1694		break;
1695
1696	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1697		/*
1698		 * Filling in all of a previously delayed allocation extent.
1699		 * Neither the left nor right neighbors are contiguous with
1700		 * the new one.
1701		 */
1702		PREV.br_startblock = new->br_startblock;
1703		PREV.br_state = new->br_state;
1704		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		/* PREV was rewritten in place and is now a real extent. */
1705		ifp->if_nextents++;
1706
1707		if (bma->cur == NULL)
1708			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1709		else {
1710			rval = XFS_ILOG_CORE;
			/* the new record must not be in the btree yet */
1711			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1712			if (error)
1713				goto done;
1714			if (XFS_IS_CORRUPT(mp, i != 0)) {
1715				error = -EFSCORRUPTED;
1716				goto done;
1717			}
1718			error = xfs_btree_insert(bma->cur, &i);
1719			if (error)
1720				goto done;
1721			if (XFS_IS_CORRUPT(mp, i != 1)) {
1722				error = -EFSCORRUPTED;
1723				goto done;
1724			}
1725		}
1726		break;
1727
1728	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1729		/*
1730		 * Filling in the first part of a previous delayed allocation.
1731		 * The left neighbor is contiguous.
1732		 */
1733		old = LEFT;
1734		temp = PREV.br_blockcount - new->br_blockcount;
		/* never give the remainder more than PREV had */
1735		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1736				startblockval(PREV.br_startblock));
1737
1738		LEFT.br_blockcount += new->br_blockcount;
1739
1740		PREV.br_blockcount = temp;
1741		PREV.br_startoff += new->br_blockcount;
1742		PREV.br_startblock = nullstartblock(da_new);
1743
1744		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1745		xfs_iext_prev(ifp, &bma->icur);
1746		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1747
1748		if (bma->cur == NULL)
1749			rval = XFS_ILOG_DEXT;
1750		else {
1751			rval = 0;
1752			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1753			if (error)
1754				goto done;
1755			if (XFS_IS_CORRUPT(mp, i != 1)) {
1756				error = -EFSCORRUPTED;
1757				goto done;
1758			}
1759			error = xfs_bmbt_update(bma->cur, &LEFT);
1760			if (error)
1761				goto done;
1762		}
1763		break;
1764
1765	case BMAP_LEFT_FILLING:
1766		/*
1767		 * Filling in the first part of a previous delayed allocation.
1768		 * The left neighbor is not contiguous.
1769		 */
1770		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1771		ifp->if_nextents++;
1772
1773		if (bma->cur == NULL)
1774			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1775		else {
1776			rval = XFS_ILOG_CORE;
1777			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1778			if (error)
1779				goto done;
1780			if (XFS_IS_CORRUPT(mp, i != 0)) {
1781				error = -EFSCORRUPTED;
1782				goto done;
1783			}
1784			error = xfs_btree_insert(bma->cur, &i);
1785			if (error)
1786				goto done;
1787			if (XFS_IS_CORRUPT(mp, i != 1)) {
1788				error = -EFSCORRUPTED;
1789				goto done;
1790			}
1791		}
1792
1793		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1794			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1795					&bma->cur, 1, &tmp_rval, whichfork);
1796			rval |= tmp_rval;
1797			if (error)
1798				goto done;
1799		}
1800
		/*
		 * The remaining delalloc tail gets at most what PREV had,
		 * less whatever is recorded in the cursor's bc_ino.allocated.
		 */
1801		temp = PREV.br_blockcount - new->br_blockcount;
1802		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1803			startblockval(PREV.br_startblock) -
1804			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1805
1806		PREV.br_startoff = new_endoff;
1807		PREV.br_blockcount = temp;
1808		PREV.br_startblock = nullstartblock(da_new);
		/* insert the tail after new, then point the cursor at new */
1809		xfs_iext_next(ifp, &bma->icur);
1810		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1811		xfs_iext_prev(ifp, &bma->icur);
1812		break;
1813
1814	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1815		/*
1816		 * Filling in the last part of a previous delayed allocation.
1817		 * The right neighbor is contiguous with the new allocation.
1818		 */
1819		old = RIGHT;
1820		RIGHT.br_startoff = new->br_startoff;
1821		RIGHT.br_startblock = new->br_startblock;
1822		RIGHT.br_blockcount += new->br_blockcount;
1823
1824		if (bma->cur == NULL)
1825			rval = XFS_ILOG_DEXT;
1826		else {
1827			rval = 0;
1828			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1829			if (error)
1830				goto done;
1831			if (XFS_IS_CORRUPT(mp, i != 1)) {
1832				error = -EFSCORRUPTED;
1833				goto done;
1834			}
1835			error = xfs_bmbt_update(bma->cur, &RIGHT);
1836			if (error)
1837				goto done;
1838		}
1839
1840		temp = PREV.br_blockcount - new->br_blockcount;
1841		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1842			startblockval(PREV.br_startblock));
1843
1844		PREV.br_blockcount = temp;
1845		PREV.br_startblock = nullstartblock(da_new);
1846
1847		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1848		xfs_iext_next(ifp, &bma->icur);
1849		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1850		break;
1851
1852	case BMAP_RIGHT_FILLING:
1853		/*
1854		 * Filling in the last part of a previous delayed allocation.
1855		 * The right neighbor is not contiguous.
1856		 */
1857		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1858		ifp->if_nextents++;
1859
1860		if (bma->cur == NULL)
1861			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1862		else {
1863			rval = XFS_ILOG_CORE;
1864			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1865			if (error)
1866				goto done;
1867			if (XFS_IS_CORRUPT(mp, i != 0)) {
1868				error = -EFSCORRUPTED;
1869				goto done;
1870			}
1871			error = xfs_btree_insert(bma->cur, &i);
1872			if (error)
1873				goto done;
1874			if (XFS_IS_CORRUPT(mp, i != 1)) {
1875				error = -EFSCORRUPTED;
1876				goto done;
1877			}
1878		}
1879
1880		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1881			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1882				&bma->cur, 1, &tmp_rval, whichfork);
1883			rval |= tmp_rval;
1884			if (error)
1885				goto done;
1886		}
1887
1888		temp = PREV.br_blockcount - new->br_blockcount;
1889		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1890			startblockval(PREV.br_startblock) -
1891			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1892
1893		PREV.br_startblock = nullstartblock(da_new);
1894		PREV.br_blockcount = temp;
		/* insert the delalloc head before new, then step onto new */
1895		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1896		xfs_iext_next(ifp, &bma->icur);
1897		break;
1898
1899	case 0:
1900		/*
1901		 * Filling in the middle part of a previous delayed allocation.
1902		 * Contiguity is impossible here.
1903		 * This case is avoided almost all the time.
1904		 *
1905		 * We start with a delayed allocation:
1906		 *
1907		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908		 *  PREV @ idx
1909		 *
1910	         * and we are allocating:
1911		 *                     +rrrrrrrrrrrrrrrrr+
1912		 *			      new
1913		 *
1914		 * and we set it up for insertion as:
1915		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916		 *                            new
1917		 *  PREV @ idx          LEFT              RIGHT
1918		 *                      inserted at idx + 1
1919		 */
1920		old = PREV;
1921
1922		/* LEFT is the new middle */
1923		LEFT = *new;
1924
1925		/* RIGHT is the new right */
1926		RIGHT.br_state = PREV.br_state;
1927		RIGHT.br_startoff = new_endoff;
1928		RIGHT.br_blockcount =
1929			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930		RIGHT.br_startblock =
1931			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932					RIGHT.br_blockcount));
1933
1934		/* truncate PREV */
1935		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936		PREV.br_startblock =
1937			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938					PREV.br_blockcount));
1939		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1940
		/* RIGHT goes in first so that LEFT lands in front of it */
1941		xfs_iext_next(ifp, &bma->icur);
1942		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1943		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1944		ifp->if_nextents++;
1945
1946		if (bma->cur == NULL)
1947			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948		else {
1949			rval = XFS_ILOG_CORE;
1950			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951			if (error)
1952				goto done;
1953			if (XFS_IS_CORRUPT(mp, i != 0)) {
1954				error = -EFSCORRUPTED;
1955				goto done;
1956			}
1957			error = xfs_btree_insert(bma->cur, &i);
1958			if (error)
1959				goto done;
1960			if (XFS_IS_CORRUPT(mp, i != 1)) {
1961				error = -EFSCORRUPTED;
1962				goto done;
1963			}
1964		}
1965
1966		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1967			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1968					&bma->cur, 1, &tmp_rval, whichfork);
1969			rval |= tmp_rval;
1970			if (error)
1971				goto done;
1972		}
1973
		/* two delalloc extents remain: sum their block counts */
1974		da_new = startblockval(PREV.br_startblock) +
1975			 startblockval(RIGHT.br_startblock);
1976		break;
1977
1978	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1979	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1980	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1981	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1982	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1983	case BMAP_LEFT_CONTIG:
1984	case BMAP_RIGHT_CONTIG:
1985		/*
1986		 * These cases are all impossible.
1987		 */
1988		ASSERT(0);
1989	}
1990
1991	/* add reverse mapping unless caller opted out */
1992	if (!(bma->flags & XFS_BMAPI_NORMAP))
1993		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1994
1995	/* convert to a btree if necessary */
1996	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1997		int	tmp_logflags;	/* partial log flag return val */
1998
1999		ASSERT(bma->cur == NULL);
2000		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2001				&bma->cur, da_old > 0, &tmp_logflags,
2002				whichfork);
2003		bma->logflags |= tmp_logflags;
2004		if (error)
2005			goto done;
2006	}
2007
	/*
	 * This delta is taken before bc_ino.allocated is folded into da_new
	 * below.
	 */
2008	if (da_new != da_old)
2009		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
2010
2011	if (bma->cur) {
2012		da_new += bma->cur->bc_ino.allocated;
2013		bma->cur->bc_ino.allocated = 0;
2014	}
2015
2016	/* adjust for changes in reserved delayed indirect blocks */
2017	if (da_new != da_old) {
2018		ASSERT(state == 0 || da_new < da_old);
2019		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2020				false);
2021	}
2022
2023	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2024done:
	/* COW fork updates contribute no inode logging flags. */
2025	if (whichfork != XFS_COW_FORK)
2026		bma->logflags |= rval;
2027	return error;
2028#undef	LEFT
2029#undef	RIGHT
2030#undef	PREV
2031}
2032
2033/*
2034 * Convert an unwritten allocation to a real allocation or vice versa.
2035 */
2036int					/* error */
2037xfs_bmap_add_extent_unwritten_real(
2038	struct xfs_trans	*tp,
2039	xfs_inode_t		*ip,	/* incore inode pointer */
2040	int			whichfork,
2041	struct xfs_iext_cursor	*icur,
2042	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
2043	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
2044	int			*logflagsp) /* inode logging flags */
2045{
2046	xfs_btree_cur_t		*cur;	/* btree cursor */
2047	int			error;	/* error return value */
2048	int			i;	/* temp state */
2049	struct xfs_ifork	*ifp;	/* inode fork pointer */
2050	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2051	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2052					/* left is 0, right is 1, prev is 2 */
2053	int			rval=0;	/* return value (logging flags) */
2054	int			state = xfs_bmap_fork_to_state(whichfork);
2055	struct xfs_mount	*mp = ip->i_mount;
2056	struct xfs_bmbt_irec	old;
2057
2058	*logflagsp = 0;
2059
2060	cur = *curp;
2061	ifp = XFS_IFORK_PTR(ip, whichfork);
2062
2063	ASSERT(!isnullstartblock(new->br_startblock));
2064
2065	XFS_STATS_INC(mp, xs_add_exlist);
2066
2067#define	LEFT		r[0]
2068#define	RIGHT		r[1]
2069#define	PREV		r[2]
2070
2071	/*
2072	 * Set up a bunch of variables to make the tests simpler.
2073	 */
2074	error = 0;
2075	xfs_iext_get_extent(ifp, icur, &PREV);
2076	ASSERT(new->br_state != PREV.br_state);
2077	new_endoff = new->br_startoff + new->br_blockcount;
2078	ASSERT(PREV.br_startoff <= new->br_startoff);
2079	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2080
2081	/*
2082	 * Set flags determining what part of the previous oldext allocation
2083	 * extent is being replaced by a newext allocation.
2084	 */
2085	if (PREV.br_startoff == new->br_startoff)
2086		state |= BMAP_LEFT_FILLING;
2087	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2088		state |= BMAP_RIGHT_FILLING;
2089
2090	/*
2091	 * Check and set flags if this segment has a left neighbor.
2092	 * Don't set contiguous if the combined extent would be too large.
2093	 */
2094	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2095		state |= BMAP_LEFT_VALID;
2096		if (isnullstartblock(LEFT.br_startblock))
2097			state |= BMAP_LEFT_DELAY;
2098	}
2099
2100	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2101	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2102	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2103	    LEFT.br_state == new->br_state &&
2104	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2105		state |= BMAP_LEFT_CONTIG;
2106
2107	/*
2108	 * Check and set flags if this segment has a right neighbor.
2109	 * Don't set contiguous if the combined extent would be too large.
2110	 * Also check for all-three-contiguous being too large.
2111	 */
2112	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2113		state |= BMAP_RIGHT_VALID;
2114		if (isnullstartblock(RIGHT.br_startblock))
2115			state |= BMAP_RIGHT_DELAY;
2116	}
2117
2118	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119	    new_endoff == RIGHT.br_startoff &&
2120	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121	    new->br_state == RIGHT.br_state &&
2122	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124		       BMAP_RIGHT_FILLING)) !=
2125		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126		       BMAP_RIGHT_FILLING) ||
2127	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128			<= MAXEXTLEN))
2129		state |= BMAP_RIGHT_CONTIG;
2130
2131	/*
2132	 * Switch out based on the FILLING and CONTIG state bits.
2133	 */
2134	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138		/*
2139		 * Setting all of a previous oldext extent to newext.
2140		 * The left and right neighbors are both contiguous with new.
2141		 */
2142		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2143
2144		xfs_iext_remove(ip, icur, state);
2145		xfs_iext_remove(ip, icur, state);
2146		xfs_iext_prev(ifp, icur);
2147		xfs_iext_update_extent(ip, state, icur, &LEFT);
2148		ifp->if_nextents -= 2;
2149		if (cur == NULL)
2150			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2151		else {
2152			rval = XFS_ILOG_CORE;
2153			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2154			if (error)
2155				goto done;
2156			if (XFS_IS_CORRUPT(mp, i != 1)) {
2157				error = -EFSCORRUPTED;
2158				goto done;
2159			}
2160			if ((error = xfs_btree_delete(cur, &i)))
2161				goto done;
2162			if (XFS_IS_CORRUPT(mp, i != 1)) {
2163				error = -EFSCORRUPTED;
2164				goto done;
2165			}
2166			if ((error = xfs_btree_decrement(cur, 0, &i)))
2167				goto done;
2168			if (XFS_IS_CORRUPT(mp, i != 1)) {
2169				error = -EFSCORRUPTED;
2170				goto done;
2171			}
2172			if ((error = xfs_btree_delete(cur, &i)))
2173				goto done;
2174			if (XFS_IS_CORRUPT(mp, i != 1)) {
2175				error = -EFSCORRUPTED;
2176				goto done;
2177			}
2178			if ((error = xfs_btree_decrement(cur, 0, &i)))
2179				goto done;
2180			if (XFS_IS_CORRUPT(mp, i != 1)) {
2181				error = -EFSCORRUPTED;
2182				goto done;
2183			}
2184			error = xfs_bmbt_update(cur, &LEFT);
2185			if (error)
2186				goto done;
2187		}
2188		break;
2189
2190	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2191		/*
2192		 * Setting all of a previous oldext extent to newext.
2193		 * The left neighbor is contiguous, the right is not.
2194		 */
2195		LEFT.br_blockcount += PREV.br_blockcount;
2196
2197		xfs_iext_remove(ip, icur, state);
2198		xfs_iext_prev(ifp, icur);
2199		xfs_iext_update_extent(ip, state, icur, &LEFT);
2200		ifp->if_nextents--;
2201		if (cur == NULL)
2202			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2203		else {
2204			rval = XFS_ILOG_CORE;
2205			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2206			if (error)
2207				goto done;
2208			if (XFS_IS_CORRUPT(mp, i != 1)) {
2209				error = -EFSCORRUPTED;
2210				goto done;
2211			}
2212			if ((error = xfs_btree_delete(cur, &i)))
2213				goto done;
2214			if (XFS_IS_CORRUPT(mp, i != 1)) {
2215				error = -EFSCORRUPTED;
2216				goto done;
2217			}
2218			if ((error = xfs_btree_decrement(cur, 0, &i)))
2219				goto done;
2220			if (XFS_IS_CORRUPT(mp, i != 1)) {
2221				error = -EFSCORRUPTED;
2222				goto done;
2223			}
2224			error = xfs_bmbt_update(cur, &LEFT);
2225			if (error)
2226				goto done;
2227		}
2228		break;
2229
2230	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2231		/*
2232		 * Setting all of a previous oldext extent to newext.
2233		 * The right neighbor is contiguous, the left is not.
2234		 */
2235		PREV.br_blockcount += RIGHT.br_blockcount;
2236		PREV.br_state = new->br_state;
2237
2238		xfs_iext_next(ifp, icur);
2239		xfs_iext_remove(ip, icur, state);
2240		xfs_iext_prev(ifp, icur);
2241		xfs_iext_update_extent(ip, state, icur, &PREV);
2242		ifp->if_nextents--;
2243
2244		if (cur == NULL)
2245			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2246		else {
2247			rval = XFS_ILOG_CORE;
2248			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2249			if (error)
2250				goto done;
2251			if (XFS_IS_CORRUPT(mp, i != 1)) {
2252				error = -EFSCORRUPTED;
2253				goto done;
2254			}
2255			if ((error = xfs_btree_delete(cur, &i)))
2256				goto done;
2257			if (XFS_IS_CORRUPT(mp, i != 1)) {
2258				error = -EFSCORRUPTED;
2259				goto done;
2260			}
2261			if ((error = xfs_btree_decrement(cur, 0, &i)))
2262				goto done;
2263			if (XFS_IS_CORRUPT(mp, i != 1)) {
2264				error = -EFSCORRUPTED;
2265				goto done;
2266			}
2267			error = xfs_bmbt_update(cur, &PREV);
2268			if (error)
2269				goto done;
2270		}
2271		break;
2272
2273	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2274		/*
2275		 * Setting all of a previous oldext extent to newext.
2276		 * Neither the left nor right neighbors are contiguous with
2277		 * the new one.
2278		 */
2279		PREV.br_state = new->br_state;
2280		xfs_iext_update_extent(ip, state, icur, &PREV);
2281
2282		if (cur == NULL)
2283			rval = XFS_ILOG_DEXT;
2284		else {
2285			rval = 0;
2286			error = xfs_bmbt_lookup_eq(cur, new, &i);
2287			if (error)
2288				goto done;
2289			if (XFS_IS_CORRUPT(mp, i != 1)) {
2290				error = -EFSCORRUPTED;
2291				goto done;
2292			}
2293			error = xfs_bmbt_update(cur, &PREV);
2294			if (error)
2295				goto done;
2296		}
2297		break;
2298
2299	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2300		/*
2301		 * Setting the first part of a previous oldext extent to newext.
2302		 * The left neighbor is contiguous.
2303		 */
2304		LEFT.br_blockcount += new->br_blockcount;
2305
2306		old = PREV;
2307		PREV.br_startoff += new->br_blockcount;
2308		PREV.br_startblock += new->br_blockcount;
2309		PREV.br_blockcount -= new->br_blockcount;
2310
2311		xfs_iext_update_extent(ip, state, icur, &PREV);
2312		xfs_iext_prev(ifp, icur);
2313		xfs_iext_update_extent(ip, state, icur, &LEFT);
2314
2315		if (cur == NULL)
2316			rval = XFS_ILOG_DEXT;
2317		else {
2318			rval = 0;
2319			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2320			if (error)
2321				goto done;
2322			if (XFS_IS_CORRUPT(mp, i != 1)) {
2323				error = -EFSCORRUPTED;
2324				goto done;
2325			}
2326			error = xfs_bmbt_update(cur, &PREV);
2327			if (error)
2328				goto done;
2329			error = xfs_btree_decrement(cur, 0, &i);
2330			if (error)
2331				goto done;
2332			error = xfs_bmbt_update(cur, &LEFT);
2333			if (error)
2334				goto done;
2335		}
2336		break;
2337
2338	case BMAP_LEFT_FILLING:
2339		/*
2340		 * Setting the first part of a previous oldext extent to newext.
2341		 * The left neighbor is not contiguous.
2342		 */
2343		old = PREV;
2344		PREV.br_startoff += new->br_blockcount;
2345		PREV.br_startblock += new->br_blockcount;
2346		PREV.br_blockcount -= new->br_blockcount;
2347
2348		xfs_iext_update_extent(ip, state, icur, &PREV);
2349		xfs_iext_insert(ip, icur, new, state);
2350		ifp->if_nextents++;
2351
2352		if (cur == NULL)
2353			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354		else {
2355			rval = XFS_ILOG_CORE;
2356			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2357			if (error)
2358				goto done;
2359			if (XFS_IS_CORRUPT(mp, i != 1)) {
2360				error = -EFSCORRUPTED;
2361				goto done;
2362			}
2363			error = xfs_bmbt_update(cur, &PREV);
2364			if (error)
2365				goto done;
2366			cur->bc_rec.b = *new;
2367			if ((error = xfs_btree_insert(cur, &i)))
2368				goto done;
2369			if (XFS_IS_CORRUPT(mp, i != 1)) {
2370				error = -EFSCORRUPTED;
2371				goto done;
2372			}
2373		}
2374		break;
2375
2376	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377		/*
2378		 * Setting the last part of a previous oldext extent to newext.
2379		 * The right neighbor is contiguous with the new allocation.
2380		 */
2381		old = PREV;
2382		PREV.br_blockcount -= new->br_blockcount;
2383
2384		RIGHT.br_startoff = new->br_startoff;
2385		RIGHT.br_startblock = new->br_startblock;
2386		RIGHT.br_blockcount += new->br_blockcount;
2387
2388		xfs_iext_update_extent(ip, state, icur, &PREV);
2389		xfs_iext_next(ifp, icur);
2390		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2391
2392		if (cur == NULL)
2393			rval = XFS_ILOG_DEXT;
2394		else {
2395			rval = 0;
2396			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2397			if (error)
2398				goto done;
2399			if (XFS_IS_CORRUPT(mp, i != 1)) {
2400				error = -EFSCORRUPTED;
2401				goto done;
2402			}
2403			error = xfs_bmbt_update(cur, &PREV);
2404			if (error)
2405				goto done;
2406			error = xfs_btree_increment(cur, 0, &i);
2407			if (error)
2408				goto done;
2409			error = xfs_bmbt_update(cur, &RIGHT);
2410			if (error)
2411				goto done;
2412		}
2413		break;
2414
2415	case BMAP_RIGHT_FILLING:
2416		/*
2417		 * Setting the last part of a previous oldext extent to newext.
2418		 * The right neighbor is not contiguous.
2419		 */
2420		old = PREV;
2421		PREV.br_blockcount -= new->br_blockcount;
2422
2423		xfs_iext_update_extent(ip, state, icur, &PREV);
2424		xfs_iext_next(ifp, icur);
2425		xfs_iext_insert(ip, icur, new, state);
2426		ifp->if_nextents++;
2427
2428		if (cur == NULL)
2429			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430		else {
2431			rval = XFS_ILOG_CORE;
2432			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2433			if (error)
2434				goto done;
2435			if (XFS_IS_CORRUPT(mp, i != 1)) {
2436				error = -EFSCORRUPTED;
2437				goto done;
2438			}
2439			error = xfs_bmbt_update(cur, &PREV);
2440			if (error)
2441				goto done;
2442			error = xfs_bmbt_lookup_eq(cur, new, &i);
2443			if (error)
2444				goto done;
2445			if (XFS_IS_CORRUPT(mp, i != 0)) {
2446				error = -EFSCORRUPTED;
2447				goto done;
2448			}
2449			if ((error = xfs_btree_insert(cur, &i)))
2450				goto done;
2451			if (XFS_IS_CORRUPT(mp, i != 1)) {
2452				error = -EFSCORRUPTED;
2453				goto done;
2454			}
2455		}
2456		break;
2457
2458	case 0:
2459		/*
2460		 * Setting the middle part of a previous oldext extent to
2461		 * newext.  Contiguity is impossible here.
2462		 * One extent becomes three extents.
2463		 */
2464		old = PREV;
2465		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2466
2467		r[0] = *new;
2468		r[1].br_startoff = new_endoff;
2469		r[1].br_blockcount =
2470			old.br_startoff + old.br_blockcount - new_endoff;
2471		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2472		r[1].br_state = PREV.br_state;
2473
2474		xfs_iext_update_extent(ip, state, icur, &PREV);
2475		xfs_iext_next(ifp, icur);
2476		xfs_iext_insert(ip, icur, &r[1], state);
2477		xfs_iext_insert(ip, icur, &r[0], state);
2478		ifp->if_nextents += 2;
2479
2480		if (cur == NULL)
2481			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2482		else {
2483			rval = XFS_ILOG_CORE;
2484			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2485			if (error)
2486				goto done;
2487			if (XFS_IS_CORRUPT(mp, i != 1)) {
2488				error = -EFSCORRUPTED;
2489				goto done;
2490			}
2491			/* new right extent - oldext */
2492			error = xfs_bmbt_update(cur, &r[1]);
2493			if (error)
2494				goto done;
2495			/* new left extent - oldext */
2496			cur->bc_rec.b = PREV;
2497			if ((error = xfs_btree_insert(cur, &i)))
2498				goto done;
2499			if (XFS_IS_CORRUPT(mp, i != 1)) {
2500				error = -EFSCORRUPTED;
2501				goto done;
2502			}
2503			/*
2504			 * Reset the cursor to the position of the new extent
2505			 * we are about to insert as we can't trust it after
2506			 * the previous insert.
2507			 */
2508			error = xfs_bmbt_lookup_eq(cur, new, &i);
2509			if (error)
2510				goto done;
2511			if (XFS_IS_CORRUPT(mp, i != 0)) {
2512				error = -EFSCORRUPTED;
2513				goto done;
2514			}
2515			/* new middle extent - newext */
2516			if ((error = xfs_btree_insert(cur, &i)))
2517				goto done;
2518			if (XFS_IS_CORRUPT(mp, i != 1)) {
2519				error = -EFSCORRUPTED;
2520				goto done;
2521			}
2522		}
2523		break;
2524
2525	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2526	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2527	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2528	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2529	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2530	case BMAP_LEFT_CONTIG:
2531	case BMAP_RIGHT_CONTIG:
2532		/*
2533		 * These cases are all impossible.
2534		 */
2535		ASSERT(0);
2536	}
2537
2538	/* update reverse mappings */
2539	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2540
2541	/* convert to a btree if necessary */
2542	if (xfs_bmap_needs_btree(ip, whichfork)) {
2543		int	tmp_logflags;	/* partial log flag return val */
2544
2545		ASSERT(cur == NULL);
2546		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2547				&tmp_logflags, whichfork);
2548		*logflagsp |= tmp_logflags;
2549		if (error)
2550			goto done;
2551	}
2552
2553	/* clear out the allocated field, done with it now in any case. */
2554	if (cur) {
2555		cur->bc_ino.allocated = 0;
2556		*curp = cur;
2557	}
2558
2559	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2560done:
2561	*logflagsp |= rval;
2562	return error;
2563#undef	LEFT
2564#undef	RIGHT
2565#undef	PREV
2566}
2567
2568/*
2569 * Convert a hole to a delayed allocation.
2570 */
2571STATIC void
2572xfs_bmap_add_extent_hole_delay(
2573	xfs_inode_t		*ip,	/* incore inode pointer */
2574	int			whichfork,
2575	struct xfs_iext_cursor	*icur,
2576	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2577{
2578	struct xfs_ifork	*ifp;	/* inode fork pointer */
2579	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2580	xfs_filblks_t		newlen=0;	/* new indirect size */
2581	xfs_filblks_t		oldlen=0;	/* old indirect size */
2582	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2583	int			state = xfs_bmap_fork_to_state(whichfork);
2584	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2585
2586	ifp = XFS_IFORK_PTR(ip, whichfork);
2587	ASSERT(isnullstartblock(new->br_startblock));
2588
2589	/*
2590	 * Check and set flags if this segment has a left neighbor
2591	 */
2592	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2593		state |= BMAP_LEFT_VALID;
2594		if (isnullstartblock(left.br_startblock))
2595			state |= BMAP_LEFT_DELAY;
2596	}
2597
2598	/*
2599	 * Check and set flags if the current (right) segment exists.
2600	 * If it doesn't exist, we're converting the hole at end-of-file.
2601	 */
2602	if (xfs_iext_get_extent(ifp, icur, &right)) {
2603		state |= BMAP_RIGHT_VALID;
2604		if (isnullstartblock(right.br_startblock))
2605			state |= BMAP_RIGHT_DELAY;
2606	}
2607
2608	/*
2609	 * Set contiguity flags on the left and right neighbors.
2610	 * Don't let extents get too large, even if the pieces are contiguous.
2611	 */
2612	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2613	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2614	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2615		state |= BMAP_LEFT_CONTIG;
2616
2617	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2618	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2619	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2620	    (!(state & BMAP_LEFT_CONTIG) ||
2621	     (left.br_blockcount + new->br_blockcount +
2622	      right.br_blockcount <= MAXEXTLEN)))
2623		state |= BMAP_RIGHT_CONTIG;
2624
2625	/*
2626	 * Switch out based on the contiguity flags.
2627	 */
2628	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2629	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2630		/*
2631		 * New allocation is contiguous with delayed allocations
2632		 * on the left and on the right.
2633		 * Merge all three into a single extent record.
2634		 */
2635		temp = left.br_blockcount + new->br_blockcount +
2636			right.br_blockcount;
2637
2638		oldlen = startblockval(left.br_startblock) +
2639			startblockval(new->br_startblock) +
2640			startblockval(right.br_startblock);
2641		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2642					 oldlen);
2643		left.br_startblock = nullstartblock(newlen);
2644		left.br_blockcount = temp;
2645
2646		xfs_iext_remove(ip, icur, state);
2647		xfs_iext_prev(ifp, icur);
2648		xfs_iext_update_extent(ip, state, icur, &left);
2649		break;
2650
2651	case BMAP_LEFT_CONTIG:
2652		/*
2653		 * New allocation is contiguous with a delayed allocation
2654		 * on the left.
2655		 * Merge the new allocation with the left neighbor.
2656		 */
2657		temp = left.br_blockcount + new->br_blockcount;
2658
2659		oldlen = startblockval(left.br_startblock) +
2660			startblockval(new->br_startblock);
2661		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2662					 oldlen);
2663		left.br_blockcount = temp;
2664		left.br_startblock = nullstartblock(newlen);
2665
2666		xfs_iext_prev(ifp, icur);
2667		xfs_iext_update_extent(ip, state, icur, &left);
2668		break;
2669
2670	case BMAP_RIGHT_CONTIG:
2671		/*
2672		 * New allocation is contiguous with a delayed allocation
2673		 * on the right.
2674		 * Merge the new allocation with the right neighbor.
2675		 */
2676		temp = new->br_blockcount + right.br_blockcount;
2677		oldlen = startblockval(new->br_startblock) +
2678			startblockval(right.br_startblock);
2679		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2680					 oldlen);
2681		right.br_startoff = new->br_startoff;
2682		right.br_startblock = nullstartblock(newlen);
2683		right.br_blockcount = temp;
2684		xfs_iext_update_extent(ip, state, icur, &right);
2685		break;
2686
2687	case 0:
2688		/*
2689		 * New allocation is not contiguous with another
2690		 * delayed allocation.
2691		 * Insert a new entry.
2692		 */
2693		oldlen = newlen = 0;
2694		xfs_iext_insert(ip, icur, new, state);
2695		break;
2696	}
2697	if (oldlen != newlen) {
2698		ASSERT(oldlen > newlen);
2699		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2700				 false);
2701		/*
2702		 * Nothing to do for disk quota accounting here.
2703		 */
2704		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2705	}
2706}
2707
2708/*
2709 * Convert a hole to a real allocation.
2710 */
2711STATIC int				/* error */
2712xfs_bmap_add_extent_hole_real(
2713	struct xfs_trans	*tp,
2714	struct xfs_inode	*ip,
2715	int			whichfork,
2716	struct xfs_iext_cursor	*icur,
2717	struct xfs_btree_cur	**curp,
2718	struct xfs_bmbt_irec	*new,
2719	int			*logflagsp,
2720	int			flags)
2721{
2722	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
2723	struct xfs_mount	*mp = ip->i_mount;
2724	struct xfs_btree_cur	*cur = *curp;
2725	int			error;	/* error return value */
2726	int			i;	/* temp state */
2727	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2728	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2729	int			rval=0;	/* return value (logging flags) */
2730	int			state = xfs_bmap_fork_to_state(whichfork);
2731	struct xfs_bmbt_irec	old;
2732
2733	ASSERT(!isnullstartblock(new->br_startblock));
2734	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2735
2736	XFS_STATS_INC(mp, xs_add_exlist);
2737
2738	/*
2739	 * Check and set flags if this segment has a left neighbor.
2740	 */
2741	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2742		state |= BMAP_LEFT_VALID;
2743		if (isnullstartblock(left.br_startblock))
2744			state |= BMAP_LEFT_DELAY;
2745	}
2746
2747	/*
2748	 * Check and set flags if this segment has a current value.
2749	 * Not true if we're inserting into the "hole" at eof.
2750	 */
2751	if (xfs_iext_get_extent(ifp, icur, &right)) {
2752		state |= BMAP_RIGHT_VALID;
2753		if (isnullstartblock(right.br_startblock))
2754			state |= BMAP_RIGHT_DELAY;
2755	}
2756
2757	/*
2758	 * We're inserting a real allocation between "left" and "right".
2759	 * Set the contiguity flags.  Don't let extents get too large.
2760	 */
2761	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2762	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2763	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2764	    left.br_state == new->br_state &&
2765	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2766		state |= BMAP_LEFT_CONTIG;
2767
2768	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2769	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2770	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2771	    new->br_state == right.br_state &&
2772	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2773	    (!(state & BMAP_LEFT_CONTIG) ||
2774	     left.br_blockcount + new->br_blockcount +
2775	     right.br_blockcount <= MAXEXTLEN))
2776		state |= BMAP_RIGHT_CONTIG;
2777
2778	error = 0;
2779	/*
2780	 * Select which case we're in here, and implement it.
2781	 */
2782	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2783	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2784		/*
2785		 * New allocation is contiguous with real allocations on the
2786		 * left and on the right.
2787		 * Merge all three into a single extent record.
2788		 */
2789		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2790
2791		xfs_iext_remove(ip, icur, state);
2792		xfs_iext_prev(ifp, icur);
2793		xfs_iext_update_extent(ip, state, icur, &left);
2794		ifp->if_nextents--;
2795
2796		if (cur == NULL) {
2797			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2798		} else {
2799			rval = XFS_ILOG_CORE;
2800			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2801			if (error)
2802				goto done;
2803			if (XFS_IS_CORRUPT(mp, i != 1)) {
2804				error = -EFSCORRUPTED;
2805				goto done;
2806			}
2807			error = xfs_btree_delete(cur, &i);
2808			if (error)
2809				goto done;
2810			if (XFS_IS_CORRUPT(mp, i != 1)) {
2811				error = -EFSCORRUPTED;
2812				goto done;
2813			}
2814			error = xfs_btree_decrement(cur, 0, &i);
2815			if (error)
2816				goto done;
2817			if (XFS_IS_CORRUPT(mp, i != 1)) {
2818				error = -EFSCORRUPTED;
2819				goto done;
2820			}
2821			error = xfs_bmbt_update(cur, &left);
2822			if (error)
2823				goto done;
2824		}
2825		break;
2826
2827	case BMAP_LEFT_CONTIG:
2828		/*
2829		 * New allocation is contiguous with a real allocation
2830		 * on the left.
2831		 * Merge the new allocation with the left neighbor.
2832		 */
2833		old = left;
2834		left.br_blockcount += new->br_blockcount;
2835
2836		xfs_iext_prev(ifp, icur);
2837		xfs_iext_update_extent(ip, state, icur, &left);
2838
2839		if (cur == NULL) {
2840			rval = xfs_ilog_fext(whichfork);
2841		} else {
2842			rval = 0;
2843			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2844			if (error)
2845				goto done;
2846			if (XFS_IS_CORRUPT(mp, i != 1)) {
2847				error = -EFSCORRUPTED;
2848				goto done;
2849			}
2850			error = xfs_bmbt_update(cur, &left);
2851			if (error)
2852				goto done;
2853		}
2854		break;
2855
2856	case BMAP_RIGHT_CONTIG:
2857		/*
2858		 * New allocation is contiguous with a real allocation
2859		 * on the right.
2860		 * Merge the new allocation with the right neighbor.
2861		 */
2862		old = right;
2863
2864		right.br_startoff = new->br_startoff;
2865		right.br_startblock = new->br_startblock;
2866		right.br_blockcount += new->br_blockcount;
2867		xfs_iext_update_extent(ip, state, icur, &right);
2868
2869		if (cur == NULL) {
2870			rval = xfs_ilog_fext(whichfork);
2871		} else {
2872			rval = 0;
2873			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2874			if (error)
2875				goto done;
2876			if (XFS_IS_CORRUPT(mp, i != 1)) {
2877				error = -EFSCORRUPTED;
2878				goto done;
2879			}
2880			error = xfs_bmbt_update(cur, &right);
2881			if (error)
2882				goto done;
2883		}
2884		break;
2885
2886	case 0:
2887		/*
2888		 * New allocation is not contiguous with another
2889		 * real allocation.
2890		 * Insert a new entry.
2891		 */
2892		xfs_iext_insert(ip, icur, new, state);
2893		ifp->if_nextents++;
2894
2895		if (cur == NULL) {
2896			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2897		} else {
2898			rval = XFS_ILOG_CORE;
2899			error = xfs_bmbt_lookup_eq(cur, new, &i);
2900			if (error)
2901				goto done;
2902			if (XFS_IS_CORRUPT(mp, i != 0)) {
2903				error = -EFSCORRUPTED;
2904				goto done;
2905			}
2906			error = xfs_btree_insert(cur, &i);
2907			if (error)
2908				goto done;
2909			if (XFS_IS_CORRUPT(mp, i != 1)) {
2910				error = -EFSCORRUPTED;
2911				goto done;
2912			}
2913		}
2914		break;
2915	}
2916
2917	/* add reverse mapping unless caller opted out */
2918	if (!(flags & XFS_BMAPI_NORMAP))
2919		xfs_rmap_map_extent(tp, ip, whichfork, new);
2920
2921	/* convert to a btree if necessary */
2922	if (xfs_bmap_needs_btree(ip, whichfork)) {
2923		int	tmp_logflags;	/* partial log flag return val */
2924
2925		ASSERT(cur == NULL);
2926		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2927				&tmp_logflags, whichfork);
2928		*logflagsp |= tmp_logflags;
2929		cur = *curp;
2930		if (error)
2931			goto done;
2932	}
2933
2934	/* clear out the allocated field, done with it now in any case. */
2935	if (cur)
2936		cur->bc_ino.allocated = 0;
2937
2938	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2939done:
2940	*logflagsp |= rval;
2941	return error;
2942}
2943
2944/*
2945 * Functions used in the extent read, allocate and remove paths
2946 */
2947
2948/*
2949 * Adjust the size of the new extent based on di_extsize and rt extsize.
2950 */
2951int
2952xfs_bmap_extsize_align(
2953	xfs_mount_t	*mp,
2954	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2955	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2956	xfs_extlen_t	extsz,		/* align to this extent size */
2957	int		rt,		/* is this a realtime inode? */
2958	int		eof,		/* is extent at end-of-file? */
2959	int		delay,		/* creating delalloc extent? */
2960	int		convert,	/* overwriting unwritten extent? */
2961	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2962	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2963{
2964	xfs_fileoff_t	orig_off;	/* original offset */
2965	xfs_extlen_t	orig_alen;	/* original length */
2966	xfs_fileoff_t	orig_end;	/* original off+len */
2967	xfs_fileoff_t	nexto;		/* next file offset */
2968	xfs_fileoff_t	prevo;		/* previous file offset */
2969	xfs_fileoff_t	align_off;	/* temp for offset */
2970	xfs_extlen_t	align_alen;	/* temp for length */
2971	xfs_extlen_t	temp;		/* temp for calculations */
2972
2973	if (convert)
2974		return 0;
2975
2976	orig_off = align_off = *offp;
2977	orig_alen = align_alen = *lenp;
2978	orig_end = orig_off + orig_alen;
2979
2980	/*
2981	 * If this request overlaps an existing extent, then don't
2982	 * attempt to perform any additional alignment.
2983	 */
2984	if (!delay && !eof &&
2985	    (orig_off >= gotp->br_startoff) &&
2986	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2987		return 0;
2988	}
2989
2990	/*
2991	 * If the file offset is unaligned vs. the extent size
2992	 * we need to align it.  This will be possible unless
2993	 * the file was previously written with a kernel that didn't
2994	 * perform this alignment, or if a truncate shot us in the
2995	 * foot.
2996	 */
2997	div_u64_rem(orig_off, extsz, &temp);
2998	if (temp) {
2999		align_alen += temp;
3000		align_off -= temp;
3001	}
3002
3003	/* Same adjustment for the end of the requested area. */
3004	temp = (align_alen % extsz);
3005	if (temp)
3006		align_alen += extsz - temp;
3007
3008	/*
3009	 * For large extent hint sizes, the aligned extent might be larger than
3010	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3011	 * the length back under MAXEXTLEN. The outer allocation loops handle
3012	 * short allocation just fine, so it is safe to do this. We only want to
3013	 * do it when we are forced to, though, because it means more allocation
3014	 * operations are required.
3015	 */
3016	while (align_alen > MAXEXTLEN)
3017		align_alen -= extsz;
3018	ASSERT(align_alen <= MAXEXTLEN);
3019
3020	/*
3021	 * If the previous block overlaps with this proposed allocation
3022	 * then move the start forward without adjusting the length.
3023	 */
3024	if (prevp->br_startoff != NULLFILEOFF) {
3025		if (prevp->br_startblock == HOLESTARTBLOCK)
3026			prevo = prevp->br_startoff;
3027		else
3028			prevo = prevp->br_startoff + prevp->br_blockcount;
3029	} else
3030		prevo = 0;
3031	if (align_off != orig_off && align_off < prevo)
3032		align_off = prevo;
3033	/*
3034	 * If the next block overlaps with this proposed allocation
3035	 * then move the start back without adjusting the length,
3036	 * but not before offset 0.
3037	 * This may of course make the start overlap previous block,
3038	 * and if we hit the offset 0 limit then the next block
3039	 * can still overlap too.
3040	 */
3041	if (!eof && gotp->br_startoff != NULLFILEOFF) {
3042		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3043		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3044			nexto = gotp->br_startoff + gotp->br_blockcount;
3045		else
3046			nexto = gotp->br_startoff;
3047	} else
3048		nexto = NULLFILEOFF;
3049	if (!eof &&
3050	    align_off + align_alen != orig_end &&
3051	    align_off + align_alen > nexto)
3052		align_off = nexto > align_alen ? nexto - align_alen : 0;
3053	/*
3054	 * If we're now overlapping the next or previous extent that
3055	 * means we can't fit an extsz piece in this hole.  Just move
3056	 * the start forward to the first valid spot and set
3057	 * the length so we hit the end.
3058	 */
3059	if (align_off != orig_off && align_off < prevo)
3060		align_off = prevo;
3061	if (align_off + align_alen != orig_end &&
3062	    align_off + align_alen > nexto &&
3063	    nexto != NULLFILEOFF) {
3064		ASSERT(nexto > prevo);
3065		align_alen = nexto - align_off;
3066	}
3067
3068	/*
3069	 * If realtime, and the result isn't a multiple of the realtime
3070	 * extent size we need to remove blocks until it is.
3071	 */
3072	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3073		/*
3074		 * We're not covering the original request, or
3075		 * we won't be able to once we fix the length.
3076		 */
3077		if (orig_off < align_off ||
3078		    orig_end > align_off + align_alen ||
3079		    align_alen - temp < orig_alen)
3080			return -EINVAL;
3081		/*
3082		 * Try to fix it by moving the start up.
3083		 */
3084		if (align_off + temp <= orig_off) {
3085			align_alen -= temp;
3086			align_off += temp;
3087		}
3088		/*
3089		 * Try to fix it by moving the end in.
3090		 */
3091		else if (align_off + align_alen - temp >= orig_end)
3092			align_alen -= temp;
3093		/*
3094		 * Set the start to the minimum then trim the length.
3095		 */
3096		else {
3097			align_alen -= orig_off - align_off;
3098			align_off = orig_off;
3099			align_alen -= align_alen % mp->m_sb.sb_rextsize;
3100		}
3101		/*
3102		 * Result doesn't cover the request, fail it.
3103		 */
3104		if (orig_off < align_off || orig_end > align_off + align_alen)
3105			return -EINVAL;
3106	} else {
3107		ASSERT(orig_off >= align_off);
3108		/* see MAXEXTLEN handling above */
3109		ASSERT(orig_end <= align_off + align_alen ||
3110		       align_alen + extsz > MAXEXTLEN);
3111	}
3112
3113#ifdef DEBUG
3114	if (!eof && gotp->br_startoff != NULLFILEOFF)
3115		ASSERT(align_off + align_alen <= gotp->br_startoff);
3116	if (prevp->br_startoff != NULLFILEOFF)
3117		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3118#endif
3119
3120	*lenp = align_alen;
3121	*offp = align_off;
3122	return 0;
3123}
3124
3125#define XFS_ALLOC_GAP_UNITS	4
3126
3127void
3128xfs_bmap_adjacent(
3129	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3130{
3131	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
3132	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
3133	xfs_mount_t	*mp;		/* mount point structure */
3134	int		nullfb;		/* true if ap->firstblock isn't set */
3135	int		rt;		/* true if inode is realtime */
3136
3137#define	ISVALID(x,y)	\
3138	(rt ? \
3139		(x) < mp->m_sb.sb_rblocks : \
3140		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3141		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3142		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3143
3144	mp = ap->ip->i_mount;
3145	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3146	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3147		(ap->datatype & XFS_ALLOC_USERDATA);
3148	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3149							ap->tp->t_firstblock);
3150	/*
3151	 * If allocating at eof, and there's a previous real block,
3152	 * try to use its last block as our starting point.
3153	 */
3154	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3155	    !isnullstartblock(ap->prev.br_startblock) &&
3156	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3157		    ap->prev.br_startblock)) {
3158		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3159		/*
3160		 * Adjust for the gap between prevp and us.
3161		 */
3162		adjust = ap->offset -
3163			(ap->prev.br_startoff + ap->prev.br_blockcount);
3164		if (adjust &&
3165		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3166			ap->blkno += adjust;
3167	}
3168	/*
3169	 * If not at eof, then compare the two neighbor blocks.
3170	 * Figure out whether either one gives us a good starting point,
3171	 * and pick the better one.
3172	 */
3173	else if (!ap->eof) {
3174		xfs_fsblock_t	gotbno;		/* right side block number */
3175		xfs_fsblock_t	gotdiff=0;	/* right side difference */
3176		xfs_fsblock_t	prevbno;	/* left side block number */
3177		xfs_fsblock_t	prevdiff=0;	/* left side difference */
3178
3179		/*
3180		 * If there's a previous (left) block, select a requested
3181		 * start block based on it.
3182		 */
3183		if (ap->prev.br_startoff != NULLFILEOFF &&
3184		    !isnullstartblock(ap->prev.br_startblock) &&
3185		    (prevbno = ap->prev.br_startblock +
3186			       ap->prev.br_blockcount) &&
3187		    ISVALID(prevbno, ap->prev.br_startblock)) {
3188			/*
3189			 * Calculate gap to end of previous block.
3190			 */
3191			adjust = prevdiff = ap->offset -
3192				(ap->prev.br_startoff +
3193				 ap->prev.br_blockcount);
3194			/*
3195			 * Figure the startblock based on the previous block's
3196			 * end and the gap size.
3197			 * Heuristic!
3198			 * If the gap is large relative to the piece we're
3199			 * allocating, or using it gives us an invalid block
3200			 * number, then just use the end of the previous block.
3201			 */
3202			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3203			    ISVALID(prevbno + prevdiff,
3204				    ap->prev.br_startblock))
3205				prevbno += adjust;
3206			else
3207				prevdiff += adjust;
3208			/*
3209			 * If the firstblock forbids it, can't use it,
3210			 * must use default.
3211			 */
3212			if (!rt && !nullfb &&
3213			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3214				prevbno = NULLFSBLOCK;
3215		}
3216		/*
3217		 * No previous block or can't follow it, just default.
3218		 */
3219		else
3220			prevbno = NULLFSBLOCK;
3221		/*
3222		 * If there's a following (right) block, select a requested
3223		 * start block based on it.
3224		 */
3225		if (!isnullstartblock(ap->got.br_startblock)) {
3226			/*
3227			 * Calculate gap to start of next block.
3228			 */
3229			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3230			/*
3231			 * Figure the startblock based on the next block's
3232			 * start and the gap size.
3233			 */
3234			gotbno = ap->got.br_startblock;
3235			/*
3236			 * Heuristic!
3237			 * If the gap is large relative to the piece we're
3238			 * allocating, or using it gives us an invalid block
3239			 * number, then just use the start of the next block
3240			 * offset by our length.
3241			 */
3242			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3243			    ISVALID(gotbno - gotdiff, gotbno))
3244				gotbno -= adjust;
3245			else if (ISVALID(gotbno - ap->length, gotbno)) {
3246				gotbno -= ap->length;
3247				gotdiff += adjust - ap->length;
3248			} else
3249				gotdiff += adjust;
3250			/*
3251			 * If the firstblock forbids it, can't use it,
3252			 * must use default.
3253			 */
3254			if (!rt && !nullfb &&
3255			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3256				gotbno = NULLFSBLOCK;
3257		}
3258		/*
3259		 * No next block, just default.
3260		 */
3261		else
3262			gotbno = NULLFSBLOCK;
3263		/*
3264		 * If both valid, pick the better one, else the only good
3265		 * one, else ap->blkno is already set (to 0 or the inode block).
3266		 */
3267		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3268			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3269		else if (prevbno != NULLFSBLOCK)
3270			ap->blkno = prevbno;
3271		else if (gotbno != NULLFSBLOCK)
3272			ap->blkno = gotbno;
3273	}
3274#undef ISVALID
3275}
3276
3277static int
3278xfs_bmap_longest_free_extent(
3279	struct xfs_trans	*tp,
3280	xfs_agnumber_t		ag,
3281	xfs_extlen_t		*blen,
3282	int			*notinit)
3283{
3284	struct xfs_mount	*mp = tp->t_mountp;
3285	struct xfs_perag	*pag;
3286	xfs_extlen_t		longest;
3287	int			error = 0;
3288
3289	pag = xfs_perag_get(mp, ag);
3290	if (!pag->pagf_init) {
3291		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3292		if (error) {
3293			/* Couldn't lock the AGF, so skip this AG. */
3294			if (error == -EAGAIN) {
3295				*notinit = 1;
3296				error = 0;
3297			}
3298			goto out;
3299		}
3300	}
3301
3302	longest = xfs_alloc_longest_free_extent(pag,
3303				xfs_alloc_min_freelist(mp, pag),
3304				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3305	if (*blen < longest)
3306		*blen = longest;
3307
3308out:
3309	xfs_perag_put(pag);
3310	return error;
3311}
3312
3313static void
3314xfs_bmap_select_minlen(
3315	struct xfs_bmalloca	*ap,
3316	struct xfs_alloc_arg	*args,
3317	xfs_extlen_t		*blen,
3318	int			notinit)
3319{
3320	if (notinit || *blen < ap->minlen) {
3321		/*
3322		 * Since we did a BUF_TRYLOCK above, it is possible that
3323		 * there is space for this request.
3324		 */
3325		args->minlen = ap->minlen;
3326	} else if (*blen < args->maxlen) {
3327		/*
3328		 * If the best seen length is less than the request length,
3329		 * use the best as the minimum.
3330		 */
3331		args->minlen = *blen;
3332	} else {
3333		/*
3334		 * Otherwise we've seen an extent as big as maxlen, use that
3335		 * as the minimum.
3336		 */
3337		args->minlen = args->maxlen;
3338	}
3339}
3340
3341STATIC int
3342xfs_bmap_btalloc_nullfb(
3343	struct xfs_bmalloca	*ap,
3344	struct xfs_alloc_arg	*args,
3345	xfs_extlen_t		*blen)
3346{
3347	struct xfs_mount	*mp = ap->ip->i_mount;
3348	xfs_agnumber_t		ag, startag;
3349	int			notinit = 0;
3350	int			error;
3351
3352	args->type = XFS_ALLOCTYPE_START_BNO;
3353	args->total = ap->total;
3354
3355	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3356	if (startag == NULLAGNUMBER)
3357		startag = ag = 0;
3358
3359	while (*blen < args->maxlen) {
3360		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3361						     &notinit);
3362		if (error)
3363			return error;
3364
3365		if (++ag == mp->m_sb.sb_agcount)
3366			ag = 0;
3367		if (ag == startag)
3368			break;
3369	}
3370
3371	xfs_bmap_select_minlen(ap, args, blen, notinit);
3372	return 0;
3373}
3374
3375STATIC int
3376xfs_bmap_btalloc_filestreams(
3377	struct xfs_bmalloca	*ap,
3378	struct xfs_alloc_arg	*args,
3379	xfs_extlen_t		*blen)
3380{
3381	struct xfs_mount	*mp = ap->ip->i_mount;
3382	xfs_agnumber_t		ag;
3383	int			notinit = 0;
3384	int			error;
3385
3386	args->type = XFS_ALLOCTYPE_NEAR_BNO;
3387	args->total = ap->total;
3388
3389	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3390	if (ag == NULLAGNUMBER)
3391		ag = 0;
3392
3393	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3394	if (error)
3395		return error;
3396
3397	if (*blen < args->maxlen) {
3398		error = xfs_filestream_new_ag(ap, &ag);
3399		if (error)
3400			return error;
3401
3402		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3403						     &notinit);
3404		if (error)
3405			return error;
3406
3407	}
3408
3409	xfs_bmap_select_minlen(ap, args, blen, notinit);
3410
3411	/*
3412	 * Set the failure fallback case to look in the selected AG as stream
3413	 * may have moved.
3414	 */
3415	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3416	return 0;
3417}
3418
3419/* Update all inode and quota accounting for the allocation we just did. */
3420static void
3421xfs_bmap_btalloc_accounting(
3422	struct xfs_bmalloca	*ap,
3423	struct xfs_alloc_arg	*args)
3424{
3425	if (ap->flags & XFS_BMAPI_COWFORK) {
3426		/*
3427		 * COW fork blocks are in-core only and thus are treated as
3428		 * in-core quota reservation (like delalloc blocks) even when
3429		 * converted to real blocks. The quota reservation is not
3430		 * accounted to disk until blocks are remapped to the data
3431		 * fork. So if these blocks were previously delalloc, we
3432		 * already have quota reservation and there's nothing to do
3433		 * yet.
3434		 */
3435		if (ap->wasdel) {
3436			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3437			return;
3438		}
3439
3440		/*
3441		 * Otherwise, we've allocated blocks in a hole. The transaction
3442		 * has acquired in-core quota reservation for this extent.
3443		 * Rather than account these as real blocks, however, we reduce
3444		 * the transaction quota reservation based on the allocation.
3445		 * This essentially transfers the transaction quota reservation
3446		 * to that of a delalloc extent.
3447		 */
3448		ap->ip->i_delayed_blks += args->len;
3449		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3450				-(long)args->len);
3451		return;
3452	}
3453
3454	/* data/attr fork only */
3455	ap->ip->i_d.di_nblocks += args->len;
3456	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3457	if (ap->wasdel) {
3458		ap->ip->i_delayed_blks -= args->len;
3459		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3460	}
3461	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3462		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3463		args->len);
3464}
3465
3466STATIC int
3467xfs_bmap_btalloc(
3468	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3469{
3470	xfs_mount_t	*mp;		/* mount point structure */
3471	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
3472	xfs_extlen_t	align = 0;	/* minimum allocation alignment */
3473	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
3474	xfs_agnumber_t	ag;
3475	xfs_alloc_arg_t	args;
3476	xfs_fileoff_t	orig_offset;
3477	xfs_extlen_t	orig_length;
3478	xfs_extlen_t	blen;
3479	xfs_extlen_t	nextminlen = 0;
3480	int		nullfb;		/* true if ap->firstblock isn't set */
3481	int		isaligned;
3482	int		tryagain;
3483	int		error;
3484	int		stripe_align;
3485
3486	ASSERT(ap->length);
3487	orig_offset = ap->offset;
3488	orig_length = ap->length;
3489
3490	mp = ap->ip->i_mount;
3491
3492	/* stripe alignment for allocation is determined by mount parameters */
3493	stripe_align = 0;
3494	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3495		stripe_align = mp->m_swidth;
3496	else if (mp->m_dalign)
3497		stripe_align = mp->m_dalign;
3498
3499	if (ap->flags & XFS_BMAPI_COWFORK)
3500		align = xfs_get_cowextsz_hint(ap->ip);
3501	else if (ap->datatype & XFS_ALLOC_USERDATA)
3502		align = xfs_get_extsz_hint(ap->ip);
3503	if (align) {
3504		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3505						align, 0, ap->eof, 0, ap->conv,
3506						&ap->offset, &ap->length);
3507		ASSERT(!error);
3508		ASSERT(ap->length);
3509	}
3510
3511
3512	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3513	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3514							ap->tp->t_firstblock);
3515	if (nullfb) {
3516		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3517		    xfs_inode_is_filestream(ap->ip)) {
3518			ag = xfs_filestream_lookup_ag(ap->ip);
3519			ag = (ag != NULLAGNUMBER) ? ag : 0;
3520			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3521		} else {
3522			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3523		}
3524	} else
3525		ap->blkno = ap->tp->t_firstblock;
3526
3527	xfs_bmap_adjacent(ap);
3528
3529	/*
3530	 * If allowed, use ap->blkno; otherwise must use firstblock since
3531	 * it's in the right allocation group.
3532	 */
3533	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3534		;
3535	else
3536		ap->blkno = ap->tp->t_firstblock;
3537	/*
3538	 * Normal allocation, done through xfs_alloc_vextent.
3539	 */
3540	tryagain = isaligned = 0;
3541	memset(&args, 0, sizeof(args));
3542	args.tp = ap->tp;
3543	args.mp = mp;
3544	args.fsbno = ap->blkno;
3545	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3546
3547	/* Trim the allocation back to the maximum an AG can fit. */
3548	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3549	blen = 0;
3550	if (nullfb) {
3551		/*
3552		 * Search for an allocation group with a single extent large
3553		 * enough for the request.  If one isn't found, then adjust
3554		 * the minimum allocation size to the largest space found.
3555		 */
3556		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3557		    xfs_inode_is_filestream(ap->ip))
3558			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3559		else
3560			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3561		if (error)
3562			return error;
3563	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3564		if (xfs_inode_is_filestream(ap->ip))
3565			args.type = XFS_ALLOCTYPE_FIRST_AG;
3566		else
3567			args.type = XFS_ALLOCTYPE_START_BNO;
3568		args.total = args.minlen = ap->minlen;
3569	} else {
3570		args.type = XFS_ALLOCTYPE_NEAR_BNO;
3571		args.total = ap->total;
3572		args.minlen = ap->minlen;
3573	}
3574	/* apply extent size hints if obtained earlier */
3575	if (align) {
3576		args.prod = align;
3577		div_u64_rem(ap->offset, args.prod, &args.mod);
3578		if (args.mod)
3579			args.mod = args.prod - args.mod;
3580	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3581		args.prod = 1;
3582		args.mod = 0;
3583	} else {
3584		args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3585		div_u64_rem(ap->offset, args.prod, &args.mod);
3586		if (args.mod)
3587			args.mod = args.prod - args.mod;
3588	}
3589	/*
3590	 * If we are not low on available data blocks, and the underlying
3591	 * logical volume manager is a stripe, and the file offset is zero then
3592	 * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3593	 * is only set if the allocation length is >= the stripe unit and the
3594	 * allocation offset is at the end of file.
3595	 */
3596	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3597		if (!ap->offset) {
3598			args.alignment = stripe_align;
3599			atype = args.type;
3600			isaligned = 1;
3601			/*
3602			 * Adjust minlen to try and preserve alignment if we
3603			 * can't guarantee an aligned maxlen extent.
3604			 */
3605			if (blen > args.alignment &&
3606			    blen <= args.maxlen + args.alignment)
3607				args.minlen = blen - args.alignment;
3608			args.minalignslop = 0;
3609		} else {
3610			/*
3611			 * First try an exact bno allocation.
3612			 * If it fails then do a near or start bno
3613			 * allocation with alignment turned on.
3614			 */
3615			atype = args.type;
3616			tryagain = 1;
3617			args.type = XFS_ALLOCTYPE_THIS_BNO;
3618			args.alignment = 1;
3619			/*
3620			 * Compute the minlen+alignment for the
3621			 * next case.  Set slop so that the value
3622			 * of minlen+alignment+slop doesn't go up
3623			 * between the calls.
3624			 */
3625			if (blen > stripe_align && blen <= args.maxlen)
3626				nextminlen = blen - stripe_align;
3627			else
3628				nextminlen = args.minlen;
3629			if (nextminlen + stripe_align > args.minlen + 1)
3630				args.minalignslop =
3631					nextminlen + stripe_align -
3632					args.minlen - 1;
3633			else
3634				args.minalignslop = 0;
3635		}
3636	} else {
3637		args.alignment = 1;
3638		args.minalignslop = 0;
3639	}
3640	args.minleft = ap->minleft;
3641	args.wasdel = ap->wasdel;
3642	args.resv = XFS_AG_RESV_NONE;
3643	args.datatype = ap->datatype;
3644
3645	error = xfs_alloc_vextent(&args);
3646	if (error)
3647		return error;
3648
3649	if (tryagain && args.fsbno == NULLFSBLOCK) {
3650		/*
3651		 * Exact allocation failed. Now try with alignment
3652		 * turned on.
3653		 */
3654		args.type = atype;
3655		args.fsbno = ap->blkno;
3656		args.alignment = stripe_align;
3657		args.minlen = nextminlen;
3658		args.minalignslop = 0;
3659		isaligned = 1;
3660		if ((error = xfs_alloc_vextent(&args)))
3661			return error;
3662	}
3663	if (isaligned && args.fsbno == NULLFSBLOCK) {
3664		/*
3665		 * allocation failed, so turn off alignment and
3666		 * try again.
3667		 */
3668		args.type = atype;
3669		args.fsbno = ap->blkno;
3670		args.alignment = 0;
3671		if ((error = xfs_alloc_vextent(&args)))
3672			return error;
3673	}
3674	if (args.fsbno == NULLFSBLOCK && nullfb &&
3675	    args.minlen > ap->minlen) {
3676		args.minlen = ap->minlen;
3677		args.type = XFS_ALLOCTYPE_START_BNO;
3678		args.fsbno = ap->blkno;
3679		if ((error = xfs_alloc_vextent(&args)))
3680			return error;
3681	}
3682	if (args.fsbno == NULLFSBLOCK && nullfb) {
3683		args.fsbno = 0;
3684		args.type = XFS_ALLOCTYPE_FIRST_AG;
3685		args.total = ap->minlen;
3686		if ((error = xfs_alloc_vextent(&args)))
3687			return error;
3688		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3689	}
3690	if (args.fsbno != NULLFSBLOCK) {
3691		/*
3692		 * check the allocation happened at the same or higher AG than
3693		 * the first block that was allocated.
3694		 */
3695		ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
3696		       XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
3697		       XFS_FSB_TO_AGNO(mp, args.fsbno));
3698
3699		ap->blkno = args.fsbno;
3700		if (ap->tp->t_firstblock == NULLFSBLOCK)
3701			ap->tp->t_firstblock = args.fsbno;
3702		ASSERT(nullfb || fb_agno <= args.agno);
3703		ap->length = args.len;
3704		/*
3705		 * If the extent size hint is active, we tried to round the
3706		 * caller's allocation request offset down to extsz and the
3707		 * length up to another extsz boundary.  If we found a free
3708		 * extent we mapped it in starting at this new offset.  If the
3709		 * newly mapped space isn't long enough to cover any of the
3710		 * range of offsets that was originally requested, move the
3711		 * mapping up so that we can fill as much of the caller's
3712		 * original request as possible.  Free space is apparently
3713		 * very fragmented so we're unlikely to be able to satisfy the
3714		 * hints anyway.
3715		 */
3716		if (ap->length <= orig_length)
3717			ap->offset = orig_offset;
3718		else if (ap->offset + ap->length < orig_offset + orig_length)
3719			ap->offset = orig_offset + orig_length - ap->length;
3720		xfs_bmap_btalloc_accounting(ap, &args);
3721	} else {
3722		ap->blkno = NULLFSBLOCK;
3723		ap->length = 0;
3724	}
3725	return 0;
3726}
3727
3728/* Trim extent to fit a logical block range. */
3729void
3730xfs_trim_extent(
3731	struct xfs_bmbt_irec	*irec,
3732	xfs_fileoff_t		bno,
3733	xfs_filblks_t		len)
3734{
3735	xfs_fileoff_t		distance;
3736	xfs_fileoff_t		end = bno + len;
3737
3738	if (irec->br_startoff + irec->br_blockcount <= bno ||
3739	    irec->br_startoff >= end) {
3740		irec->br_blockcount = 0;
3741		return;
3742	}
3743
3744	if (irec->br_startoff < bno) {
3745		distance = bno - irec->br_startoff;
3746		if (isnullstartblock(irec->br_startblock))
3747			irec->br_startblock = DELAYSTARTBLOCK;
3748		if (irec->br_startblock != DELAYSTARTBLOCK &&
3749		    irec->br_startblock != HOLESTARTBLOCK)
3750			irec->br_startblock += distance;
3751		irec->br_startoff += distance;
3752		irec->br_blockcount -= distance;
3753	}
3754
3755	if (end < irec->br_startoff + irec->br_blockcount) {
3756		distance = irec->br_startoff + irec->br_blockcount - end;
3757		irec->br_blockcount -= distance;
3758	}
3759}
3760
3761/*
3762 * Trim the returned map to the required bounds
3763 */
3764STATIC void
3765xfs_bmapi_trim_map(
3766	struct xfs_bmbt_irec	*mval,
3767	struct xfs_bmbt_irec	*got,
3768	xfs_fileoff_t		*bno,
3769	xfs_filblks_t		len,
3770	xfs_fileoff_t		obno,
3771	xfs_fileoff_t		end,
3772	int			n,
3773	int			flags)
3774{
3775	if ((flags & XFS_BMAPI_ENTIRE) ||
3776	    got->br_startoff + got->br_blockcount <= obno) {
3777		*mval = *got;
3778		if (isnullstartblock(got->br_startblock))
3779			mval->br_startblock = DELAYSTARTBLOCK;
3780		return;
3781	}
3782
3783	if (obno > *bno)
3784		*bno = obno;
3785	ASSERT((*bno >= obno) || (n == 0));
3786	ASSERT(*bno < end);
3787	mval->br_startoff = *bno;
3788	if (isnullstartblock(got->br_startblock))
3789		mval->br_startblock = DELAYSTARTBLOCK;
3790	else
3791		mval->br_startblock = got->br_startblock +
3792					(*bno - got->br_startoff);
3793	/*
3794	 * Return the minimum of what we got and what we asked for for
3795	 * the length.  We can use the len variable here because it is
3796	 * modified below and we could have been there before coming
3797	 * here if the first part of the allocation didn't overlap what
3798	 * was asked for.
3799	 */
3800	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3801			got->br_blockcount - (*bno - got->br_startoff));
3802	mval->br_state = got->br_state;
3803	ASSERT(mval->br_blockcount <= len);
3804	return;
3805}
3806
3807/*
3808 * Update and validate the extent map to return
3809 */
3810STATIC void
3811xfs_bmapi_update_map(
3812	struct xfs_bmbt_irec	**map,
3813	xfs_fileoff_t		*bno,
3814	xfs_filblks_t		*len,
3815	xfs_fileoff_t		obno,
3816	xfs_fileoff_t		end,
3817	int			*n,
3818	int			flags)
3819{
3820	xfs_bmbt_irec_t	*mval = *map;
3821
3822	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3823	       ((mval->br_startoff + mval->br_blockcount) <= end));
3824	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3825	       (mval->br_startoff < obno));
3826
3827	*bno = mval->br_startoff + mval->br_blockcount;
3828	*len = end - *bno;
3829	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3830		/* update previous map with new information */
3831		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3832		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3833		ASSERT(mval->br_state == mval[-1].br_state);
3834		mval[-1].br_blockcount = mval->br_blockcount;
3835		mval[-1].br_state = mval->br_state;
3836	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3837		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3838		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3839		   mval->br_startblock == mval[-1].br_startblock +
3840					  mval[-1].br_blockcount &&
3841		   mval[-1].br_state == mval->br_state) {
3842		ASSERT(mval->br_startoff ==
3843		       mval[-1].br_startoff + mval[-1].br_blockcount);
3844		mval[-1].br_blockcount += mval->br_blockcount;
3845	} else if (*n > 0 &&
3846		   mval->br_startblock == DELAYSTARTBLOCK &&
3847		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3848		   mval->br_startoff ==
3849		   mval[-1].br_startoff + mval[-1].br_blockcount) {
3850		mval[-1].br_blockcount += mval->br_blockcount;
3851		mval[-1].br_state = mval->br_state;
3852	} else if (!((*n == 0) &&
3853		     ((mval->br_startoff + mval->br_blockcount) <=
3854		      obno))) {
3855		mval++;
3856		(*n)++;
3857	}
3858	*map = mval;
3859}
3860
3861/*
3862 * Map file blocks to filesystem blocks without allocation.
3863 */
3864int
3865xfs_bmapi_read(
3866	struct xfs_inode	*ip,
3867	xfs_fileoff_t		bno,
3868	xfs_filblks_t		len,
3869	struct xfs_bmbt_irec	*mval,
3870	int			*nmap,
3871	int			flags)
3872{
3873	struct xfs_mount	*mp = ip->i_mount;
3874	int			whichfork = xfs_bmapi_whichfork(flags);
3875	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3876	struct xfs_bmbt_irec	got;
3877	xfs_fileoff_t		obno;
3878	xfs_fileoff_t		end;
3879	struct xfs_iext_cursor	icur;
3880	int			error;
3881	bool			eof = false;
3882	int			n = 0;
3883
3884	ASSERT(*nmap >= 1);
3885	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3886	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3887
3888	if (WARN_ON_ONCE(!ifp))
3889		return -EFSCORRUPTED;
3890
3891	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3892	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3893		return -EFSCORRUPTED;
3894
3895	if (XFS_FORCED_SHUTDOWN(mp))
3896		return -EIO;
3897
3898	XFS_STATS_INC(mp, xs_blk_mapr);
3899
3900	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3901		error = xfs_iread_extents(NULL, ip, whichfork);
3902		if (error)
3903			return error;
3904	}
3905
3906	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3907		eof = true;
3908	end = bno + len;
3909	obno = bno;
3910
3911	while (bno < end && n < *nmap) {
3912		/* Reading past eof, act as though there's a hole up to end. */
3913		if (eof)
3914			got.br_startoff = end;
3915		if (got.br_startoff > bno) {
3916			/* Reading in a hole.  */
3917			mval->br_startoff = bno;
3918			mval->br_startblock = HOLESTARTBLOCK;
3919			mval->br_blockcount =
3920				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3921			mval->br_state = XFS_EXT_NORM;
3922			bno += mval->br_blockcount;
3923			len -= mval->br_blockcount;
3924			mval++;
3925			n++;
3926			continue;
3927		}
3928
3929		/* set up the extent map to return. */
3930		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3931		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3932
3933		/* If we're done, stop now. */
3934		if (bno >= end || n >= *nmap)
3935			break;
3936
3937		/* Else go on to the next record. */
3938		if (!xfs_iext_next_extent(ifp, &icur, &got))
3939			eof = true;
3940	}
3941	*nmap = n;
3942	return 0;
3943}
3944
3945/*
3946 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3947 * global pool and the extent inserted into the inode in-core extent tree.
3948 *
3949 * On entry, got refers to the first extent beyond the offset of the extent to
3950 * allocate or eof is specified if no such extent exists. On return, got refers
3951 * to the extent record that was inserted to the inode fork.
3952 *
3953 * Note that the allocated extent may have been merged with contiguous extents
3954 * during insertion into the inode fork. Thus, got does not reflect the current
3955 * state of the inode fork on return. If necessary, the caller can use lastx to
3956 * look up the updated record in the inode fork.
3957 */
3958int
3959xfs_bmapi_reserve_delalloc(
3960	struct xfs_inode	*ip,
3961	int			whichfork,
3962	xfs_fileoff_t		off,
3963	xfs_filblks_t		len,
3964	xfs_filblks_t		prealloc,
3965	struct xfs_bmbt_irec	*got,
3966	struct xfs_iext_cursor	*icur,
3967	int			eof)
3968{
3969	struct xfs_mount	*mp = ip->i_mount;
3970	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3971	xfs_extlen_t		alen;
3972	xfs_extlen_t		indlen;
3973	int			error;
3974	xfs_fileoff_t		aoff = off;
3975
3976	/*
3977	 * Cap the alloc length. Keep track of prealloc so we know whether to
3978	 * tag the inode before we return.
3979	 */
3980	alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3981	if (!eof)
3982		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3983	if (prealloc && alen >= len)
3984		prealloc = alen - len;
3985
3986	/* Figure out the extent size, adjust alen */
3987	if (whichfork == XFS_COW_FORK) {
3988		struct xfs_bmbt_irec	prev;
3989		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
3990
3991		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3992			prev.br_startoff = NULLFILEOFF;
3993
3994		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3995					       1, 0, &aoff, &alen);
3996		ASSERT(!error);
3997	}
3998
3999	/*
4000	 * Make a transaction-less quota reservation for delayed allocation
4001	 * blocks.  This number gets adjusted later.  We return if we haven't
4002	 * allocated blocks already inside this loop.
4003	 */
4004	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4005						XFS_QMOPT_RES_REGBLKS);
4006	if (error)
4007		return error;
4008
4009	/*
4010	 * Split changing sb for alen and indlen since they could be coming
4011	 * from different places.
4012	 */
4013	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4014	ASSERT(indlen > 0);
4015
4016	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4017	if (error)
4018		goto out_unreserve_quota;
4019
4020	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4021	if (error)
4022		goto out_unreserve_blocks;
4023
4024
4025	ip->i_delayed_blks += alen;
4026	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4027
4028	got->br_startoff = aoff;
4029	got->br_startblock = nullstartblock(indlen);
4030	got->br_blockcount = alen;
4031	got->br_state = XFS_EXT_NORM;
4032
4033	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4034
4035	/*
4036	 * Tag the inode if blocks were preallocated. Note that COW fork
4037	 * preallocation can occur at the start or end of the extent, even when
4038	 * prealloc == 0, so we must also check the aligned offset and length.
4039	 */
4040	if (whichfork == XFS_DATA_FORK && prealloc)
4041		xfs_inode_set_eofblocks_tag(ip);
4042	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4043		xfs_inode_set_cowblocks_tag(ip);
4044
4045	return 0;
4046
4047out_unreserve_blocks:
4048	xfs_mod_fdblocks(mp, alen, false);
4049out_unreserve_quota:
4050	if (XFS_IS_QUOTA_ON(mp))
4051		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
4052						XFS_QMOPT_RES_REGBLKS);
4053	return error;
4054}
4055
4056static int
4057xfs_bmap_alloc_userdata(
4058	struct xfs_bmalloca	*bma)
4059{
4060	struct xfs_mount	*mp = bma->ip->i_mount;
4061	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4062	int			error;
4063
4064	/*
4065	 * Set the data type being allocated. For the data fork, the first data
4066	 * in the file is treated differently to all other allocations. For the
4067	 * attribute fork, we only need to ensure the allocated range is not on
4068	 * the busy list.
4069	 */
4070	bma->datatype = XFS_ALLOC_NOBUSY;
4071	if (whichfork == XFS_DATA_FORK) {
4072		bma->datatype |= XFS_ALLOC_USERDATA;
4073		if (bma->offset == 0)
4074			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4075
4076		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4077			error = xfs_bmap_isaeof(bma, whichfork);
4078			if (error)
4079				return error;
4080		}
4081
4082		if (XFS_IS_REALTIME_INODE(bma->ip))
4083			return xfs_bmap_rtalloc(bma);
4084	}
4085
4086	return xfs_bmap_btalloc(bma);
4087}
4088
4089static int
4090xfs_bmapi_allocate(
4091	struct xfs_bmalloca	*bma)
4092{
4093	struct xfs_mount	*mp = bma->ip->i_mount;
4094	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4095	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4096	int			tmp_logflags = 0;
4097	int			error;
4098
4099	ASSERT(bma->length > 0);
4100
4101	/*
4102	 * For the wasdelay case, we could also just allocate the stuff asked
4103	 * for in this bmap call but that wouldn't be as good.
4104	 */
4105	if (bma->wasdel) {
4106		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4107		bma->offset = bma->got.br_startoff;
4108		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4109			bma->prev.br_startoff = NULLFILEOFF;
4110	} else {
4111		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4112		if (!bma->eof)
4113			bma->length = XFS_FILBLKS_MIN(bma->length,
4114					bma->got.br_startoff - bma->offset);
4115	}
4116
4117	if (bma->flags & XFS_BMAPI_CONTIG)
4118		bma->minlen = bma->length;
4119	else
4120		bma->minlen = 1;
4121
4122	if (bma->flags & XFS_BMAPI_METADATA)
4123		error = xfs_bmap_btalloc(bma);
4124	else
4125		error = xfs_bmap_alloc_userdata(bma);
4126	if (error || bma->blkno == NULLFSBLOCK)
4127		return error;
4128
4129	if (bma->flags & XFS_BMAPI_ZERO) {
4130		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4131		if (error)
4132			return error;
4133	}
4134
4135	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4136		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4137	/*
4138	 * Bump the number of extents we've allocated
4139	 * in this call.
4140	 */
4141	bma->nallocs++;
4142
4143	if (bma->cur)
4144		bma->cur->bc_ino.flags =
4145			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4146
4147	bma->got.br_startoff = bma->offset;
4148	bma->got.br_startblock = bma->blkno;
4149	bma->got.br_blockcount = bma->length;
4150	bma->got.br_state = XFS_EXT_NORM;
4151
4152	if (bma->flags & XFS_BMAPI_PREALLOC)
4153		bma->got.br_state = XFS_EXT_UNWRITTEN;
4154
4155	if (bma->wasdel)
4156		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4157	else
4158		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4159				whichfork, &bma->icur, &bma->cur, &bma->got,
4160				&bma->logflags, bma->flags);
4161
4162	bma->logflags |= tmp_logflags;
4163	if (error)
4164		return error;
4165
4166	/*
4167	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4168	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4169	 * the neighbouring ones.
4170	 */
4171	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4172
4173	ASSERT(bma->got.br_startoff <= bma->offset);
4174	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4175	       bma->offset + bma->length);
4176	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4177	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4178	return 0;
4179}
4180
4181STATIC int
4182xfs_bmapi_convert_unwritten(
4183	struct xfs_bmalloca	*bma,
4184	struct xfs_bmbt_irec	*mval,
4185	xfs_filblks_t		len,
4186	int			flags)
4187{
4188	int			whichfork = xfs_bmapi_whichfork(flags);
4189	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4190	int			tmp_logflags = 0;
4191	int			error;
4192
4193	/* check if we need to do unwritten->real conversion */
4194	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4195	    (flags & XFS_BMAPI_PREALLOC))
4196		return 0;
4197
4198	/* check if we need to do real->unwritten conversion */
4199	if (mval->br_state == XFS_EXT_NORM &&
4200	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4201			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4202		return 0;
4203
4204	/*
4205	 * Modify (by adding) the state flag, if writing.
4206	 */
4207	ASSERT(mval->br_blockcount <= len);
4208	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4209		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4210					bma->ip, whichfork);
4211	}
4212	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4213				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4214
4215	/*
4216	 * Before insertion into the bmbt, zero the range being converted
4217	 * if required.
4218	 */
4219	if (flags & XFS_BMAPI_ZERO) {
4220		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4221					mval->br_blockcount);
4222		if (error)
4223			return error;
4224	}
4225
4226	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4227			&bma->icur, &bma->cur, mval, &tmp_logflags);
4228	/*
4229	 * Log the inode core unconditionally in the unwritten extent conversion
4230	 * path because the conversion might not have done so (e.g., if the
4231	 * extent count hasn't changed). We need to make sure the inode is dirty
4232	 * in the transaction for the sake of fsync(), even if nothing has
4233	 * changed, because fsync() will not force the log for this transaction
4234	 * unless it sees the inode pinned.
4235	 *
4236	 * Note: If we're only converting cow fork extents, there aren't
4237	 * any on-disk updates to make, so we don't need to log anything.
4238	 */
4239	if (whichfork != XFS_COW_FORK)
4240		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4241	if (error)
4242		return error;
4243
4244	/*
4245	 * Update our extent pointer, given that
4246	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4247	 * of the neighbouring ones.
4248	 */
4249	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4250
4251	/*
4252	 * We may have combined previously unwritten space with written space,
4253	 * so generate another request.
4254	 */
4255	if (mval->br_blockcount < len)
4256		return -EAGAIN;
4257	return 0;
4258}
4259
4260static inline xfs_extlen_t
4261xfs_bmapi_minleft(
4262	struct xfs_trans	*tp,
4263	struct xfs_inode	*ip,
4264	int			fork)
4265{
4266	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, fork);
4267
4268	if (tp && tp->t_firstblock != NULLFSBLOCK)
4269		return 0;
4270	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4271		return 1;
4272	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4273}
4274
4275/*
4276 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4277 * a case where the data is changed, there's an error, and it's not logged so we
4278 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4279 * we converted to the other format.
4280 */
4281static void
4282xfs_bmapi_finish(
4283	struct xfs_bmalloca	*bma,
4284	int			whichfork,
4285	int			error)
4286{
4287	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4288
4289	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4290	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4291		bma->logflags &= ~xfs_ilog_fext(whichfork);
4292	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4293		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4294		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4295
4296	if (bma->logflags)
4297		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4298	if (bma->cur)
4299		xfs_btree_del_cursor(bma->cur, error);
4300}
4301
4302/*
4303 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4304 * extent state if necessary.  Detailed behaviour is controlled by the flags
4305 * parameter.  Only allocates blocks from a single allocation group, to avoid
4306 * locking problems.
 *
 * The caller must pass a transaction and hold the inode's ILOCK exclusively;
 * both are asserted below.  On entry *nmap is the capacity of the mval array.
 * On a successful return it is the number of mappings filled in, which can be
 * fewer than needed to cover the whole of [bno, bno + len): the loop stops
 * once *nmap mappings or *nmap allocations have been made, or when the
 * allocator returns NULLFSBLOCK.
 *
 * Returns 0 on success, -EFSCORRUPTED if the fork is not in an extent-based
 * format (or the BMAPIFORMAT error tag fires), -EIO if the filesystem has been
 * shut down, or the error reported by a helper.  Once past those early checks,
 * the accumulated log flags are logged and the btree cursor is released via
 * xfs_bmapi_finish() on both the success and the error0 path.
4307 */
4308int
4309xfs_bmapi_write(
4310	struct xfs_trans	*tp,		/* transaction pointer */
4311	struct xfs_inode	*ip,		/* incore inode */
4312	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4313	xfs_filblks_t		len,		/* length to map in file */
4314	int			flags,		/* XFS_BMAPI_... */
4315	xfs_extlen_t		total,		/* total blocks needed */
4316	struct xfs_bmbt_irec	*mval,		/* output: map values */
4317	int			*nmap)		/* i/o: mval size/count */
4318{
4319	struct xfs_bmalloca	bma = {
4320		.tp		= tp,
4321		.ip		= ip,
4322		.total		= total,
4323	};
4324	struct xfs_mount	*mp = ip->i_mount;
4325	int			whichfork = xfs_bmapi_whichfork(flags);
4326	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4327	xfs_fileoff_t		end;		/* end of mapped file region */
4328	bool			eof = false;	/* after the end of extents */
4329	int			error;		/* error return */
4330	int			n;		/* current extent index */
4331	xfs_fileoff_t		obno;		/* old block number (offset) */
4332
	/* Snapshot of the arguments, used only for the return validation. */
4333#ifdef DEBUG
4334	xfs_fileoff_t		orig_bno;	/* original block number value */
4335	int			orig_flags;	/* original flags arg value */
4336	xfs_filblks_t		orig_len;	/* original value of len arg */
4337	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4338	int			orig_nmap;	/* original value of *nmap */
4339
4340	orig_bno = bno;
4341	orig_len = len;
4342	orig_flags = flags;
4343	orig_mval = mval;
4344	orig_nmap = *nmap;
4345#endif
4346
4347	ASSERT(*nmap >= 1);
4348	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4349	ASSERT(tp != NULL);
4350	ASSERT(len > 0);
4351	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4352	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4353	ASSERT(!(flags & XFS_BMAPI_REMAP));
4354
4355	/* zeroing is currently only for data extents, not metadata */
4356	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4357			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4358	/*
4359	 * we can allocate unwritten extents or pre-zero allocated blocks,
4360	 * but it makes no sense to do both at once. This would result in
4361	 * zeroing the unwritten extent twice, but it still being an
4362	 * unwritten extent....
4363	 */
4364	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4365			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4366
4367	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4368	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4369		return -EFSCORRUPTED;
4370	}
4371
4372	if (XFS_FORCED_SHUTDOWN(mp))
4373		return -EIO;
4374
4375	XFS_STATS_INC(mp, xs_blk_mapw);
4376
	/* Read the extent list into memory if that has not happened yet. */
4377	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4378		error = xfs_iread_extents(tp, ip, whichfork);
4379		if (error)
4380			goto error0;
4381	}
4382
	/* A failed lookup at bno is treated as being past the last extent. */
4383	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4384		eof = true;
4385	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4386		bma.prev.br_startoff = NULLFILEOFF;
4387	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4388
4389	n = 0;
4390	end = bno + len;
4391	obno = bno;
4392	while (bno < end && n < *nmap) {
4393		bool			need_alloc = false, wasdelay = false;
4394
4395		/* in hole or beyond EOF? */
4396		if (eof || bma.got.br_startoff > bno) {
4397			/*
4398			 * CoW fork conversions should /never/ hit EOF or
4399			 * holes.  There should always be something for us
4400			 * to work on.
4401			 */
4402			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4403			         (flags & XFS_BMAPI_COWFORK)));
4404
4405			need_alloc = true;
4406		} else if (isnullstartblock(bma.got.br_startblock)) {
4407			wasdelay = true;
4408		}
4409
4410		/*
4411		 * First, deal with the hole before the allocated space
4412		 * that we found, if any.
4413		 */
4414		if (need_alloc || wasdelay) {
4415			bma.eof = eof;
4416			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4417			bma.wasdel = wasdelay;
4418			bma.offset = bno;
4419			bma.flags = flags;
4420
4421			/*
4422			 * There's a 32/64 bit type mismatch between the
4423			 * allocation length request (which can be 64 bits in
4424			 * length) and the bma length request, which is
4425			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4426			 * check for 32-bit overflows and handle them here.
4427			 */
4428			if (len > (xfs_filblks_t)MAXEXTLEN)
4429				bma.length = MAXEXTLEN;
4430			else
4431				bma.length = len;
4432
4433			ASSERT(len > 0);
4434			ASSERT(bma.length > 0);
4435			error = xfs_bmapi_allocate(&bma);
4436			if (error)
4437				goto error0;
			/*
			 * No error but no blocks either: stop here and return
			 * the mappings gathered so far.
			 */
4438			if (bma.blkno == NULLFSBLOCK)
4439				break;
4440
4441			/*
4442			 * If this is a CoW allocation, record the data in
4443			 * the refcount btree for orphan recovery.
4444			 */
4445			if (whichfork == XFS_COW_FORK)
4446				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4447						bma.length);
4448		}
4449
4450		/* Deal with the allocated space we found.  */
4451		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4452							end, n, flags);
4453
4454		/* Execute unwritten extent conversion if necessary */
4455		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		/* -EAGAIN: go around again without recording this mapping. */
4456		if (error == -EAGAIN)
4457			continue;
4458		if (error)
4459			goto error0;
4460
4461		/* update the extent map to return */
4462		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4463
4464		/*
4465		 * If we're done, stop now.  Stop when we've allocated
4466		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4467		 * the transaction may get too big.
4468		 */
4469		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4470			break;
4471
4472		/* Else go on to the next record. */
4473		bma.prev = bma.got;
4474		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4475			eof = true;
4476	}
	/* Tell the caller how many entries of mval were filled in. */
4477	*nmap = n;
4478
4479	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4480			whichfork);
4481	if (error)
4482		goto error0;
4483
4484	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4485	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4486	xfs_bmapi_finish(&bma, whichfork, 0);
4487	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4488		orig_nmap, *nmap);
4489	return 0;
4490error0:
	/* Log whatever was flagged and drop the cursor, even on failure. */
4491	xfs_bmapi_finish(&bma, whichfork, error);
4492	return error;
4493}
4494
4495/*
4496 * Convert an existing delalloc extent to real blocks based on file offset. This
4497 * attempts to allocate the entire delalloc extent and may require multiple
4498 * invocations to allocate the target offset if a large enough physical extent
4499 * is not available.
4500 */
4501int
4502xfs_bmapi_convert_delalloc(
4503	struct xfs_inode	*ip,
4504	int			whichfork,
4505	xfs_off_t		offset,
4506	struct iomap		*iomap,
4507	unsigned int		*seq)
4508{
4509	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4510	struct xfs_mount	*mp = ip->i_mount;
4511	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4512	struct xfs_bmalloca	bma = { NULL };
4513	uint16_t		flags = 0;
4514	struct xfs_trans	*tp;
4515	int			error;
4516
4517	if (whichfork == XFS_COW_FORK)
4518		flags |= IOMAP_F_SHARED;
4519
4520	/*
4521	 * Space for the extent and indirect blocks was reserved when the
4522	 * delalloc extent was created so there's no need to do so here.
4523	 */
4524	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4525				XFS_TRANS_RESERVE, &tp);
4526	if (error)
4527		return error;
4528
4529	xfs_ilock(ip, XFS_ILOCK_EXCL);
4530	xfs_trans_ijoin(tp, ip, 0);
4531
4532	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4533	    bma.got.br_startoff > offset_fsb) {
4534		/*
4535		 * No extent found in the range we are trying to convert.  This
4536		 * should only happen for the COW fork, where another thread
4537		 * might have moved the extent to the data fork in the meantime.
4538		 */
4539		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4540		error = -EAGAIN;
4541		goto out_trans_cancel;
4542	}
4543
4544	/*
4545	 * If we find a real extent here we raced with another thread converting
4546	 * the extent.  Just return the real extent at this offset.
4547	 */
4548	if (!isnullstartblock(bma.got.br_startblock)) {
4549		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4550		*seq = READ_ONCE(ifp->if_seq);
4551		goto out_trans_cancel;
4552	}
4553
4554	bma.tp = tp;
4555	bma.ip = ip;
4556	bma.wasdel = true;
4557	bma.offset = bma.got.br_startoff;
4558	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4559	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4560
4561	/*
4562	 * When we're converting the delalloc reservations backing dirty pages
4563	 * in the page cache, we must be careful about how we create the new
4564	 * extents:
4565	 *
4566	 * New CoW fork extents are created unwritten, turned into real extents
4567	 * when we're about to write the data to disk, and mapped into the data
4568	 * fork after the write finishes.  End of story.
4569	 *
4570	 * New data fork extents must be mapped in as unwritten and converted
4571	 * to real extents after the write succeeds to avoid exposing stale
4572	 * disk contents if we crash.
4573	 */
4574	bma.flags = XFS_BMAPI_PREALLOC;
4575	if (whichfork == XFS_COW_FORK)
4576		bma.flags |= XFS_BMAPI_COWFORK;
4577
4578	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4579		bma.prev.br_startoff = NULLFILEOFF;
4580
4581	error = xfs_bmapi_allocate(&bma);
4582	if (error)
4583		goto out_finish;
4584
4585	error = -ENOSPC;
4586	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4587		goto out_finish;
4588	error = -EFSCORRUPTED;
4589	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4590		goto out_finish;
4591
4592	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4593	XFS_STATS_INC(mp, xs_xstrat_quick);
4594
4595	ASSERT(!isnullstartblock(bma.got.br_startblock));
4596	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4597	*seq = READ_ONCE(ifp->if_seq);
4598
4599	if (whichfork == XFS_COW_FORK)
4600		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4601
4602	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603			whichfork);
4604	if (error)
4605		goto out_finish;
4606
4607	xfs_bmapi_finish(&bma, whichfork, 0);
4608	error = xfs_trans_commit(tp);
4609	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4610	return error;
4611
4612out_finish:
4613	xfs_bmapi_finish(&bma, whichfork, error);
4614out_trans_cancel:
4615	xfs_trans_cancel(tp);
4616	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4617	return error;
4618}
4619
4620int
4621xfs_bmapi_remap(
4622	struct xfs_trans	*tp,
4623	struct xfs_inode	*ip,
4624	xfs_fileoff_t		bno,
4625	xfs_filblks_t		len,
4626	xfs_fsblock_t		startblock,
4627	int			flags)
4628{
4629	struct xfs_mount	*mp = ip->i_mount;
4630	struct xfs_ifork	*ifp;
4631	struct xfs_btree_cur	*cur = NULL;
4632	struct xfs_bmbt_irec	got;
4633	struct xfs_iext_cursor	icur;
4634	int			whichfork = xfs_bmapi_whichfork(flags);
4635	int			logflags = 0, error;
4636
4637	ifp = XFS_IFORK_PTR(ip, whichfork);
4638	ASSERT(len > 0);
4639	ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4640	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4641	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4642			   XFS_BMAPI_NORMAP)));
4643	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4644			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4645
4646	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4647	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4648		return -EFSCORRUPTED;
4649	}
4650
4651	if (XFS_FORCED_SHUTDOWN(mp))
4652		return -EIO;
4653
4654	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4655		error = xfs_iread_extents(tp, ip, whichfork);
4656		if (error)
4657			return error;
4658	}
4659
4660	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4661		/* make sure we only reflink into a hole. */
4662		ASSERT(got.br_startoff > bno);
4663		ASSERT(got.br_startoff - bno >= len);
4664	}
4665
4666	ip->i_d.di_nblocks += len;
4667	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4668
4669	if (ifp->if_flags & XFS_IFBROOT) {
4670		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4671		cur->bc_ino.flags = 0;
4672	}
4673
4674	got.br_startoff = bno;
4675	got.br_startblock = startblock;
4676	got.br_blockcount = len;
4677	if (flags & XFS_BMAPI_PREALLOC)
4678		got.br_state = XFS_EXT_UNWRITTEN;
4679	else
4680		got.br_state = XFS_EXT_NORM;
4681
4682	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4683			&cur, &got, &logflags, flags);
4684	if (error)
4685		goto error0;
4686
4687	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4688
4689error0:
4690	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4691		logflags &= ~XFS_ILOG_DEXT;
4692	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4693		logflags &= ~XFS_ILOG_DBROOT;
4694
4695	if (logflags)
4696		xfs_trans_log_inode(tp, ip, logflags);
4697	if (cur)
4698		xfs_btree_del_cursor(cur, error);
4699	return error;
4700}
4701
4702/*
4703 * When a delalloc extent is split (e.g., due to a hole punch), the original
4704 * indlen reservation must be shared across the two new extents that are left
4705 * behind.
4706 *
4707 * Given the original reservation and the worst case indlen for the two new
4708 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4709 * reservation fairly across the two new extents. If necessary, steal available
4710 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4711 * ores == 1). The number of stolen blocks is returned. The availability and
4712 * subsequent accounting of stolen blocks is the responsibility of the caller.
4713 */
4714static xfs_filblks_t
4715xfs_bmap_split_indlen(
4716	xfs_filblks_t			ores,		/* original res. */
4717	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4718	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4719	xfs_filblks_t			avail)		/* stealable blocks */
4720{
4721	xfs_filblks_t			len1 = *indlen1;
4722	xfs_filblks_t			len2 = *indlen2;
4723	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4724	xfs_filblks_t			stolen = 0;
4725	xfs_filblks_t			resfactor;
4726
4727	/*
4728	 * Steal as many blocks as we can to try and satisfy the worst case
4729	 * indlen for both new extents.
4730	 */
4731	if (ores < nres && avail)
4732		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4733	ores += stolen;
4734
4735	 /* nothing else to do if we've satisfied the new reservation */
4736	if (ores >= nres)
4737		return stolen;
4738
4739	/*
4740	 * We can't meet the total required reservation for the two extents.
4741	 * Calculate the percent of the overall shortage between both extents
4742	 * and apply this percentage to each of the requested indlen values.
4743	 * This distributes the shortage fairly and reduces the chances that one
4744	 * of the two extents is left with nothing when extents are repeatedly
4745	 * split.
4746	 */
4747	resfactor = (ores * 100);
4748	do_div(resfactor, nres);
4749	len1 *= resfactor;
4750	do_div(len1, 100);
4751	len2 *= resfactor;
4752	do_div(len2, 100);
4753	ASSERT(len1 + len2 <= ores);
4754	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4755
4756	/*
4757	 * Hand out the remainder to each extent. If one of the two reservations
4758	 * is zero, we want to make sure that one gets a block first. The loop
4759	 * below starts with len1, so hand len2 a block right off the bat if it
4760	 * is zero.
4761	 */
4762	ores -= (len1 + len2);
4763	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4764	if (ores && !len2 && *indlen2) {
4765		len2++;
4766		ores--;
4767	}
4768	while (ores) {
4769		if (len1 < *indlen1) {
4770			len1++;
4771			ores--;
4772		}
4773		if (!ores)
4774			break;
4775		if (len2 < *indlen2) {
4776			len2++;
4777			ores--;
4778		}
4779	}
4780
4781	*indlen1 = len1;
4782	*indlen2 = len2;
4783
4784	return stolen;
4785}
4786
4787int
4788xfs_bmap_del_extent_delay(
4789	struct xfs_inode	*ip,
4790	int			whichfork,
4791	struct xfs_iext_cursor	*icur,
4792	struct xfs_bmbt_irec	*got,
4793	struct xfs_bmbt_irec	*del)
4794{
4795	struct xfs_mount	*mp = ip->i_mount;
4796	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4797	struct xfs_bmbt_irec	new;
4798	int64_t			da_old, da_new, da_diff = 0;
4799	xfs_fileoff_t		del_endoff, got_endoff;
4800	xfs_filblks_t		got_indlen, new_indlen, stolen;
4801	int			state = xfs_bmap_fork_to_state(whichfork);
4802	int			error = 0;
4803	bool			isrt;
4804
4805	XFS_STATS_INC(mp, xs_del_exlist);
4806
4807	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4808	del_endoff = del->br_startoff + del->br_blockcount;
4809	got_endoff = got->br_startoff + got->br_blockcount;
4810	da_old = startblockval(got->br_startblock);
4811	da_new = 0;
4812
4813	ASSERT(del->br_blockcount > 0);
4814	ASSERT(got->br_startoff <= del->br_startoff);
4815	ASSERT(got_endoff >= del_endoff);
4816
4817	if (isrt) {
4818		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4819
4820		do_div(rtexts, mp->m_sb.sb_rextsize);
4821		xfs_mod_frextents(mp, rtexts);
4822	}
4823
4824	/*
4825	 * Update the inode delalloc counter now and wait to update the
4826	 * sb counters as we might have to borrow some blocks for the
4827	 * indirect block accounting.
4828	 */
4829	error = xfs_trans_reserve_quota_nblks(NULL, ip,
4830			-((long)del->br_blockcount), 0,
4831			isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4832	if (error)
4833		return error;
4834	ip->i_delayed_blks -= del->br_blockcount;
4835
4836	if (got->br_startoff == del->br_startoff)
4837		state |= BMAP_LEFT_FILLING;
4838	if (got_endoff == del_endoff)
4839		state |= BMAP_RIGHT_FILLING;
4840
4841	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4842	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4843		/*
4844		 * Matches the whole extent.  Delete the entry.
4845		 */
4846		xfs_iext_remove(ip, icur, state);
4847		xfs_iext_prev(ifp, icur);
4848		break;
4849	case BMAP_LEFT_FILLING:
4850		/*
4851		 * Deleting the first part of the extent.
4852		 */
4853		got->br_startoff = del_endoff;
4854		got->br_blockcount -= del->br_blockcount;
4855		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4856				got->br_blockcount), da_old);
4857		got->br_startblock = nullstartblock((int)da_new);
4858		xfs_iext_update_extent(ip, state, icur, got);
4859		break;
4860	case BMAP_RIGHT_FILLING:
4861		/*
4862		 * Deleting the last part of the extent.
4863		 */
4864		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4865		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4866				got->br_blockcount), da_old);
4867		got->br_startblock = nullstartblock((int)da_new);
4868		xfs_iext_update_extent(ip, state, icur, got);
4869		break;
4870	case 0:
4871		/*
4872		 * Deleting the middle of the extent.
4873		 *
4874		 * Distribute the original indlen reservation across the two new
4875		 * extents.  Steal blocks from the deleted extent if necessary.
4876		 * Stealing blocks simply fudges the fdblocks accounting below.
4877		 * Warn if either of the new indlen reservations is zero as this
4878		 * can lead to delalloc problems.
4879		 */
4880		got->br_blockcount = del->br_startoff - got->br_startoff;
4881		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4882
4883		new.br_blockcount = got_endoff - del_endoff;
4884		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4885
4886		WARN_ON_ONCE(!got_indlen || !new_indlen);
4887		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4888						       del->br_blockcount);
4889
4890		got->br_startblock = nullstartblock((int)got_indlen);
4891
4892		new.br_startoff = del_endoff;
4893		new.br_state = got->br_state;
4894		new.br_startblock = nullstartblock((int)new_indlen);
4895
4896		xfs_iext_update_extent(ip, state, icur, got);
4897		xfs_iext_next(ifp, icur);
4898		xfs_iext_insert(ip, icur, &new, state);
4899
4900		da_new = got_indlen + new_indlen - stolen;
4901		del->br_blockcount -= stolen;
4902		break;
4903	}
4904
4905	ASSERT(da_old >= da_new);
4906	da_diff = da_old - da_new;
4907	if (!isrt)
4908		da_diff += del->br_blockcount;
4909	if (da_diff) {
4910		xfs_mod_fdblocks(mp, da_diff, false);
4911		xfs_mod_delalloc(mp, -da_diff);
4912	}
4913	return error;
4914}
4915
4916void
4917xfs_bmap_del_extent_cow(
4918	struct xfs_inode	*ip,
4919	struct xfs_iext_cursor	*icur,
4920	struct xfs_bmbt_irec	*got,
4921	struct xfs_bmbt_irec	*del)
4922{
4923	struct xfs_mount	*mp = ip->i_mount;
4924	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4925	struct xfs_bmbt_irec	new;
4926	xfs_fileoff_t		del_endoff, got_endoff;
4927	int			state = BMAP_COWFORK;
4928
4929	XFS_STATS_INC(mp, xs_del_exlist);
4930
4931	del_endoff = del->br_startoff + del->br_blockcount;
4932	got_endoff = got->br_startoff + got->br_blockcount;
4933
4934	ASSERT(del->br_blockcount > 0);
4935	ASSERT(got->br_startoff <= del->br_startoff);
4936	ASSERT(got_endoff >= del_endoff);
4937	ASSERT(!isnullstartblock(got->br_startblock));
4938
4939	if (got->br_startoff == del->br_startoff)
4940		state |= BMAP_LEFT_FILLING;
4941	if (got_endoff == del_endoff)
4942		state |= BMAP_RIGHT_FILLING;
4943
4944	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4945	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4946		/*
4947		 * Matches the whole extent.  Delete the entry.
4948		 */
4949		xfs_iext_remove(ip, icur, state);
4950		xfs_iext_prev(ifp, icur);
4951		break;
4952	case BMAP_LEFT_FILLING:
4953		/*
4954		 * Deleting the first part of the extent.
4955		 */
4956		got->br_startoff = del_endoff;
4957		got->br_blockcount -= del->br_blockcount;
4958		got->br_startblock = del->br_startblock + del->br_blockcount;
4959		xfs_iext_update_extent(ip, state, icur, got);
4960		break;
4961	case BMAP_RIGHT_FILLING:
4962		/*
4963		 * Deleting the last part of the extent.
4964		 */
4965		got->br_blockcount -= del->br_blockcount;
4966		xfs_iext_update_extent(ip, state, icur, got);
4967		break;
4968	case 0:
4969		/*
4970		 * Deleting the middle of the extent.
4971		 */
4972		got->br_blockcount = del->br_startoff - got->br_startoff;
4973
4974		new.br_startoff = del_endoff;
4975		new.br_blockcount = got_endoff - del_endoff;
4976		new.br_state = got->br_state;
4977		new.br_startblock = del->br_startblock + del->br_blockcount;
4978
4979		xfs_iext_update_extent(ip, state, icur, got);
4980		xfs_iext_next(ifp, icur);
4981		xfs_iext_insert(ip, icur, &new, state);
4982		break;
4983	}
4984	ip->i_delayed_blks -= del->br_blockcount;
4985}
4986
4987/*
4988 * Called by xfs_bmapi to update file extent records and the btree
4989 * after removing space.
4990 */
4991STATIC int				/* error */
4992xfs_bmap_del_extent_real(
4993	xfs_inode_t		*ip,	/* incore inode pointer */
4994	xfs_trans_t		*tp,	/* current transaction pointer */
4995	struct xfs_iext_cursor	*icur,
4996	xfs_btree_cur_t		*cur,	/* if null, not a btree */
4997	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
4998	int			*logflagsp, /* inode logging flags */
4999	int			whichfork, /* data or attr fork */
5000	int			bflags)	/* bmapi flags */
5001{
5002	xfs_fsblock_t		del_endblock=0;	/* first block past del */
5003	xfs_fileoff_t		del_endoff;	/* first offset past del */
5004	int			do_fx;	/* free extent at end of routine */
5005	int			error;	/* error return value */
5006	int			flags = 0;/* inode logging flags */
5007	struct xfs_bmbt_irec	got;	/* current extent entry */
5008	xfs_fileoff_t		got_endoff;	/* first offset past got */
5009	int			i;	/* temp state */
5010	struct xfs_ifork	*ifp;	/* inode fork pointer */
5011	xfs_mount_t		*mp;	/* mount structure */
5012	xfs_filblks_t		nblks;	/* quota/sb block count */
5013	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5014	/* REFERENCED */
5015	uint			qfield;	/* quota field to update */
5016	int			state = xfs_bmap_fork_to_state(whichfork);
5017	struct xfs_bmbt_irec	old;
5018
5019	mp = ip->i_mount;
5020	XFS_STATS_INC(mp, xs_del_exlist);
5021
5022	ifp = XFS_IFORK_PTR(ip, whichfork);
5023	ASSERT(del->br_blockcount > 0);
5024	xfs_iext_get_extent(ifp, icur, &got);
5025	ASSERT(got.br_startoff <= del->br_startoff);
5026	del_endoff = del->br_startoff + del->br_blockcount;
5027	got_endoff = got.br_startoff + got.br_blockcount;
5028	ASSERT(got_endoff >= del_endoff);
5029	ASSERT(!isnullstartblock(got.br_startblock));
5030	qfield = 0;
5031	error = 0;
5032
5033	/*
5034	 * If it's the case where the directory code is running with no block
5035	 * reservation, and the deleted block is in the middle of its extent,
5036	 * and the resulting insert of an extent would cause transformation to
5037	 * btree format, then reject it.  The calling code will then swap blocks
5038	 * around instead.  We have to do this now, rather than waiting for the
5039	 * conversion to btree format, since the transaction will be dirty then.
5040	 */
5041	if (tp->t_blk_res == 0 &&
5042	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5043	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5044	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5045		return -ENOSPC;
5046
5047	flags = XFS_ILOG_CORE;
5048	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5049		xfs_filblks_t	len;
5050		xfs_extlen_t	mod;
5051
5052		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5053				  &mod);
5054		ASSERT(mod == 0);
5055
5056		if (!(bflags & XFS_BMAPI_REMAP)) {
5057			xfs_fsblock_t	bno;
5058
5059			bno = div_u64_rem(del->br_startblock,
5060					mp->m_sb.sb_rextsize, &mod);
5061			ASSERT(mod == 0);
5062
5063			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5064			if (error)
5065				goto done;
5066		}
5067
5068		do_fx = 0;
5069		nblks = len * mp->m_sb.sb_rextsize;
5070		qfield = XFS_TRANS_DQ_RTBCOUNT;
5071	} else {
5072		do_fx = 1;
5073		nblks = del->br_blockcount;
5074		qfield = XFS_TRANS_DQ_BCOUNT;
5075	}
5076
5077	del_endblock = del->br_startblock + del->br_blockcount;
5078	if (cur) {
5079		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5080		if (error)
5081			goto done;
5082		if (XFS_IS_CORRUPT(mp, i != 1)) {
5083			error = -EFSCORRUPTED;
5084			goto done;
5085		}
5086	}
5087
5088	if (got.br_startoff == del->br_startoff)
5089		state |= BMAP_LEFT_FILLING;
5090	if (got_endoff == del_endoff)
5091		state |= BMAP_RIGHT_FILLING;
5092
5093	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5094	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5095		/*
5096		 * Matches the whole extent.  Delete the entry.
5097		 */
5098		xfs_iext_remove(ip, icur, state);
5099		xfs_iext_prev(ifp, icur);
5100		ifp->if_nextents--;
5101
5102		flags |= XFS_ILOG_CORE;
5103		if (!cur) {
5104			flags |= xfs_ilog_fext(whichfork);
5105			break;
5106		}
5107		if ((error = xfs_btree_delete(cur, &i)))
5108			goto done;
5109		if (XFS_IS_CORRUPT(mp, i != 1)) {
5110			error = -EFSCORRUPTED;
5111			goto done;
5112		}
5113		break;
5114	case BMAP_LEFT_FILLING:
5115		/*
5116		 * Deleting the first part of the extent.
5117		 */
5118		got.br_startoff = del_endoff;
5119		got.br_startblock = del_endblock;
5120		got.br_blockcount -= del->br_blockcount;
5121		xfs_iext_update_extent(ip, state, icur, &got);
5122		if (!cur) {
5123			flags |= xfs_ilog_fext(whichfork);
5124			break;
5125		}
5126		error = xfs_bmbt_update(cur, &got);
5127		if (error)
5128			goto done;
5129		break;
5130	case BMAP_RIGHT_FILLING:
5131		/*
5132		 * Deleting the last part of the extent.
5133		 */
5134		got.br_blockcount -= del->br_blockcount;
5135		xfs_iext_update_extent(ip, state, icur, &got);
5136		if (!cur) {
5137			flags |= xfs_ilog_fext(whichfork);
5138			break;
5139		}
5140		error = xfs_bmbt_update(cur, &got);
5141		if (error)
5142			goto done;
5143		break;
5144	case 0:
5145		/*
5146		 * Deleting the middle of the extent.
5147		 */
5148		old = got;
5149
5150		got.br_blockcount = del->br_startoff - got.br_startoff;
5151		xfs_iext_update_extent(ip, state, icur, &got);
5152
5153		new.br_startoff = del_endoff;
5154		new.br_blockcount = got_endoff - del_endoff;
5155		new.br_state = got.br_state;
5156		new.br_startblock = del_endblock;
5157
5158		flags |= XFS_ILOG_CORE;
5159		if (cur) {
5160			error = xfs_bmbt_update(cur, &got);
5161			if (error)
5162				goto done;
5163			error = xfs_btree_increment(cur, 0, &i);
5164			if (error)
5165				goto done;
5166			cur->bc_rec.b = new;
5167			error = xfs_btree_insert(cur, &i);
5168			if (error && error != -ENOSPC)
5169				goto done;
5170			/*
5171			 * If get no-space back from btree insert, it tried a
5172			 * split, and we have a zero block reservation.  Fix up
5173			 * our state and return the error.
5174			 */
5175			if (error == -ENOSPC) {
5176				/*
5177				 * Reset the cursor, don't trust it after any
5178				 * insert operation.
5179				 */
5180				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5181				if (error)
5182					goto done;
5183				if (XFS_IS_CORRUPT(mp, i != 1)) {
5184					error = -EFSCORRUPTED;
5185					goto done;
5186				}
5187				/*
5188				 * Update the btree record back
5189				 * to the original value.
5190				 */
5191				error = xfs_bmbt_update(cur, &old);
5192				if (error)
5193					goto done;
5194				/*
5195				 * Reset the extent record back
5196				 * to the original value.
5197				 */
5198				xfs_iext_update_extent(ip, state, icur, &old);
5199				flags = 0;
5200				error = -ENOSPC;
5201				goto done;
5202			}
5203			if (XFS_IS_CORRUPT(mp, i != 1)) {
5204				error = -EFSCORRUPTED;
5205				goto done;
5206			}
5207		} else
5208			flags |= xfs_ilog_fext(whichfork);
5209
5210		ifp->if_nextents++;
5211		xfs_iext_next(ifp, icur);
5212		xfs_iext_insert(ip, icur, &new, state);
5213		break;
5214	}
5215
5216	/* remove reverse mapping */
5217	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5218
5219	/*
5220	 * If we need to, add to list of extents to delete.
5221	 */
5222	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5223		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5224			xfs_refcount_decrease_extent(tp, del);
5225		} else {
5226			__xfs_bmap_add_free(tp, del->br_startblock,
5227					del->br_blockcount, NULL,
5228					(bflags & XFS_BMAPI_NODISCARD) ||
5229					del->br_state == XFS_EXT_UNWRITTEN);
5230		}
5231	}
5232
5233	/*
5234	 * Adjust inode # blocks in the file.
5235	 */
5236	if (nblks)
5237		ip->i_d.di_nblocks -= nblks;
5238	/*
5239	 * Adjust quota data.
5240	 */
5241	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5242		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5243
5244done:
5245	*logflagsp = flags;
5246	return error;
5247}
5248
/*
 * Unmap (remove) blocks from a file.
 *
 * The range [start, start + *rlen) is processed from its highest file offset
 * downwards, one extent (or piece of an extent) per loop iteration.
 * If nexts is nonzero then the number of extents to remove is limited to
 * that value.  On return *rlen holds the number of blocks, counted from
 * start, that still remain to be processed; it is zero once the whole range
 * has been handled (xfs_bunmapi() turns that into its *done flag).
 *
 * Returns 0 on success or a negative errno.
 */
int						/* error */
__xfs_bunmapi(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		start,		/* first file offset deleted */
	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
	int			flags,		/* misc flags */
	xfs_extnum_t		nexts)		/* number of extents max */
{
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	struct xfs_bmbt_irec	del;		/* extent being deleted */
	int			error;		/* error return value */
	xfs_extnum_t		extno;		/* extent number in list */
	struct xfs_bmbt_irec	got;		/* current extent record */
	struct xfs_ifork	*ifp;		/* inode fork pointer */
	int			isrt;		/* freeing in rt area */
	int			logflags;	/* transaction logging flags */
	xfs_extlen_t		mod;		/* rt extent offset */
	struct xfs_mount	*mp = ip->i_mount;
	int			tmp_logflags;	/* partial logging flags */
	int			wasdel;		/* was a delayed alloc extent */
	int			whichfork;	/* data or attribute fork */
	xfs_fsblock_t		sum;
	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
	xfs_fileoff_t		max_len;
	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
	xfs_fileoff_t		end;
	struct xfs_iext_cursor	icur;
	bool			done = false;

	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);

	whichfork = xfs_bmapi_whichfork(flags);
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
		return -EFSCORRUPTED;
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(len > 0);
	ASSERT(nexts >= 0);

	/*
	 * Guesstimate how many blocks we can unmap without running the risk of
	 * blowing out the transaction with a mix of EFIs and reflink
	 * adjustments.
	 */
	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
	else
		max_len = len;

	/* Bring the extent list in core if it has not been read yet. */
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	/* No extents at all: nothing to unmap, report the range as finished. */
	if (xfs_iext_count(ifp) == 0) {
		*rlen = 0;
		return 0;
	}
	XFS_STATS_INC(mp, xs_blk_unmap);
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	/* end starts out as one past the last block of the range. */
	end = start + len;

	/*
	 * Position icur/got for the backwards walk.  If the lookup finds
	 * nothing the range is reported as finished.
	 */
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
		*rlen = 0;
		return 0;
	}
	/* From here on end is the inclusive last block still to process. */
	end--;

	logflags = 0;
	/* A btree cursor is only needed when the fork has a btree root. */
	if (ifp->if_flags & XFS_IFBROOT) {
		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_ino.flags = 0;
	} else
		cur = NULL;

	if (isrt) {
		/*
		 * Synchronize by locking the bitmap inode.
		 */
		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
	}

	/*
	 * Main loop.  It stops when end wraps to -1 or drops below start,
	 * when the extent limit (nexts) is reached, or when the max_len
	 * budget is used up.
	 */
	extno = 0;
	while (end != (xfs_fileoff_t)-1 && end >= start &&
	       (nexts == 0 || extno < nexts) && max_len > 0) {
		/*
		 * Is the found extent after a hole in which end lives?
		 * Just back up to the previous extent, if so.
		 */
		if (got.br_startoff > end &&
		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
			done = true;
			break;
		}
		/*
		 * Is the last block of this extent before the range
		 * we're supposed to delete?  If so, we're done.
		 */
		end = XFS_FILEOFF_MIN(end,
			got.br_startoff + got.br_blockcount - 1);
		if (end < start)
			break;
		/*
		 * Then deal with the (possibly delayed) allocated space
		 * we found.
		 */
		del = got;
		wasdel = isnullstartblock(del.br_startblock);

		/*
		 * Make sure we don't touch multiple AGF headers out of order
		 * in a single transaction, as that could cause AB-BA deadlocks.
		 */
		if (!wasdel && !isrt) {
			agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
			if (prev_agno != NULLAGNUMBER && prev_agno > agno)
				break;
			prev_agno = agno;
		}
		/* Clip del at the front so it does not start before start. */
		if (got.br_startoff < start) {
			del.br_startoff = start;
			del.br_blockcount -= start - got.br_startoff;
			if (!wasdel)
				del.br_startblock += start - got.br_startoff;
		}
		/* Clip del at the back so it does not extend past end. */
		if (del.br_startoff + del.br_blockcount > end + 1)
			del.br_blockcount = end + 1 - del.br_startoff;

		/*
		 * How much can we safely unmap?  When over budget, keep only
		 * the last max_len blocks of del (the front is trimmed off).
		 */
		if (max_len < del.br_blockcount) {
			del.br_startoff += del.br_blockcount - max_len;
			if (!wasdel)
				del.br_startblock += del.br_blockcount - max_len;
			del.br_blockcount = max_len;
		}

		/* Only realtime files need the alignment handling below. */
		if (!isrt)
			goto delete;

		/* mod = misalignment of the end of del within an rt extent. */
		sum = del.br_startblock + del.br_blockcount;
		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
			if (del.br_state == XFS_EXT_UNWRITTEN) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(end >= mod);
				end -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (end < got.br_startoff &&
				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
					done = true;
					break;
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(tp->t_blk_res > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
					whichfork, &icur, &cur, &del,
					&logflags);
			if (error)
				goto error0;
			/* Nothing was removed; end and max_len stay as they are. */
			goto nodelete;
		}
		/* mod = misalignment of the start of del within an rt extent. */
		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			/* Blocks from del's start up to the next rt extent boundary. */
			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;

			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			if (del.br_blockcount > off) {
				del.br_blockcount -= off;
				del.br_startoff += off;
				del.br_startblock += off;
			} else if (del.br_startoff == start &&
				   (del.br_state == XFS_EXT_UNWRITTEN ||
				    tp->t_blk_res == 0)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(end >= del.br_blockcount);
				end -= del.br_blockcount;
				if (got.br_startoff > end &&
				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
					done = true;
					break;
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				struct xfs_bmbt_irec	prev;
				xfs_fileoff_t		unwrite_start;

				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
					ASSERT(0);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!isnullstartblock(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				unwrite_start = max3(start,
						     del.br_startoff - mod,
						     prev.br_startoff);
				/* mod is reused: blocks of prev kept in front. */
				mod = unwrite_start - prev.br_startoff;
				prev.br_startoff = unwrite_start;
				prev.br_startblock += mod;
				prev.br_blockcount -= mod;
				prev.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, whichfork, &icur, &cur,
						&prev, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, whichfork, &icur, &cur,
						&del, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			}
		}

delete:
		/* Remove del, via the delalloc or the real-extent path. */
		if (wasdel) {
			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
					&got, &del);
		} else {
			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
					&del, &tmp_logflags, whichfork,
					flags);
			logflags |= tmp_logflags;
		}

		if (error)
			goto error0;

		/* Charge the budget and continue just below what was removed. */
		max_len -= del.br_blockcount;
		end = del.br_startoff - 1;
nodelete:
		/*
		 * If not done go on to the next (previous) record.
		 */
		if (end != (xfs_fileoff_t)-1 && end >= start) {
			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
			    (got.br_startoff > end &&
			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
				done = true;
				break;
			}
			extno++;
		}
	}
	/* Tell the caller how much of the range is still left to process. */
	if (done || end == (xfs_fileoff_t)-1 || end < start)
		*rlen = 0;
	else
		*rlen = end - start + 1;

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
				&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	} else {
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
			whichfork);
	}

error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 ifp->if_format != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log inode even in the error case, if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (cur) {
		if (!error)
			cur->bc_ino.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}
	return error;
}
5591
5592/* Unmap a range of a file. */
5593int
5594xfs_bunmapi(
5595	xfs_trans_t		*tp,
5596	struct xfs_inode	*ip,
5597	xfs_fileoff_t		bno,
5598	xfs_filblks_t		len,
5599	int			flags,
5600	xfs_extnum_t		nexts,
5601	int			*done)
5602{
5603	int			error;
5604
5605	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5606	*done = (len == 0);
5607	return error;
5608}
5609
5610/*
5611 * Determine whether an extent shift can be accomplished by a merge with the
5612 * extent that precedes the target hole of the shift.
5613 */
5614STATIC bool
5615xfs_bmse_can_merge(
5616	struct xfs_bmbt_irec	*left,	/* preceding extent */
5617	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5618	xfs_fileoff_t		shift)	/* shift fsb */
5619{
5620	xfs_fileoff_t		startoff;
5621
5622	startoff = got->br_startoff - shift;
5623
5624	/*
5625	 * The extent, once shifted, must be adjacent in-file and on-disk with
5626	 * the preceding extent.
5627	 */
5628	if ((left->br_startoff + left->br_blockcount != startoff) ||
5629	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5630	    (left->br_state != got->br_state) ||
5631	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5632		return false;
5633
5634	return true;
5635}
5636
5637/*
5638 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5639 * hole in the file. If an extent shift would result in the extent being fully
5640 * adjacent to the extent that currently precedes the hole, we can merge with
5641 * the preceding extent rather than do the shift.
5642 *
5643 * This function assumes the caller has verified a shift-by-merge is possible
5644 * with the provided extents via xfs_bmse_can_merge().
5645 */
5646STATIC int
5647xfs_bmse_merge(
5648	struct xfs_trans		*tp,
5649	struct xfs_inode		*ip,
5650	int				whichfork,
5651	xfs_fileoff_t			shift,		/* shift fsb */
5652	struct xfs_iext_cursor		*icur,
5653	struct xfs_bmbt_irec		*got,		/* extent to shift */
5654	struct xfs_bmbt_irec		*left,		/* preceding extent */
5655	struct xfs_btree_cur		*cur,
5656	int				*logflags)	/* output */
5657{
5658	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5659	struct xfs_bmbt_irec		new;
5660	xfs_filblks_t			blockcount;
5661	int				error, i;
5662	struct xfs_mount		*mp = ip->i_mount;
5663
5664	blockcount = left->br_blockcount + got->br_blockcount;
5665
5666	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5667	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5668	ASSERT(xfs_bmse_can_merge(left, got, shift));
5669
5670	new = *left;
5671	new.br_blockcount = blockcount;
5672
5673	/*
5674	 * Update the on-disk extent count, the btree if necessary and log the
5675	 * inode.
5676	 */
5677	ifp->if_nextents--;
5678	*logflags |= XFS_ILOG_CORE;
5679	if (!cur) {
5680		*logflags |= XFS_ILOG_DEXT;
5681		goto done;
5682	}
5683
5684	/* lookup and remove the extent to merge */
5685	error = xfs_bmbt_lookup_eq(cur, got, &i);
5686	if (error)
5687		return error;
5688	if (XFS_IS_CORRUPT(mp, i != 1))
5689		return -EFSCORRUPTED;
5690
5691	error = xfs_btree_delete(cur, &i);
5692	if (error)
5693		return error;
5694	if (XFS_IS_CORRUPT(mp, i != 1))
5695		return -EFSCORRUPTED;
5696
5697	/* lookup and update size of the previous extent */
5698	error = xfs_bmbt_lookup_eq(cur, left, &i);
5699	if (error)
5700		return error;
5701	if (XFS_IS_CORRUPT(mp, i != 1))
5702		return -EFSCORRUPTED;
5703
5704	error = xfs_bmbt_update(cur, &new);
5705	if (error)
5706		return error;
5707
5708	/* change to extent format if required after extent removal */
5709	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5710	if (error)
5711		return error;
5712
5713done:
5714	xfs_iext_remove(ip, icur, 0);
5715	xfs_iext_prev(ifp, icur);
5716	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5717			&new);
5718
5719	/* update reverse mapping. rmap functions merge the rmaps for us */
5720	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5721	memcpy(&new, got, sizeof(new));
5722	new.br_startoff = left->br_startoff + left->br_blockcount;
5723	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5724	return 0;
5725}
5726
5727static int
5728xfs_bmap_shift_update_extent(
5729	struct xfs_trans	*tp,
5730	struct xfs_inode	*ip,
5731	int			whichfork,
5732	struct xfs_iext_cursor	*icur,
5733	struct xfs_bmbt_irec	*got,
5734	struct xfs_btree_cur	*cur,
5735	int			*logflags,
5736	xfs_fileoff_t		startoff)
5737{
5738	struct xfs_mount	*mp = ip->i_mount;
5739	struct xfs_bmbt_irec	prev = *got;
5740	int			error, i;
5741
5742	*logflags |= XFS_ILOG_CORE;
5743
5744	got->br_startoff = startoff;
5745
5746	if (cur) {
5747		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5748		if (error)
5749			return error;
5750		if (XFS_IS_CORRUPT(mp, i != 1))
5751			return -EFSCORRUPTED;
5752
5753		error = xfs_bmbt_update(cur, got);
5754		if (error)
5755			return error;
5756	} else {
5757		*logflags |= XFS_ILOG_DEXT;
5758	}
5759
5760	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5761			got);
5762
5763	/* update reverse mapping */
5764	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5765	xfs_rmap_map_extent(tp, ip, whichfork, got);
5766	return 0;
5767}
5768
5769int
5770xfs_bmap_collapse_extents(
5771	struct xfs_trans	*tp,
5772	struct xfs_inode	*ip,
5773	xfs_fileoff_t		*next_fsb,
5774	xfs_fileoff_t		offset_shift_fsb,
5775	bool			*done)
5776{
5777	int			whichfork = XFS_DATA_FORK;
5778	struct xfs_mount	*mp = ip->i_mount;
5779	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5780	struct xfs_btree_cur	*cur = NULL;
5781	struct xfs_bmbt_irec	got, prev;
5782	struct xfs_iext_cursor	icur;
5783	xfs_fileoff_t		new_startoff;
5784	int			error = 0;
5785	int			logflags = 0;
5786
5787	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5788	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5789		return -EFSCORRUPTED;
5790	}
5791
5792	if (XFS_FORCED_SHUTDOWN(mp))
5793		return -EIO;
5794
5795	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5796
5797	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5798		error = xfs_iread_extents(tp, ip, whichfork);
5799		if (error)
5800			return error;
5801	}
5802
5803	if (ifp->if_flags & XFS_IFBROOT) {
5804		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5805		cur->bc_ino.flags = 0;
5806	}
5807
5808	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5809		*done = true;
5810		goto del_cursor;
5811	}
5812	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5813		error = -EFSCORRUPTED;
5814		goto del_cursor;
5815	}
5816
5817	new_startoff = got.br_startoff - offset_shift_fsb;
5818	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5819		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5820			error = -EINVAL;
5821			goto del_cursor;
5822		}
5823
5824		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5825			error = xfs_bmse_merge(tp, ip, whichfork,
5826					offset_shift_fsb, &icur, &got, &prev,
5827					cur, &logflags);
5828			if (error)
5829				goto del_cursor;
5830			goto done;
5831		}
5832	} else {
5833		if (got.br_startoff < offset_shift_fsb) {
5834			error = -EINVAL;
5835			goto del_cursor;
5836		}
5837	}
5838
5839	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5840			cur, &logflags, new_startoff);
5841	if (error)
5842		goto del_cursor;
5843
5844done:
5845	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5846		*done = true;
5847		goto del_cursor;
5848	}
5849
5850	*next_fsb = got.br_startoff;
5851del_cursor:
5852	if (cur)
5853		xfs_btree_del_cursor(cur, error);
5854	if (logflags)
5855		xfs_trans_log_inode(tp, ip, logflags);
5856	return error;
5857}
5858
5859/* Make sure we won't be right-shifting an extent past the maximum bound. */
5860int
5861xfs_bmap_can_insert_extents(
5862	struct xfs_inode	*ip,
5863	xfs_fileoff_t		off,
5864	xfs_fileoff_t		shift)
5865{
5866	struct xfs_bmbt_irec	got;
5867	int			is_empty;
5868	int			error = 0;
5869
5870	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5871
5872	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5873		return -EIO;
5874
5875	xfs_ilock(ip, XFS_ILOCK_EXCL);
5876	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5877	if (!error && !is_empty && got.br_startoff >= off &&
5878	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5879		error = -EINVAL;
5880	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5881
5882	return error;
5883}
5884
5885int
5886xfs_bmap_insert_extents(
5887	struct xfs_trans	*tp,
5888	struct xfs_inode	*ip,
5889	xfs_fileoff_t		*next_fsb,
5890	xfs_fileoff_t		offset_shift_fsb,
5891	bool			*done,
5892	xfs_fileoff_t		stop_fsb)
5893{
5894	int			whichfork = XFS_DATA_FORK;
5895	struct xfs_mount	*mp = ip->i_mount;
5896	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5897	struct xfs_btree_cur	*cur = NULL;
5898	struct xfs_bmbt_irec	got, next;
5899	struct xfs_iext_cursor	icur;
5900	xfs_fileoff_t		new_startoff;
5901	int			error = 0;
5902	int			logflags = 0;
5903
5904	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5905	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5906		return -EFSCORRUPTED;
5907	}
5908
5909	if (XFS_FORCED_SHUTDOWN(mp))
5910		return -EIO;
5911
5912	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5913
5914	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5915		error = xfs_iread_extents(tp, ip, whichfork);
5916		if (error)
5917			return error;
5918	}
5919
5920	if (ifp->if_flags & XFS_IFBROOT) {
5921		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5922		cur->bc_ino.flags = 0;
5923	}
5924
5925	if (*next_fsb == NULLFSBLOCK) {
5926		xfs_iext_last(ifp, &icur);
5927		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5928		    stop_fsb > got.br_startoff) {
5929			*done = true;
5930			goto del_cursor;
5931		}
5932	} else {
5933		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5934			*done = true;
5935			goto del_cursor;
5936		}
5937	}
5938	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5939		error = -EFSCORRUPTED;
5940		goto del_cursor;
5941	}
5942
5943	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5944		error = -EFSCORRUPTED;
5945		goto del_cursor;
5946	}
5947
5948	new_startoff = got.br_startoff + offset_shift_fsb;
5949	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5950		if (new_startoff + got.br_blockcount > next.br_startoff) {
5951			error = -EINVAL;
5952			goto del_cursor;
5953		}
5954
5955		/*
5956		 * Unlike a left shift (which involves a hole punch), a right
5957		 * shift does not modify extent neighbors in any way.  We should
5958		 * never find mergeable extents in this scenario.  Check anyways
5959		 * and warn if we encounter two extents that could be one.
5960		 */
5961		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5962			WARN_ON_ONCE(1);
5963	}
5964
5965	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5966			cur, &logflags, new_startoff);
5967	if (error)
5968		goto del_cursor;
5969
5970	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5971	    stop_fsb >= got.br_startoff + got.br_blockcount) {
5972		*done = true;
5973		goto del_cursor;
5974	}
5975
5976	*next_fsb = got.br_startoff;
5977del_cursor:
5978	if (cur)
5979		xfs_btree_del_cursor(cur, error);
5980	if (logflags)
5981		xfs_trans_log_inode(tp, ip, logflags);
5982	return error;
5983}
5984
5985/*
5986 * Splits an extent into two extents at split_fsb block such that it is the
5987 * first block of the current_ext. @ext is a target extent to be split.
5988 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5989 * hole or the first block of extents, just return 0.
5990 */
5991int
5992xfs_bmap_split_extent(
5993	struct xfs_trans	*tp,
5994	struct xfs_inode	*ip,
5995	xfs_fileoff_t		split_fsb)
5996{
5997	int				whichfork = XFS_DATA_FORK;
5998	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5999	struct xfs_btree_cur		*cur = NULL;
6000	struct xfs_bmbt_irec		got;
6001	struct xfs_bmbt_irec		new; /* split extent */
6002	struct xfs_mount		*mp = ip->i_mount;
6003	xfs_fsblock_t			gotblkcnt; /* new block count for got */
6004	struct xfs_iext_cursor		icur;
6005	int				error = 0;
6006	int				logflags = 0;
6007	int				i = 0;
6008
6009	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6010	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6011		return -EFSCORRUPTED;
6012	}
6013
6014	if (XFS_FORCED_SHUTDOWN(mp))
6015		return -EIO;
6016
6017	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6018		/* Read in all the extents */
6019		error = xfs_iread_extents(tp, ip, whichfork);
6020		if (error)
6021			return error;
6022	}
6023
6024	/*
6025	 * If there are not extents, or split_fsb lies in a hole we are done.
6026	 */
6027	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6028	    got.br_startoff >= split_fsb)
6029		return 0;
6030
6031	gotblkcnt = split_fsb - got.br_startoff;
6032	new.br_startoff = split_fsb;
6033	new.br_startblock = got.br_startblock + gotblkcnt;
6034	new.br_blockcount = got.br_blockcount - gotblkcnt;
6035	new.br_state = got.br_state;
6036
6037	if (ifp->if_flags & XFS_IFBROOT) {
6038		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6039		cur->bc_ino.flags = 0;
6040		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6041		if (error)
6042			goto del_cursor;
6043		if (XFS_IS_CORRUPT(mp, i != 1)) {
6044			error = -EFSCORRUPTED;
6045			goto del_cursor;
6046		}
6047	}
6048
6049	got.br_blockcount = gotblkcnt;
6050	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6051			&got);
6052
6053	logflags = XFS_ILOG_CORE;
6054	if (cur) {
6055		error = xfs_bmbt_update(cur, &got);
6056		if (error)
6057			goto del_cursor;
6058	} else
6059		logflags |= XFS_ILOG_DEXT;
6060
6061	/* Add new extent */
6062	xfs_iext_next(ifp, &icur);
6063	xfs_iext_insert(ip, &icur, &new, 0);
6064	ifp->if_nextents++;
6065
6066	if (cur) {
6067		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6068		if (error)
6069			goto del_cursor;
6070		if (XFS_IS_CORRUPT(mp, i != 0)) {
6071			error = -EFSCORRUPTED;
6072			goto del_cursor;
6073		}
6074		error = xfs_btree_insert(cur, &i);
6075		if (error)
6076			goto del_cursor;
6077		if (XFS_IS_CORRUPT(mp, i != 1)) {
6078			error = -EFSCORRUPTED;
6079			goto del_cursor;
6080		}
6081	}
6082
6083	/*
6084	 * Convert to a btree if necessary.
6085	 */
6086	if (xfs_bmap_needs_btree(ip, whichfork)) {
6087		int tmp_logflags; /* partial log flag return val */
6088
6089		ASSERT(cur == NULL);
6090		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6091				&tmp_logflags, whichfork);
6092		logflags |= tmp_logflags;
6093	}
6094
6095del_cursor:
6096	if (cur) {
6097		cur->bc_ino.allocated = 0;
6098		xfs_btree_del_cursor(cur, error);
6099	}
6100
6101	if (logflags)
6102		xfs_trans_log_inode(tp, ip, logflags);
6103	return error;
6104}
6105
6106/* Deferred mapping is only for real extents in the data fork. */
6107static bool
6108xfs_bmap_is_update_needed(
6109	struct xfs_bmbt_irec	*bmap)
6110{
6111	return  bmap->br_startblock != HOLESTARTBLOCK &&
6112		bmap->br_startblock != DELAYSTARTBLOCK;
6113}
6114
6115/* Record a bmap intent. */
6116static int
6117__xfs_bmap_add(
6118	struct xfs_trans		*tp,
6119	enum xfs_bmap_intent_type	type,
6120	struct xfs_inode		*ip,
6121	int				whichfork,
6122	struct xfs_bmbt_irec		*bmap)
6123{
6124	struct xfs_bmap_intent		*bi;
6125
6126	trace_xfs_bmap_defer(tp->t_mountp,
6127			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6128			type,
6129			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6130			ip->i_ino, whichfork,
6131			bmap->br_startoff,
6132			bmap->br_blockcount,
6133			bmap->br_state);
6134
6135	bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6136	INIT_LIST_HEAD(&bi->bi_list);
6137	bi->bi_type = type;
6138	bi->bi_owner = ip;
6139	bi->bi_whichfork = whichfork;
6140	bi->bi_bmap = *bmap;
6141
6142	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6143	return 0;
6144}
6145
6146/* Map an extent into a file. */
6147void
6148xfs_bmap_map_extent(
6149	struct xfs_trans	*tp,
6150	struct xfs_inode	*ip,
6151	struct xfs_bmbt_irec	*PREV)
6152{
6153	if (!xfs_bmap_is_update_needed(PREV))
6154		return;
6155
6156	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6157}
6158
6159/* Unmap an extent out of a file. */
6160void
6161xfs_bmap_unmap_extent(
6162	struct xfs_trans	*tp,
6163	struct xfs_inode	*ip,
6164	struct xfs_bmbt_irec	*PREV)
6165{
6166	if (!xfs_bmap_is_update_needed(PREV))
6167		return;
6168
6169	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6170}
6171
6172/*
6173 * Process one of the deferred bmap operations.  We pass back the
6174 * btree cursor to maintain our lock on the bmapbt between calls.
6175 */
6176int
6177xfs_bmap_finish_one(
6178	struct xfs_trans		*tp,
6179	struct xfs_inode		*ip,
6180	enum xfs_bmap_intent_type	type,
6181	int				whichfork,
6182	xfs_fileoff_t			startoff,
6183	xfs_fsblock_t			startblock,
6184	xfs_filblks_t			*blockcount,
6185	xfs_exntst_t			state)
6186{
6187	int				error = 0;
6188
6189	ASSERT(tp->t_firstblock == NULLFSBLOCK);
6190
6191	trace_xfs_bmap_deferred(tp->t_mountp,
6192			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6193			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6194			ip->i_ino, whichfork, startoff, *blockcount, state);
6195
6196	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6197		return -EFSCORRUPTED;
6198
6199	if (XFS_TEST_ERROR(false, tp->t_mountp,
6200			XFS_ERRTAG_BMAP_FINISH_ONE))
6201		return -EIO;
6202
6203	switch (type) {
6204	case XFS_BMAP_MAP:
6205		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6206				startblock, 0);
6207		*blockcount = 0;
6208		break;
6209	case XFS_BMAP_UNMAP:
6210		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6211				XFS_BMAPI_REMAP, 1);
6212		break;
6213	default:
6214		ASSERT(0);
6215		error = -EFSCORRUPTED;
6216	}
6217
6218	return error;
6219}
6220
6221/* Check that an inode's extent does not have invalid flags or bad ranges. */
6222xfs_failaddr_t
6223xfs_bmap_validate_extent(
6224	struct xfs_inode	*ip,
6225	int			whichfork,
6226	struct xfs_bmbt_irec	*irec)
6227{
6228	struct xfs_mount	*mp = ip->i_mount;
6229	xfs_fsblock_t		endfsb;
6230	bool			isrt;
6231
6232	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
6233		return __this_address;
6234	if (irec->br_startoff + irec->br_blockcount <= irec->br_startoff)
6235		return __this_address;
6236
6237	isrt = XFS_IS_REALTIME_INODE(ip);
6238	endfsb = irec->br_startblock + irec->br_blockcount - 1;
6239	if (isrt && whichfork == XFS_DATA_FORK) {
6240		if (!xfs_verify_rtbno(mp, irec->br_startblock))
6241			return __this_address;
6242		if (!xfs_verify_rtbno(mp, endfsb))
6243			return __this_address;
6244	} else {
6245		if (!xfs_verify_fsbno(mp, irec->br_startblock))
6246			return __this_address;
6247		if (!xfs_verify_fsbno(mp, endfsb))
6248			return __this_address;
6249		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6250		    XFS_FSB_TO_AGNO(mp, endfsb))
6251			return __this_address;
6252	}
6253	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6254		return __this_address;
6255	return NULL;
6256}
6257