1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2017 Christoph Hellwig.
4 */
5
6#include "xfs.h"
7#include "xfs_shared.h"
8#include "xfs_format.h"
9#include "xfs_bit.h"
10#include "xfs_log_format.h"
11#include "xfs_inode.h"
12#include "xfs_trans_resv.h"
13#include "xfs_mount.h"
14#include "xfs_trace.h"
15
16/*
17 * In-core extent record layout:
18 *
19 * +-------+----------------------------+
20 * | 00:53 | all 54 bits of startoff    |
21 * | 54:63 | low 10 bits of startblock  |
22 * +-------+----------------------------+
23 * | 00:20 | all 21 bits of length      |
24 * |    21 | unwritten extent bit       |
25 * | 22:63 | high 42 bits of startblock |
26 * +-------+----------------------------+
27 */
28#define XFS_IEXT_STARTOFF_MASK		xfs_mask64lo(BMBT_STARTOFF_BITLEN)
29#define XFS_IEXT_LENGTH_MASK		xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
30#define XFS_IEXT_STARTBLOCK_MASK	xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
31
32struct xfs_iext_rec {
33	uint64_t			lo;
34	uint64_t			hi;
35};
36
37/*
38 * Given that the length can't be a zero, only an empty hi value indicates an
39 * unused record.
40 */
41static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
42{
43	return rec->hi == 0;
44}
45
46static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
47{
48	rec->lo = 0;
49	rec->hi = 0;
50}
51
52static void
53xfs_iext_set(
54	struct xfs_iext_rec	*rec,
55	struct xfs_bmbt_irec	*irec)
56{
57	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
58	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
59	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
60
61	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
62	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
63
64	rec->lo |= (irec->br_startblock << 54);
65	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
66
67	if (irec->br_state == XFS_EXT_UNWRITTEN)
68		rec->hi |= (1 << 21);
69}
70
71static void
72xfs_iext_get(
73	struct xfs_bmbt_irec	*irec,
74	struct xfs_iext_rec	*rec)
75{
76	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
77	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
78
79	irec->br_startblock = rec->lo >> 54;
80	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
81
82	if (rec->hi & (1 << 21))
83		irec->br_state = XFS_EXT_UNWRITTEN;
84	else
85		irec->br_state = XFS_EXT_NORM;
86}
87
88enum {
89	NODE_SIZE	= 256,
90	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
91	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
92				sizeof(struct xfs_iext_rec),
93};
94
95/*
96 * In-core extent btree block layout:
97 *
98 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
99 *
100 * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
101 * contain the startoffset, blockcount, startblock and unwritten extent flag.
102 * See above for the exact format, followed by pointers to the previous and next
103 * leaf blocks (if there are any).
104 *
105 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
106 * by an equal number of pointers to the btree blocks at the next lower level.
107 *
108 *		+-------+-------+-------+-------+-------+----------+----------+
109 * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
110 *		+-------+-------+-------+-------+-------+----------+----------+
111 *
112 *		+-------+-------+-------+-------+-------+-------+------+-------+
113 * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
114 *		+-------+-------+-------+-------+-------+-------+------+-------+
115 */
116struct xfs_iext_node {
117	uint64_t		keys[KEYS_PER_NODE];
118#define XFS_IEXT_KEY_INVALID	(1ULL << 63)
119	void			*ptrs[KEYS_PER_NODE];
120};
121
122struct xfs_iext_leaf {
123	struct xfs_iext_rec	recs[RECS_PER_LEAF];
124	struct xfs_iext_leaf	*prev;
125	struct xfs_iext_leaf	*next;
126};
127
128inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
129{
130	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
131}
132
133static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
134{
135	if (ifp->if_height == 1)
136		return xfs_iext_count(ifp);
137	return RECS_PER_LEAF;
138}
139
140static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
141{
142	return &cur->leaf->recs[cur->pos];
143}
144
145static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
146		struct xfs_iext_cursor *cur)
147{
148	if (!cur->leaf)
149		return false;
150	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
151		return false;
152	if (xfs_iext_rec_is_empty(cur_rec(cur)))
153		return false;
154	return true;
155}
156
157static void *
158xfs_iext_find_first_leaf(
159	struct xfs_ifork	*ifp)
160{
161	struct xfs_iext_node	*node = ifp->if_u1.if_root;
162	int			height;
163
164	if (!ifp->if_height)
165		return NULL;
166
167	for (height = ifp->if_height; height > 1; height--) {
168		node = node->ptrs[0];
169		ASSERT(node);
170	}
171
172	return node;
173}
174
175static void *
176xfs_iext_find_last_leaf(
177	struct xfs_ifork	*ifp)
178{
179	struct xfs_iext_node	*node = ifp->if_u1.if_root;
180	int			height, i;
181
182	if (!ifp->if_height)
183		return NULL;
184
185	for (height = ifp->if_height; height > 1; height--) {
186		for (i = 1; i < KEYS_PER_NODE; i++)
187			if (!node->ptrs[i])
188				break;
189		node = node->ptrs[i - 1];
190		ASSERT(node);
191	}
192
193	return node;
194}
195
196void
197xfs_iext_first(
198	struct xfs_ifork	*ifp,
199	struct xfs_iext_cursor	*cur)
200{
201	cur->pos = 0;
202	cur->leaf = xfs_iext_find_first_leaf(ifp);
203}
204
205void
206xfs_iext_last(
207	struct xfs_ifork	*ifp,
208	struct xfs_iext_cursor	*cur)
209{
210	int			i;
211
212	cur->leaf = xfs_iext_find_last_leaf(ifp);
213	if (!cur->leaf) {
214		cur->pos = 0;
215		return;
216	}
217
218	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
219		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
220			break;
221	}
222	cur->pos = i - 1;
223}
224
225void
226xfs_iext_next(
227	struct xfs_ifork	*ifp,
228	struct xfs_iext_cursor	*cur)
229{
230	if (!cur->leaf) {
231		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
232		xfs_iext_first(ifp, cur);
233		return;
234	}
235
236	ASSERT(cur->pos >= 0);
237	ASSERT(cur->pos < xfs_iext_max_recs(ifp));
238
239	cur->pos++;
240	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
241	    cur->leaf->next) {
242		cur->leaf = cur->leaf->next;
243		cur->pos = 0;
244	}
245}
246
247void
248xfs_iext_prev(
249	struct xfs_ifork	*ifp,
250	struct xfs_iext_cursor	*cur)
251{
252	if (!cur->leaf) {
253		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
254		xfs_iext_last(ifp, cur);
255		return;
256	}
257
258	ASSERT(cur->pos >= 0);
259	ASSERT(cur->pos <= RECS_PER_LEAF);
260
261recurse:
262	do {
263		cur->pos--;
264		if (xfs_iext_valid(ifp, cur))
265			return;
266	} while (cur->pos > 0);
267
268	if (ifp->if_height > 1 && cur->leaf->prev) {
269		cur->leaf = cur->leaf->prev;
270		cur->pos = RECS_PER_LEAF;
271		goto recurse;
272	}
273}
274
275static inline int
276xfs_iext_key_cmp(
277	struct xfs_iext_node	*node,
278	int			n,
279	xfs_fileoff_t		offset)
280{
281	if (node->keys[n] > offset)
282		return 1;
283	if (node->keys[n] < offset)
284		return -1;
285	return 0;
286}
287
288static inline int
289xfs_iext_rec_cmp(
290	struct xfs_iext_rec	*rec,
291	xfs_fileoff_t		offset)
292{
293	uint64_t		rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
294	uint32_t		rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
295
296	if (rec_offset > offset)
297		return 1;
298	if (rec_offset + rec_len <= offset)
299		return -1;
300	return 0;
301}
302
303static void *
304xfs_iext_find_level(
305	struct xfs_ifork	*ifp,
306	xfs_fileoff_t		offset,
307	int			level)
308{
309	struct xfs_iext_node	*node = ifp->if_u1.if_root;
310	int			height, i;
311
312	if (!ifp->if_height)
313		return NULL;
314
315	for (height = ifp->if_height; height > level; height--) {
316		for (i = 1; i < KEYS_PER_NODE; i++)
317			if (xfs_iext_key_cmp(node, i, offset) > 0)
318				break;
319
320		node = node->ptrs[i - 1];
321		if (!node)
322			break;
323	}
324
325	return node;
326}
327
328static int
329xfs_iext_node_pos(
330	struct xfs_iext_node	*node,
331	xfs_fileoff_t		offset)
332{
333	int			i;
334
335	for (i = 1; i < KEYS_PER_NODE; i++) {
336		if (xfs_iext_key_cmp(node, i, offset) > 0)
337			break;
338	}
339
340	return i - 1;
341}
342
343static int
344xfs_iext_node_insert_pos(
345	struct xfs_iext_node	*node,
346	xfs_fileoff_t		offset)
347{
348	int			i;
349
350	for (i = 0; i < KEYS_PER_NODE; i++) {
351		if (xfs_iext_key_cmp(node, i, offset) > 0)
352			return i;
353	}
354
355	return KEYS_PER_NODE;
356}
357
358static int
359xfs_iext_node_nr_entries(
360	struct xfs_iext_node	*node,
361	int			start)
362{
363	int			i;
364
365	for (i = start; i < KEYS_PER_NODE; i++) {
366		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
367			break;
368	}
369
370	return i;
371}
372
373static int
374xfs_iext_leaf_nr_entries(
375	struct xfs_ifork	*ifp,
376	struct xfs_iext_leaf	*leaf,
377	int			start)
378{
379	int			i;
380
381	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
382		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
383			break;
384	}
385
386	return i;
387}
388
389static inline uint64_t
390xfs_iext_leaf_key(
391	struct xfs_iext_leaf	*leaf,
392	int			n)
393{
394	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
395}
396
397static void
398xfs_iext_grow(
399	struct xfs_ifork	*ifp)
400{
401	struct xfs_iext_node	*node = kmem_zalloc(NODE_SIZE, KM_NOFS);
402	int			i;
403
404	if (ifp->if_height == 1) {
405		struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
406
407		node->keys[0] = xfs_iext_leaf_key(prev, 0);
408		node->ptrs[0] = prev;
409	} else  {
410		struct xfs_iext_node *prev = ifp->if_u1.if_root;
411
412		ASSERT(ifp->if_height > 1);
413
414		node->keys[0] = prev->keys[0];
415		node->ptrs[0] = prev;
416	}
417
418	for (i = 1; i < KEYS_PER_NODE; i++)
419		node->keys[i] = XFS_IEXT_KEY_INVALID;
420
421	ifp->if_u1.if_root = node;
422	ifp->if_height++;
423}
424
425static void
426xfs_iext_update_node(
427	struct xfs_ifork	*ifp,
428	xfs_fileoff_t		old_offset,
429	xfs_fileoff_t		new_offset,
430	int			level,
431	void			*ptr)
432{
433	struct xfs_iext_node	*node = ifp->if_u1.if_root;
434	int			height, i;
435
436	for (height = ifp->if_height; height > level; height--) {
437		for (i = 0; i < KEYS_PER_NODE; i++) {
438			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
439				break;
440			if (node->keys[i] == old_offset)
441				node->keys[i] = new_offset;
442		}
443		node = node->ptrs[i - 1];
444		ASSERT(node);
445	}
446
447	ASSERT(node == ptr);
448}
449
450static struct xfs_iext_node *
451xfs_iext_split_node(
452	struct xfs_iext_node	**nodep,
453	int			*pos,
454	int			*nr_entries)
455{
456	struct xfs_iext_node	*node = *nodep;
457	struct xfs_iext_node	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
458	const int		nr_move = KEYS_PER_NODE / 2;
459	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
460	int			i = 0;
461
462	/* for sequential append operations just spill over into the new node */
463	if (*pos == KEYS_PER_NODE) {
464		*nodep = new;
465		*pos = 0;
466		*nr_entries = 0;
467		goto done;
468	}
469
470
471	for (i = 0; i < nr_move; i++) {
472		new->keys[i] = node->keys[nr_keep + i];
473		new->ptrs[i] = node->ptrs[nr_keep + i];
474
475		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
476		node->ptrs[nr_keep + i] = NULL;
477	}
478
479	if (*pos >= nr_keep) {
480		*nodep = new;
481		*pos -= nr_keep;
482		*nr_entries = nr_move;
483	} else {
484		*nr_entries = nr_keep;
485	}
486done:
487	for (; i < KEYS_PER_NODE; i++)
488		new->keys[i] = XFS_IEXT_KEY_INVALID;
489	return new;
490}
491
492static void
493xfs_iext_insert_node(
494	struct xfs_ifork	*ifp,
495	uint64_t		offset,
496	void			*ptr,
497	int			level)
498{
499	struct xfs_iext_node	*node, *new;
500	int			i, pos, nr_entries;
501
502again:
503	if (ifp->if_height < level)
504		xfs_iext_grow(ifp);
505
506	new = NULL;
507	node = xfs_iext_find_level(ifp, offset, level);
508	pos = xfs_iext_node_insert_pos(node, offset);
509	nr_entries = xfs_iext_node_nr_entries(node, pos);
510
511	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
512	ASSERT(nr_entries <= KEYS_PER_NODE);
513
514	if (nr_entries == KEYS_PER_NODE)
515		new = xfs_iext_split_node(&node, &pos, &nr_entries);
516
517	/*
518	 * Update the pointers in higher levels if the first entry changes
519	 * in an existing node.
520	 */
521	if (node != new && pos == 0 && nr_entries > 0)
522		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
523
524	for (i = nr_entries; i > pos; i--) {
525		node->keys[i] = node->keys[i - 1];
526		node->ptrs[i] = node->ptrs[i - 1];
527	}
528	node->keys[pos] = offset;
529	node->ptrs[pos] = ptr;
530
531	if (new) {
532		offset = new->keys[0];
533		ptr = new;
534		level++;
535		goto again;
536	}
537}
538
539static struct xfs_iext_leaf *
540xfs_iext_split_leaf(
541	struct xfs_iext_cursor	*cur,
542	int			*nr_entries)
543{
544	struct xfs_iext_leaf	*leaf = cur->leaf;
545	struct xfs_iext_leaf	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
546	const int		nr_move = RECS_PER_LEAF / 2;
547	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
548	int			i;
549
550	/* for sequential append operations just spill over into the new node */
551	if (cur->pos == RECS_PER_LEAF) {
552		cur->leaf = new;
553		cur->pos = 0;
554		*nr_entries = 0;
555		goto done;
556	}
557
558	for (i = 0; i < nr_move; i++) {
559		new->recs[i] = leaf->recs[nr_keep + i];
560		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
561	}
562
563	if (cur->pos >= nr_keep) {
564		cur->leaf = new;
565		cur->pos -= nr_keep;
566		*nr_entries = nr_move;
567	} else {
568		*nr_entries = nr_keep;
569	}
570done:
571	if (leaf->next)
572		leaf->next->prev = new;
573	new->next = leaf->next;
574	new->prev = leaf;
575	leaf->next = new;
576	return new;
577}
578
579static void
580xfs_iext_alloc_root(
581	struct xfs_ifork	*ifp,
582	struct xfs_iext_cursor	*cur)
583{
584	ASSERT(ifp->if_bytes == 0);
585
586	ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
587	ifp->if_height = 1;
588
589	/* now that we have a node step into it */
590	cur->leaf = ifp->if_u1.if_root;
591	cur->pos = 0;
592}
593
594static void
595xfs_iext_realloc_root(
596	struct xfs_ifork	*ifp,
597	struct xfs_iext_cursor	*cur)
598{
599	int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
600	void *new;
601
602	/* account for the prev/next pointers */
603	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
604		new_size = NODE_SIZE;
605
606	new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
607	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
608	ifp->if_u1.if_root = new;
609	cur->leaf = new;
610}
611
612/*
613 * Increment the sequence counter on extent tree changes. If we are on a COW
614 * fork, this allows the writeback code to skip looking for a COW extent if the
615 * COW fork hasn't changed. We use WRITE_ONCE here to ensure the update to the
616 * sequence counter is seen before the modifications to the extent tree itself
617 * take effect.
618 */
619static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
620{
621	WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
622}
623
624void
625xfs_iext_insert(
626	struct xfs_inode	*ip,
627	struct xfs_iext_cursor	*cur,
628	struct xfs_bmbt_irec	*irec,
629	int			state)
630{
631	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
632	xfs_fileoff_t		offset = irec->br_startoff;
633	struct xfs_iext_leaf	*new = NULL;
634	int			nr_entries, i;
635
636	xfs_iext_inc_seq(ifp);
637
638	if (ifp->if_height == 0)
639		xfs_iext_alloc_root(ifp, cur);
640	else if (ifp->if_height == 1)
641		xfs_iext_realloc_root(ifp, cur);
642
643	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
644	ASSERT(nr_entries <= RECS_PER_LEAF);
645	ASSERT(cur->pos >= nr_entries ||
646	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
647
648	if (nr_entries == RECS_PER_LEAF)
649		new = xfs_iext_split_leaf(cur, &nr_entries);
650
651	/*
652	 * Update the pointers in higher levels if the first entry changes
653	 * in an existing node.
654	 */
655	if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
656		xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
657				offset, 1, cur->leaf);
658	}
659
660	for (i = nr_entries; i > cur->pos; i--)
661		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
662	xfs_iext_set(cur_rec(cur), irec);
663	ifp->if_bytes += sizeof(struct xfs_iext_rec);
664
665	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
666
667	if (new)
668		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
669}
670
671static struct xfs_iext_node *
672xfs_iext_rebalance_node(
673	struct xfs_iext_node	*parent,
674	int			*pos,
675	struct xfs_iext_node	*node,
676	int			nr_entries)
677{
678	/*
679	 * If the neighbouring nodes are completely full, or have different
680	 * parents, we might never be able to merge our node, and will only
681	 * delete it once the number of entries hits zero.
682	 */
683	if (nr_entries == 0)
684		return node;
685
686	if (*pos > 0) {
687		struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
688		int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
689
690		if (nr_prev + nr_entries <= KEYS_PER_NODE) {
691			for (i = 0; i < nr_entries; i++) {
692				prev->keys[nr_prev + i] = node->keys[i];
693				prev->ptrs[nr_prev + i] = node->ptrs[i];
694			}
695			return node;
696		}
697	}
698
699	if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
700		struct xfs_iext_node *next = parent->ptrs[*pos + 1];
701		int nr_next = xfs_iext_node_nr_entries(next, 0), i;
702
703		if (nr_entries + nr_next <= KEYS_PER_NODE) {
704			/*
705			 * Merge the next node into this node so that we don't
706			 * have to do an additional update of the keys in the
707			 * higher levels.
708			 */
709			for (i = 0; i < nr_next; i++) {
710				node->keys[nr_entries + i] = next->keys[i];
711				node->ptrs[nr_entries + i] = next->ptrs[i];
712			}
713
714			++*pos;
715			return next;
716		}
717	}
718
719	return NULL;
720}
721
722static void
723xfs_iext_remove_node(
724	struct xfs_ifork	*ifp,
725	xfs_fileoff_t		offset,
726	void			*victim)
727{
728	struct xfs_iext_node	*node, *parent;
729	int			level = 2, pos, nr_entries, i;
730
731	ASSERT(level <= ifp->if_height);
732	node = xfs_iext_find_level(ifp, offset, level);
733	pos = xfs_iext_node_pos(node, offset);
734again:
735	ASSERT(node->ptrs[pos]);
736	ASSERT(node->ptrs[pos] == victim);
737	kmem_free(victim);
738
739	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
740	offset = node->keys[0];
741	for (i = pos; i < nr_entries; i++) {
742		node->keys[i] = node->keys[i + 1];
743		node->ptrs[i] = node->ptrs[i + 1];
744	}
745	node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
746	node->ptrs[nr_entries] = NULL;
747
748	if (pos == 0 && nr_entries > 0) {
749		xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
750		offset = node->keys[0];
751	}
752
753	if (nr_entries >= KEYS_PER_NODE / 2)
754		return;
755
756	if (level < ifp->if_height) {
757		/*
758		 * If we aren't at the root yet try to find a neighbour node to
759		 * merge with (or delete the node if it is empty), and then
760		 * recurse up to the next level.
761		 */
762		level++;
763		parent = xfs_iext_find_level(ifp, offset, level);
764		pos = xfs_iext_node_pos(parent, offset);
765
766		ASSERT(pos != KEYS_PER_NODE);
767		ASSERT(parent->ptrs[pos] == node);
768
769		node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
770		if (node) {
771			victim = node;
772			node = parent;
773			goto again;
774		}
775	} else if (nr_entries == 1) {
776		/*
777		 * If we are at the root and only one entry is left we can just
778		 * free this node and update the root pointer.
779		 */
780		ASSERT(node == ifp->if_u1.if_root);
781		ifp->if_u1.if_root = node->ptrs[0];
782		ifp->if_height--;
783		kmem_free(node);
784	}
785}
786
787static void
788xfs_iext_rebalance_leaf(
789	struct xfs_ifork	*ifp,
790	struct xfs_iext_cursor	*cur,
791	struct xfs_iext_leaf	*leaf,
792	xfs_fileoff_t		offset,
793	int			nr_entries)
794{
795	/*
796	 * If the neighbouring nodes are completely full we might never be able
797	 * to merge our node, and will only delete it once the number of
798	 * entries hits zero.
799	 */
800	if (nr_entries == 0)
801		goto remove_node;
802
803	if (leaf->prev) {
804		int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
805
806		if (nr_prev + nr_entries <= RECS_PER_LEAF) {
807			for (i = 0; i < nr_entries; i++)
808				leaf->prev->recs[nr_prev + i] = leaf->recs[i];
809
810			if (cur->leaf == leaf) {
811				cur->leaf = leaf->prev;
812				cur->pos += nr_prev;
813			}
814			goto remove_node;
815		}
816	}
817
818	if (leaf->next) {
819		int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
820
821		if (nr_entries + nr_next <= RECS_PER_LEAF) {
822			/*
823			 * Merge the next node into this node so that we don't
824			 * have to do an additional update of the keys in the
825			 * higher levels.
826			 */
827			for (i = 0; i < nr_next; i++) {
828				leaf->recs[nr_entries + i] =
829					leaf->next->recs[i];
830			}
831
832			if (cur->leaf == leaf->next) {
833				cur->leaf = leaf;
834				cur->pos += nr_entries;
835			}
836
837			offset = xfs_iext_leaf_key(leaf->next, 0);
838			leaf = leaf->next;
839			goto remove_node;
840		}
841	}
842
843	return;
844remove_node:
845	if (leaf->prev)
846		leaf->prev->next = leaf->next;
847	if (leaf->next)
848		leaf->next->prev = leaf->prev;
849	xfs_iext_remove_node(ifp, offset, leaf);
850}
851
852static void
853xfs_iext_free_last_leaf(
854	struct xfs_ifork	*ifp)
855{
856	ifp->if_height--;
857	kmem_free(ifp->if_u1.if_root);
858	ifp->if_u1.if_root = NULL;
859}
860
861void
862xfs_iext_remove(
863	struct xfs_inode	*ip,
864	struct xfs_iext_cursor	*cur,
865	int			state)
866{
867	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
868	struct xfs_iext_leaf	*leaf = cur->leaf;
869	xfs_fileoff_t		offset = xfs_iext_leaf_key(leaf, 0);
870	int			i, nr_entries;
871
872	trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
873
874	ASSERT(ifp->if_height > 0);
875	ASSERT(ifp->if_u1.if_root != NULL);
876	ASSERT(xfs_iext_valid(ifp, cur));
877
878	xfs_iext_inc_seq(ifp);
879
880	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
881	for (i = cur->pos; i < nr_entries; i++)
882		leaf->recs[i] = leaf->recs[i + 1];
883	xfs_iext_rec_clear(&leaf->recs[nr_entries]);
884	ifp->if_bytes -= sizeof(struct xfs_iext_rec);
885
886	if (cur->pos == 0 && nr_entries > 0) {
887		xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
888				leaf);
889		offset = xfs_iext_leaf_key(leaf, 0);
890	} else if (cur->pos == nr_entries) {
891		if (ifp->if_height > 1 && leaf->next)
892			cur->leaf = leaf->next;
893		else
894			cur->leaf = NULL;
895		cur->pos = 0;
896	}
897
898	if (nr_entries >= RECS_PER_LEAF / 2)
899		return;
900
901	if (ifp->if_height > 1)
902		xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
903	else if (nr_entries == 0)
904		xfs_iext_free_last_leaf(ifp);
905}
906
907/*
908 * Lookup the extent covering bno.
909 *
910 * If there is an extent covering bno return the extent index, and store the
911 * expanded extent structure in *gotp, and the extent cursor in *cur.
912 * If there is no extent covering bno, but there is an extent after it (e.g.
913 * it lies in a hole) return that extent in *gotp and its cursor in *cur
914 * instead.
915 * If bno is beyond the last extent return false, and return an invalid
916 * cursor value.
917 */
918bool
919xfs_iext_lookup_extent(
920	struct xfs_inode	*ip,
921	struct xfs_ifork	*ifp,
922	xfs_fileoff_t		offset,
923	struct xfs_iext_cursor	*cur,
924	struct xfs_bmbt_irec	*gotp)
925{
926	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
927
928	cur->leaf = xfs_iext_find_level(ifp, offset, 1);
929	if (!cur->leaf) {
930		cur->pos = 0;
931		return false;
932	}
933
934	for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
935		struct xfs_iext_rec *rec = cur_rec(cur);
936
937		if (xfs_iext_rec_is_empty(rec))
938			break;
939		if (xfs_iext_rec_cmp(rec, offset) >= 0)
940			goto found;
941	}
942
943	/* Try looking in the next node for an entry > offset */
944	if (ifp->if_height == 1 || !cur->leaf->next)
945		return false;
946	cur->leaf = cur->leaf->next;
947	cur->pos = 0;
948	if (!xfs_iext_valid(ifp, cur))
949		return false;
950found:
951	xfs_iext_get(gotp, cur_rec(cur));
952	return true;
953}
954
955/*
956 * Returns the last extent before end, and if this extent doesn't cover
957 * end, update end to the end of the extent.
958 */
959bool
960xfs_iext_lookup_extent_before(
961	struct xfs_inode	*ip,
962	struct xfs_ifork	*ifp,
963	xfs_fileoff_t		*end,
964	struct xfs_iext_cursor	*cur,
965	struct xfs_bmbt_irec	*gotp)
966{
967	/* could be optimized to not even look up the next on a match.. */
968	if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
969	    gotp->br_startoff <= *end - 1)
970		return true;
971	if (!xfs_iext_prev_extent(ifp, cur, gotp))
972		return false;
973	*end = gotp->br_startoff + gotp->br_blockcount;
974	return true;
975}
976
977void
978xfs_iext_update_extent(
979	struct xfs_inode	*ip,
980	int			state,
981	struct xfs_iext_cursor	*cur,
982	struct xfs_bmbt_irec	*new)
983{
984	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
985
986	xfs_iext_inc_seq(ifp);
987
988	if (cur->pos == 0) {
989		struct xfs_bmbt_irec	old;
990
991		xfs_iext_get(&old, cur_rec(cur));
992		if (new->br_startoff != old.br_startoff) {
993			xfs_iext_update_node(ifp, old.br_startoff,
994					new->br_startoff, 1, cur->leaf);
995		}
996	}
997
998	trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
999	xfs_iext_set(cur_rec(cur), new);
1000	trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
1001}
1002
1003/*
1004 * Return true if the cursor points at an extent and return the extent structure
1005 * in gotp.  Else return false.
1006 */
1007bool
1008xfs_iext_get_extent(
1009	struct xfs_ifork	*ifp,
1010	struct xfs_iext_cursor	*cur,
1011	struct xfs_bmbt_irec	*gotp)
1012{
1013	if (!xfs_iext_valid(ifp, cur))
1014		return false;
1015	xfs_iext_get(gotp, cur_rec(cur));
1016	return true;
1017}
1018
1019/*
1020 * This is a recursive function, because of that we need to be extremely
1021 * careful with stack usage.
1022 */
1023static void
1024xfs_iext_destroy_node(
1025	struct xfs_iext_node	*node,
1026	int			level)
1027{
1028	int			i;
1029
1030	if (level > 1) {
1031		for (i = 0; i < KEYS_PER_NODE; i++) {
1032			if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1033				break;
1034			xfs_iext_destroy_node(node->ptrs[i], level - 1);
1035		}
1036	}
1037
1038	kmem_free(node);
1039}
1040
1041void
1042xfs_iext_destroy(
1043	struct xfs_ifork	*ifp)
1044{
1045	xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
1046
1047	ifp->if_bytes = 0;
1048	ifp->if_height = 0;
1049	ifp->if_u1.if_root = NULL;
1050}
1051