xref: /kernel/linux/linux-6.6/fs/gfs2/bmap.c (revision 62306a36)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large: the maximum number of 64-bit
 * pointers in a 4k block is 512, so __u16 is fine for that. Keeping it
 * small saves stack space.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
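
/*
 * Note: mp_fheight is the height the path was computed for by
 * find_metapath(); mp_aheight is how far lookup_metapath() actually got
 * before running into an unallocated block.  mp_aheight < mp_fheight
 * therefore means that part of the path still needs to be allocated.
 */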

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The page to copy the stuffed data into (always supplied by the
 *        caller; it is never NULL here)
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (gfs2_is_jdata(ip)) {
		struct buffer_head *bh;

		if (!page_has_buffers(page))
			create_empty_buffers(page, BIT(inode->i_blkbits),
					     BIT(BH_Uptodate));

		bh = page_buffers(page);
		if (!buffer_mapped(bh))
			map_bh(bh, inode->i_sb, block);

		set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	} else {
		set_page_dirty(page);
		gfs2_ordered_add_inode(ip);
	}

	return 0;
}

static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/*  Set up the pointer to the new block  */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
	return error;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct page *page;
	int error;

	down_write(&ip->i_rw_mutex);
	page = grab_cache_page(inode->i_mapping, 0);
	error = -ENOMEM;
	if (!page)
		goto out;
	error = __gfs2_unstuff_inode(ip, page);
	unlock_page(page);
	put_page(page);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}

/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 *   This routine returns a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   find_metapath() would return a struct metapath structure set to:
 *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 *   That means that in order to get to the block containing the byte at
 *   offset 101342453, we would load the indirect block pointed to by pointer
 *   0 in the dinode.  We would then load the indirect block pointed to by
 *   pointer 48 in that indirect block.  We would then load the data block
 *   pointed to by pointer 165 in that indirect block.
 *
 *             ----------------------------------------
 *             | Dinode |                             |
 *             |        |                            4|
 *             |        |0 1 2 3 4 5                 9|
 *             |        |                            6|
 *             ----------------------------------------
 *                       |
 *                       |
 *                       V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                                     5|
 *             |            4 4 4 4 4 5 5            1|
 *             |0           5 6 7 8 9 0 1            2|
 *             ----------------------------------------
 *                                |
 *                                |
 *                                V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                         1 1 1 1 1   5|
 *             |                         6 6 6 6 6   1|
 *             |0                        3 4 5 6 7   2|
 *             ----------------------------------------
 *                                           |
 *                                           |
 *                                           V
 *             ----------------------------------------
 *             | Data block containing offset         |
 *             |            101342453                 |
 *             |                                      |
 *             |                                      |
 *             ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}

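/**
 * metapath_branch_start - height at which a new branch of the tree starts
 * @mp: The metapath
 *
 * Editorial note on the logic below: when the tree height grows, the old
 * tree is relocated to hang off pointer 0 of the dinode (see the
 * ALLOC_GROW_HEIGHT handling in __gfs2_iomap_alloc()).  If the target
 * block's path also runs through pointer 0 (mp_list[0] == 0), the block
 * at height 1 already exists, so the new branch starts at height 2;
 * otherwise it starts at height 1.
 */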
static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META |
					  REQ_PRIO, rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}

static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return (note that the implementation
 *         below does not currently enforce this limit)
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
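
/*
 * Example (illustrative; not part of the original file): if the pointers
 * in @bh starting at @ptr decode to 1000, 1001, 1002, 1050, ...,
 * gfs2_extent_length() above returns 3 for the extent 1000..1002 and
 * leaves *eob clear, because it stopped before the end of the buffer.
 */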

enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp.  Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

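/*
 * gfs2_hole_walker - the metadata walker used by gfs2_hole_size() below.
 *
 * In short: it stops the walk (WALK_STOP) at the first allocated pointer
 * found at the bottom of the tree, descends (WALK_FOLLOW) into the first
 * allocated indirect pointer at intermediate heights, and continues
 * (WALK_CONTINUE) while the range is entirely unallocated.
 */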
static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline void gfs2_indirect_init(struct metapath *mp,
				      struct gfs2_glock *gl, unsigned int i,
				      unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
		       ((i > 1) ? sizeof(struct gfs2_meta_header) :
				 sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
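
/*
 * __gfs2_iomap_alloc() below moves through these states in descending
 * order: ALLOC_GROW_HEIGHT falls through into ALLOC_GROW_DEPTH once the
 * tree is tall enough, which falls through into ALLOC_DATA once the
 * branch down to the data is complete.
 */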

/**
 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after __gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation, asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call), and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M.  If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			fallthrough;	/* To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			fallthrough;	/* To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * __gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right.  The size we return the second time must
	 * be exact or else __gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;  /* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct inode *inode = iter->inode;
	unsigned int blockmask = i_blocksize(inode) - 1;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocks;
	struct folio *folio;
	int status;

	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
	if (status)
		return ERR_PTR(status);

	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		gfs2_trans_end(sdp);
	return folio;
}

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (!gfs2_is_stuffed(ip))
		gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
					copied);

	folio_unlock(folio);
	folio_put(folio);

	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}

static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
	.get_folio = gfs2_iomap_get_folio,
	.put_folio = gfs2_iomap_put_folio,
};
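
/*
 * These hooks bracket each folio write with its own transaction (begun
 * in gfs2_iomap_get_folio() and ended in gfs2_iomap_put_folio() above).
 * They are only installed for stuffed and journaled-data files; see
 * gfs2_iomap_begin_write() below.
 */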

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip);
			if (ret)
				goto out_trans_end;
			release_metapath(mp);
			ret = __gfs2_iomap_get(inode, iomap->offset,
					       iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = __gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->folio_ops = &gfs2_iomap_folio_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	if (gfs2_is_jdata(ip))
		iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t hstart = round_up(pos + written, i_blocksize(inode));
		loff_t hend = iomap->offset + iomap->length;

		if (hstart < hend) {
			truncate_pagecache_range(inode, hstart, hend - 1);
			punch_hole(ip, hstart, hend - hstart);
		}
	}

	if (unlikely(!written))
		return 0;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};
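
/*
 * Illustrative sketch (not part of the original file): callers elsewhere
 * in gfs2 hand this ops table to the generic iomap machinery, roughly:
 *
 *	written = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 *
 * iomap then invokes gfs2_iomap_begin() and gfs2_iomap_end() around each
 * mapping it operates on.
 */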

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's OK to allocate blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (!create)
		ret = gfs2_iomap_get(inode, pos, length, &iomap);
	else
		ret = gfs2_iomap_alloc(inode, pos, length, &iomap);
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
		    unsigned int *extlen)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits,
			     &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	return 0;
}
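
/*
 * Illustrative sketch (not part of the original file): *extlen is an
 * in/out parameter above.  A caller asking for up to 32 blocks starting
 * at logical block 100 might do, roughly:
 *
 *	u64 dblock;
 *	unsigned int extlen = 32;
 *	int ret = gfs2_get_extent(inode, 100, &dblock, &extlen);
 *
 * On success, logical blocks 100 .. 100 + extlen - 1 are mapped to
 * physical blocks dblock .. dblock + extlen - 1.
 */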

int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
		      unsigned int *extlen, bool *new)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits,
			       &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	*new = iomap.flags & IOMAP_F_NEW;
	return 0;
}

/*
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	BUG_ON(current->journal_info);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}

#define GFS2_JTRUNC_REVOKES 8192
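/* With a 4k block size (an assumption; sb_bsize can differ), the cap of
 * 8192 revokes works out to 32 MiB of journaled data per transaction in
 * gfs2_journaled_truncate() below. */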

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (!gfs2_is_stuffed(ip)) {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				return error;
		}
	}
	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip))
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	else
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
		   struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
		     struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = __gfs2_iomap_alloc(inode, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 LM_FLAG_NODE_SCOPE, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}

/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position.  This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc.  The dinode is rewritten in every transaction to
 * guarantee integrity.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, end_aligned;
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated metadata;
		 * there are no blocks to deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned.  The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above).  This determines at which heights additional meta pointers
	 * need to be preserved for the remaining data.
	 */

	if (length) {
		u64 end_offset = offset + length;
		u64 lend;

		/*
		 * Clip the end at the maximum file size for the given height:
		 * that's how far the metadata goes; files bigger than that
		 * will have additional layers of indirection.
		 */
		if (end_offset > maxsize)
			end_offset = maxsize;
		lend = end_offset >> bsize_shift;

		if (lblock >= lend)
			return 0;

		find_metapath(sdp, lend, &mp, ip->i_height);
		end_list = __end_list;
		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));

		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
			if (end_list[mp_h])
				break;
		}
		end_aligned = mp_h;
	}

	find_metapath(sdp, lblock, &mp, ip->i_height);
	memcpy(start_list, mp.mp_list, sizeof(start_list));

	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
		if (start_list[mp_h])
			break;
	}
	start_aligned = mp_h;

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		return ret;

	mp.mp_bh[0] = dibh;
	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_metapath;

	/* issue read-ahead on metadata */
	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
		metapointer_range(&mp, mp_h, start_list, start_aligned,
				  end_list, end_aligned, &start, &end);
		gfs2_metapath_ra(ip->i_gl, start, end);
	}

	if (mp.mp_aheight == ip->i_height)
		state = DEALLOC_MP_FULL; /* We have a complete metapath */
	else
		state = DEALLOC_FILL_MP; /* deal with partial metapath */

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_metapath;

	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (ret)
		goto out_metapath;
	gfs2_holder_mark_uninitialized(&rd_gh);

	mp_h = strip_h;

	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, "
					 "s_h:%u, mp_h:%u\n",
				       (unsigned long long)ip->i_no_addr,
				       prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or have just swept the dinode
			   buffer, exit. */
1856			if (ret || !mp_h) {
1857				state = DEALLOC_DONE;
1858				break;
1859			}
1860			state = DEALLOC_MP_LOWER;
1861			break;
1862
1863		/* lower the metapath strip height */
1864		case DEALLOC_MP_LOWER:
1865			/* We're done with the current buffer, so release it,
1866			   unless it's the dinode buffer. Then back up to the
1867			   previous pointer. */
1868			if (mp_h) {
1869				brelse(mp.mp_bh[mp_h]);
1870				mp.mp_bh[mp_h] = NULL;
1871			}
1872			/* If we can't get any lower in height, we've stripped
1873			   off all we can. Next step is to back up and start
1874			   stripping the previous level of metadata. */
1875			if (mp_h == 0) {
1876				strip_h--;
1877				memcpy(mp.mp_list, start_list, sizeof(start_list));
1878				mp_h = strip_h;
1879				state = DEALLOC_FILL_MP;
1880				break;
1881			}
1882			mp.mp_list[mp_h] = 0;
1883			mp_h--; /* search one metadata height down */
1884			mp.mp_list[mp_h]++;
1885			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1886				break;
1887			/* Look for the next non-null pointer at this height.
1888			 * If there is one, descend through it: FILL_MP will
1889			 * read in the buffers below it. */
1890			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1891				state = DEALLOC_FILL_MP;
1892				mp_h++;
1893			}
1894			/* No more non-null pointers at this height. Back up
1895			   to the previous height and try again. */
1896			break; /* loop around in the same state */
1897
1898		/* Fill the metapath with buffers to the given height. */
1899		case DEALLOC_FILL_MP:
1900			/* ret is the number of buffers that were newly read in. */
1901			ret = fillup_metapath(ip, &mp, mp_h);
1902			if (ret < 0)
1903				goto out;
1904
1905			/* On the first pass, issue read-ahead on metadata. */
1906			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1907				unsigned int height = mp.mp_aheight - 1;
1908
1909				/* No read-ahead for data blocks. */
1910				if (mp.mp_aheight - 1 == strip_h)
1911					height--;
1912
1913				for (; height >= mp.mp_aheight - ret; height--) {
1914					metapointer_range(&mp, height,
1915							  start_list, start_aligned,
1916							  end_list, end_aligned,
1917							  &start, &end);
1918					gfs2_metapath_ra(ip->i_gl, start, end);
1919				}
1920			}
1921
1922			/* If buffers found for the entire strip height */
1923			if (mp.mp_aheight - 1 == strip_h) {
1924				state = DEALLOC_MP_FULL;
1925				break;
1926			}
1927			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1928				mp_h = mp.mp_aheight - 1;
1929
1930			/* If we find a non-null block pointer, crawl a bit
1931			   higher up in the metapath and try again, otherwise
1932			   we need to look lower for a new starting point. */
1933			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1934				mp_h++;
1935			else
1936				state = DEALLOC_MP_LOWER;
1937			break;
1938		}
1939	}
1940
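	/*
	 * If the sweep freed any blocks, roll the statfs and quota
	 * adjustments and the dinode update into a transaction, opening
	 * a fresh one if the loop above did not leave one running.
	 */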
1941	if (btotal) {
1942		if (current->journal_info == NULL) {
1943			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1944					       RES_QUOTA, 0);
1945			if (ret)
1946				goto out;
1947			down_write(&ip->i_rw_mutex);
1948		}
1949		gfs2_statfs_change(sdp, 0, +btotal, 0);
1950		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1951				  ip->i_inode.i_gid);
1952		ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
1953		gfs2_trans_add_meta(ip->i_gl, dibh);
1954		gfs2_dinode_out(ip, dibh->b_data);
1955		up_write(&ip->i_rw_mutex);
1956		gfs2_trans_end(sdp);
1957	}
1958
1959out:
1960	if (gfs2_holder_initialized(&rd_gh))
1961		gfs2_glock_dq_uninit(&rd_gh);
1962	if (current->journal_info) {
1963		up_write(&ip->i_rw_mutex);
1964		gfs2_trans_end(sdp);
1965		cond_resched();
1966	}
1967	gfs2_quota_unhold(ip);
1968out_metapath:
1969	release_metapath(&mp);
1970	return ret;
1971}
1972
1973static int trunc_end(struct gfs2_inode *ip)
1974{
1975	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1976	struct buffer_head *dibh;
1977	int error;
1978
1979	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1980	if (error)
1981		return error;
1982
1983	down_write(&ip->i_rw_mutex);
1984
1985	error = gfs2_meta_inode_buffer(ip, &dibh);
1986	if (error)
1987		goto out;
1988
1989	if (!i_size_read(&ip->i_inode)) {
1990		ip->i_height = 0;
1991		ip->i_goal = ip->i_no_addr;
1992		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1993		gfs2_ordered_del_inode(ip);
1994	}
1995	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
1996	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1997
1998	gfs2_trans_add_meta(ip->i_gl, dibh);
1999	gfs2_dinode_out(ip, dibh->b_data);
2000	brelse(dibh);
2001
2002out:
2003	up_write(&ip->i_rw_mutex);
2004	gfs2_trans_end(sdp);
2005	return error;
2006}
2007
2008/**
2009 * do_shrink - make a file smaller
2010 * @inode: the inode
2011 * @newsize: the size to make the file
2012 *
2013 * Called with an exclusive lock on @inode. @newsize must
2014 * be equal to or smaller than the current inode size.
2015 *
2016 * Returns: errno
2017 */
2018
2019static int do_shrink(struct inode *inode, u64 newsize)
2020{
2021	struct gfs2_inode *ip = GFS2_I(inode);
2022	int error;
2023
2024	error = trunc_start(inode, newsize);
2025	if (error < 0)
2026		return error;
2027	if (gfs2_is_stuffed(ip))
2028		return 0;
2029
2030	error = punch_hole(ip, newsize, 0);
2031	if (error == 0)
2032		error = trunc_end(ip);
2033
2034	return error;
2035}
2036
2037/**
2038 * do_grow - Touch and update inode size
2039 * @inode: The inode
2040 * @size: The new size
2041 *
2042 * This function updates the timestamps on the inode and
2043 * may also increase the size of the inode. It must not
2044 * be called with @size any smaller than the current
2045 * inode size.
2046 *
2047 * Although it is not strictly required to unstuff files here,
2048 * earlier versions of GFS2 had a bug in the stuffed file reading
2049 * code which would result in a buffer overrun if the size was
2050 * larger than the max stuffed file size. To prevent this from
2051 * occurring, such files are unstuffed; in other cases we can
2052 * just update the inode size directly.
2053 *
2054 * Returns: 0 on success, or -ve on error
2055 */
2056
2057static int do_grow(struct inode *inode, u64 size)
2058{
2059	struct gfs2_inode *ip = GFS2_I(inode);
2060	struct gfs2_sbd *sdp = GFS2_SB(inode);
2061	struct gfs2_alloc_parms ap = { .target = 1, };
2062	struct buffer_head *dibh;
2063	int error;
2064	int unstuff = 0;
2065
2066	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
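	/*
	 * A stuffed inode carries its data in the dinode block itself,
	 * so it can hold at most gfs2_max_stuffed_size() bytes: the
	 * block size minus the dinode header, or roughly 3.8K on a
	 * 4 KiB filesystem. Growing past that limit requires a real
	 * data block, and hence a block allocation.
	 */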
2067		error = gfs2_quota_lock_check(ip, &ap);
2068		if (error)
2069			return error;
2070
2071		error = gfs2_inplace_reserve(ip, &ap);
2072		if (error)
2073			goto do_grow_qunlock;
2074		unstuff = 1;
2075	}
2076
2077	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
2078				 (unstuff &&
2079				  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
2080				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2081				  0 : RES_QUOTA), 0);
2082	if (error)
2083		goto do_grow_release;
2084
2085	if (unstuff) {
2086		error = gfs2_unstuff_dinode(ip);
2087		if (error)
2088			goto do_end_trans;
2089	}
2090
2091	error = gfs2_meta_inode_buffer(ip, &dibh);
2092	if (error)
2093		goto do_end_trans;
2094
2095	truncate_setsize(inode, size);
2096	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
2097	gfs2_trans_add_meta(ip->i_gl, dibh);
2098	gfs2_dinode_out(ip, dibh->b_data);
2099	brelse(dibh);
2100
2101do_end_trans:
2102	gfs2_trans_end(sdp);
2103do_grow_release:
2104	if (unstuff) {
2105		gfs2_inplace_release(ip);
2106do_grow_qunlock:
2107		gfs2_quota_unlock(ip);
2108	}
2109	return error;
2110}
2111
2112/**
2113 * gfs2_setattr_size - make a file a given size
2114 * @inode: the inode
2115 * @newsize: the size to make the file
2116 *
2117 * The file size can grow, shrink, or stay the same. This
2118 * is called holding i_rwsem and an exclusive glock on the inode
2119 * in question.
2120 *
2121 * Returns: errno
2122 */
2123
2124int gfs2_setattr_size(struct inode *inode, u64 newsize)
2125{
2126	struct gfs2_inode *ip = GFS2_I(inode);
2127	int ret;
2128
2129	BUG_ON(!S_ISREG(inode->i_mode));
2130
2131	ret = inode_newsize_ok(inode, newsize);
2132	if (ret)
2133		return ret;
2134
2135	inode_dio_wait(inode);
2136
2137	ret = gfs2_qa_get(ip);
2138	if (ret)
2139		goto out;
2140
2141	if (newsize >= inode->i_size) {
2142		ret = do_grow(inode, newsize);
2143		goto out;
2144	}
2145
2146	ret = do_shrink(inode, newsize);
2147out:
2148	gfs2_rs_delete(ip);
2149	gfs2_qa_put(ip);
2150	return ret;
2151}
2152
2153int gfs2_truncatei_resume(struct gfs2_inode *ip)
2154{
2155	int error;
2156	error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
2157	if (!error)
2158		error = trunc_end(ip);
2159	return error;
2160}
2161
2162int gfs2_file_dealloc(struct gfs2_inode *ip)
2163{
2164	return punch_hole(ip, 0, 0);
2165}
2166
2167/**
2168 * gfs2_free_journal_extents - Free cached journal bmap info
2169 * @jd: The journal
2170 *
2171 */
2172
2173void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2174{
2175	struct gfs2_journal_extent *jext;
2176
2177	while (!list_empty(&jd->extent_list)) {
2178		jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2179		list_del(&jext->list);
2180		kfree(jext);
2181	}
2182}
2183
2184/**
2185 * gfs2_add_jextent - Add or merge a new extent to extent cache
2186 * @jd: The journal descriptor
2187 * @lblock: The logical block at start of new extent
2188 * @dblock: The physical block at start of new extent
2189 * @blocks: Size of extent in fs blocks
2190 *
2191 * Returns: 0 on success or -ENOMEM
2192 */
2193
2194static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2195{
2196	struct gfs2_journal_extent *jext;
2197
2198	if (!list_empty(&jd->extent_list)) {
2199		jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2200		if ((jext->dblock + jext->blocks) == dblock) {
2201			jext->blocks += blocks;
2202			return 0;
2203		}
2204	}
2205
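	/*
	 * No merge was possible, so append a new extent. For example, a
	 * last extent of { lblock 0, dblock 5000, blocks 8 } absorbs a
	 * new mapping beginning at dblock 5008 in the check above, while
	 * a mapping beginning anywhere else gets a list entry of its own.
	 */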
2206	jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2207	if (jext == NULL)
2208		return -ENOMEM;
2209	jext->dblock = dblock;
2210	jext->lblock = lblock;
2211	jext->blocks = blocks;
2212	list_add_tail(&jext->list, &jd->extent_list);
2213	jd->nr_extents++;
2214	return 0;
2215}
2216
2217/**
2218 * gfs2_map_journal_extents - Cache journal bmap info
2219 * @sdp: The super block
2220 * @jd: The journal to map
2221 *
2222 * Create a reusable "extent" mapping from all logical
2223 * blocks to all physical blocks for the given journal.  This will save
2224 * us time when writing journal blocks.  Most journals will have only one
2225 * extent that maps all their logical blocks.  That's because mkfs.gfs2
2226 * arranges the journal blocks sequentially to maximize performance.
2227 * So a single extent typically maps the journal's entire length.
2228 * However, gfs2_jadd can run while the filesystem is active, so the
2229 * journals it adds may not be sequential.  Less likely is the case
2230 * where users created their own journals by mounting the metafs and
2231 * laying them out themselves, but it's still possible.  Such journals
2232 * might have several extents.
2233 *
2234 * Returns: 0 on success, or error on failure
2235 */
2236
2237int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2238{
2239	u64 lblock = 0;
2240	u64 lblock_stop;
2241	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2242	struct buffer_head bh;
2243	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2244	u64 size;
2245	int rc;
2246	ktime_t start, end;
2247
2248	start = ktime_get();
2249	lblock_stop = i_size_read(jd->jd_inode) >> shift;
2250	size = (lblock_stop - lblock) << shift;
2251	jd->nr_extents = 0;
2252	WARN_ON(!list_empty(&jd->extent_list));
2253
2254	do {
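	/*
	 * Walk the whole journal file with gfs2_block_map(), using an
	 * on-stack buffer_head as the map request: b_size goes in as the
	 * number of bytes still to map and comes back as the byte length
	 * of the contiguous extent that was found at lblock.
	 */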
2255		bh.b_state = 0;
2256		bh.b_blocknr = 0;
2257		bh.b_size = size;
2258		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2259		if (rc || !buffer_mapped(&bh))
2260			goto fail;
2261		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2262		if (rc)
2263			goto fail;
2264		size -= bh.b_size;
2265		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2266	} while (size > 0);
2267
2268	end = ktime_get();
2269	fs_info(sdp, "journal %u mapped with %u extents in %lldms\n", jd->jd_jid,
2270		jd->nr_extents, ktime_ms_delta(end, start));
2271	return 0;
2272
2273fail:
2274	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2275		rc, jd->jd_jid,
2276		(unsigned long long)(i_size_read(jd->jd_inode) - size),
2277		jd->nr_extents);
2278	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2279		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2280		bh.b_state, (unsigned long long)bh.b_size);
2281	gfs2_free_journal_extents(jd);
2282	return rc;
2283}
2284
2285/**
2286 * gfs2_write_alloc_required - figure out if a write will require an allocation
2287 * @ip: the file being written to
2288 * @offset: the offset to write to
2289 * @len: the number of bytes being written
2290 *
2291 * Returns: 1 if an alloc is required, 0 otherwise
2292 */
2293
2294int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2295			      unsigned int len)
2296{
2297	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2298	struct buffer_head bh;
2299	unsigned int shift;
2300	u64 lblock, lblock_stop, size;
2301	u64 end_of_file;
2302
2303	if (!len)
2304		return 0;
2305
2306	if (gfs2_is_stuffed(ip)) {
2307		if (offset + len > gfs2_max_stuffed_size(ip))
2308			return 1;
2309		return 0;
2310	}
2311
2312	shift = sdp->sd_sb.sb_bsize_shift;
2313	BUG_ON(gfs2_is_dir(ip));
2314	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
2315	lblock = offset >> shift;
2316	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2317	if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
2318		return 1;
2319
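	/*
	 * Walk the mapping for the byte range: any hole means an
	 * allocation will be required. If gfs2_block_map() fails it
	 * leaves the buffer unmapped, so errors also report 1 here,
	 * which is the conservative answer.
	 */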
2320	size = (lblock_stop - lblock) << shift;
2321	do {
2322		bh.b_state = 0;
2323		bh.b_size = size;
2324		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2325		if (!buffer_mapped(&bh))
2326			return 1;
2327		size -= bh.b_size;
2328		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2329	} while (size > 0);
2330
2331	return 0;
2332}
2333
2334static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2335{
2336	struct gfs2_inode *ip = GFS2_I(inode);
2337	struct buffer_head *dibh;
2338	int error;
2339
2340	if (offset >= inode->i_size)
2341		return 0;
2342	if (offset + length > inode->i_size)
2343		length = inode->i_size - offset;
2344
2345	error = gfs2_meta_inode_buffer(ip, &dibh);
2346	if (error)
2347		return error;
2348	gfs2_trans_add_meta(ip->i_gl, dibh);
2349	memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2350	       length);
2351	brelse(dibh);
2352	return 0;
2353}
2354
2355static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2356					 loff_t length)
2357{
2358	struct gfs2_sbd *sdp = GFS2_SB(inode);
2359	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2360	int error;
2361
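	/*
	 * Truncate the page cache in chunks so that no single
	 * transaction needs more than GFS2_JTRUNC_REVOKES revokes: once
	 * the current transaction has been dirtied, it is closed and a
	 * fresh one is opened before the next chunk. Note that
	 * truncate_pagecache_range() takes an inclusive end offset.
	 */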
2362	while (length) {
2363		struct gfs2_trans *tr;
2364		loff_t chunk;
2365		unsigned int offs;
2366
2367		chunk = length;
2368		if (chunk > max_chunk)
2369			chunk = max_chunk;
2370
2371		offs = offset & ~PAGE_MASK;
2372		if (offs && chunk > PAGE_SIZE)
2373			chunk = offs + ((chunk - offs) & PAGE_MASK);
2374
2375		truncate_pagecache_range(inode, offset, offset + chunk - 1);
2376		offset += chunk;
2377		length -= chunk;
2378
2379		tr = current->journal_info;
2380		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2381			continue;
2382
2383		gfs2_trans_end(sdp);
2384		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2385		if (error)
2386			return error;
2387	}
2388	return 0;
2389}
2390
2391int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2392{
2393	struct inode *inode = file_inode(file);
2394	struct gfs2_inode *ip = GFS2_I(inode);
2395	struct gfs2_sbd *sdp = GFS2_SB(inode);
2396	unsigned int blocksize = i_blocksize(inode);
2397	loff_t start, end;
2398	int error;
2399
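	/*
	 * For unstuffed files, zero out the partial blocks at either
	 * end of the hole by hand; punch_hole() below only deallocates
	 * whole blocks. For example, with 4 KiB blocks, punching offset
	 * 1000, length 5000 zeroes bytes 1000-4095 of block 0 and bytes
	 * 0-1903 of block 1.
	 */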
2400	if (!gfs2_is_stuffed(ip)) {
2401		unsigned int start_off, end_len;
2402
2403		start_off = offset & (blocksize - 1);
2404		end_len = (offset + length) & (blocksize - 1);
2405		if (start_off) {
2406			unsigned int len = length;
2407			if (length > blocksize - start_off)
2408				len = blocksize - start_off;
2409			error = gfs2_block_zero_range(inode, offset, len);
2410			if (error)
2411				goto out;
2412			if (start_off + length < blocksize)
2413				end_len = 0;
2414		}
2415		if (end_len) {
2416			error = gfs2_block_zero_range(inode,
2417				offset + length - end_len, end_len);
2418			if (error)
2419				goto out;
2420		}
2421	}
2422
2423	start = round_down(offset, blocksize);
2424	end = round_up(offset + length, blocksize) - 1;
2425	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
2426	if (error)
2427		return error;
2428
2429	if (gfs2_is_jdata(ip))
2430		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2431					 GFS2_JTRUNC_REVOKES);
2432	else
2433		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2434	if (error)
2435		return error;
2436
2437	if (gfs2_is_stuffed(ip)) {
2438		error = stuffed_zero_range(inode, offset, length);
2439		if (error)
2440			goto out;
2441	}
2442
2443	if (gfs2_is_jdata(ip)) {
2444		BUG_ON(!current->journal_info);
2445		error = gfs2_journaled_truncate_range(inode, offset, length);
		if (error)
			goto out;
2446	} else {
2447		truncate_pagecache_range(inode, offset, offset + length - 1);
	}
2448
2449	file_update_time(file);
2450	mark_inode_dirty(inode);
2451
2452	if (current->journal_info)
2453		gfs2_trans_end(sdp);
2454
2455	if (!gfs2_is_stuffed(ip))
2456		error = punch_hole(ip, offset, length);
2457
2458out:
2459	if (current->journal_info)
2460		gfs2_trans_end(sdp);
2461	return error;
2462}
2463
2464static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2465		loff_t offset)
2466{
2467	int ret;
2468
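	/*
	 * Writeback calls this for each page in turn; reuse the cached
	 * mapping while the offset still falls inside it so that only
	 * one gfs2_iomap_get() call is needed per extent.
	 */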
2469	if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2470		return -EIO;
2471
2472	if (offset >= wpc->iomap.offset &&
2473	    offset < wpc->iomap.offset + wpc->iomap.length)
2474		return 0;
2475
2476	memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2477	ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2478	return ret;
2479}
2480
2481const struct iomap_writeback_ops gfs2_writeback_ops = {
2482	.map_blocks		= gfs2_map_blocks,
2483};
2484