162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  fs/ext4/extents_status.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
662306a36Sopenharmony_ci * Modified by
762306a36Sopenharmony_ci *	Allison Henderson <achender@linux.vnet.ibm.com>
862306a36Sopenharmony_ci *	Hugh Dickins <hughd@google.com>
962306a36Sopenharmony_ci *	Zheng Liu <wenqing.lz@taobao.com>
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Ext4 extents status tree core functions.
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci#include <linux/list_sort.h>
1462306a36Sopenharmony_ci#include <linux/proc_fs.h>
1562306a36Sopenharmony_ci#include <linux/seq_file.h>
1662306a36Sopenharmony_ci#include "ext4.h"
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include <trace/events/ext4.h>
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci/*
2162306a36Sopenharmony_ci * According to previous discussion in Ext4 Developer Workshop, we
2262306a36Sopenharmony_ci * will introduce a new structure called io tree to track all extent
2362306a36Sopenharmony_ci * status in order to solve some problems that we have met
2462306a36Sopenharmony_ci * (e.g. Reservation space warning), and provide extent-level locking.
2562306a36Sopenharmony_ci * Delay extent tree is the first step to achieve this goal.  It is
2662306a36Sopenharmony_ci * original built by Yongqiang Yang.  At that time it is called delay
2762306a36Sopenharmony_ci * extent tree, whose goal is only track delayed extents in memory to
2862306a36Sopenharmony_ci * simplify the implementation of fiemap and bigalloc, and introduce
2962306a36Sopenharmony_ci * lseek SEEK_DATA/SEEK_HOLE support.  That is why it is still called
3062306a36Sopenharmony_ci * delay extent tree at the first commit.  But for better understand
3162306a36Sopenharmony_ci * what it does, it has been rename to extent status tree.
3262306a36Sopenharmony_ci *
3362306a36Sopenharmony_ci * Step1:
3462306a36Sopenharmony_ci * Currently the first step has been done.  All delayed extents are
3562306a36Sopenharmony_ci * tracked in the tree.  It maintains the delayed extent when a delayed
3662306a36Sopenharmony_ci * allocation is issued, and the delayed extent is written out or
3762306a36Sopenharmony_ci * invalidated.  Therefore the implementation of fiemap and bigalloc
3862306a36Sopenharmony_ci * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
3962306a36Sopenharmony_ci *
4062306a36Sopenharmony_ci * The following comment describes the implemenmtation of extent
4162306a36Sopenharmony_ci * status tree and future works.
4262306a36Sopenharmony_ci *
4362306a36Sopenharmony_ci * Step2:
4462306a36Sopenharmony_ci * In this step all extent status are tracked by extent status tree.
4562306a36Sopenharmony_ci * Thus, we can first try to lookup a block mapping in this tree before
4662306a36Sopenharmony_ci * finding it in extent tree.  Hence, single extent cache can be removed
4762306a36Sopenharmony_ci * because extent status tree can do a better job.  Extents in status
4862306a36Sopenharmony_ci * tree are loaded on-demand.  Therefore, the extent status tree may not
4962306a36Sopenharmony_ci * contain all of the extents in a file.  Meanwhile we define a shrinker
5062306a36Sopenharmony_ci * to reclaim memory from extent status tree because fragmented extent
5162306a36Sopenharmony_ci * tree will make status tree cost too much memory.  written/unwritten/-
5262306a36Sopenharmony_ci * hole extents in the tree will be reclaimed by this shrinker when we
5362306a36Sopenharmony_ci * are under high memory pressure.  Delayed extents will not be
5462306a36Sopenharmony_ci * reclimed because fiemap, bigalloc, and seek_data/hole need it.
5562306a36Sopenharmony_ci */
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci/*
5862306a36Sopenharmony_ci * Extent status tree implementation for ext4.
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci *
6162306a36Sopenharmony_ci * ==========================================================================
6262306a36Sopenharmony_ci * Extent status tree tracks all extent status.
6362306a36Sopenharmony_ci *
6462306a36Sopenharmony_ci * 1. Why we need to implement extent status tree?
6562306a36Sopenharmony_ci *
6662306a36Sopenharmony_ci * Without extent status tree, ext4 identifies a delayed extent by looking
6762306a36Sopenharmony_ci * up page cache, this has several deficiencies - complicated, buggy,
6862306a36Sopenharmony_ci * and inefficient code.
6962306a36Sopenharmony_ci *
7062306a36Sopenharmony_ci * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
7162306a36Sopenharmony_ci * block or a range of blocks are belonged to a delayed extent.
7262306a36Sopenharmony_ci *
7362306a36Sopenharmony_ci * Let us have a look at how they do without extent status tree.
7462306a36Sopenharmony_ci *   --	FIEMAP
7562306a36Sopenharmony_ci *	FIEMAP looks up page cache to identify delayed allocations from holes.
7662306a36Sopenharmony_ci *
7762306a36Sopenharmony_ci *   --	SEEK_HOLE/DATA
7862306a36Sopenharmony_ci *	SEEK_HOLE/DATA has the same problem as FIEMAP.
7962306a36Sopenharmony_ci *
8062306a36Sopenharmony_ci *   --	bigalloc
8162306a36Sopenharmony_ci *	bigalloc looks up page cache to figure out if a block is
8262306a36Sopenharmony_ci *	already under delayed allocation or not to determine whether
8362306a36Sopenharmony_ci *	quota reserving is needed for the cluster.
8462306a36Sopenharmony_ci *
8562306a36Sopenharmony_ci *   --	writeout
8662306a36Sopenharmony_ci *	Writeout looks up whole page cache to see if a buffer is
8762306a36Sopenharmony_ci *	mapped, If there are not very many delayed buffers, then it is
8862306a36Sopenharmony_ci *	time consuming.
8962306a36Sopenharmony_ci *
9062306a36Sopenharmony_ci * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
9162306a36Sopenharmony_ci * bigalloc and writeout can figure out if a block or a range of
9262306a36Sopenharmony_ci * blocks is under delayed allocation(belonged to a delayed extent) or
9362306a36Sopenharmony_ci * not by searching the extent tree.
9462306a36Sopenharmony_ci *
9562306a36Sopenharmony_ci *
9662306a36Sopenharmony_ci * ==========================================================================
9762306a36Sopenharmony_ci * 2. Ext4 extent status tree impelmentation
9862306a36Sopenharmony_ci *
9962306a36Sopenharmony_ci *   --	extent
10062306a36Sopenharmony_ci *	A extent is a range of blocks which are contiguous logically and
10162306a36Sopenharmony_ci *	physically.  Unlike extent in extent tree, this extent in ext4 is
10262306a36Sopenharmony_ci *	a in-memory struct, there is no corresponding on-disk data.  There
10362306a36Sopenharmony_ci *	is no limit on length of extent, so an extent can contain as many
10462306a36Sopenharmony_ci *	blocks as they are contiguous logically and physically.
10562306a36Sopenharmony_ci *
10662306a36Sopenharmony_ci *   --	extent status tree
10762306a36Sopenharmony_ci *	Every inode has an extent status tree and all allocation blocks
10862306a36Sopenharmony_ci *	are added to the tree with different status.  The extent in the
10962306a36Sopenharmony_ci *	tree are ordered by logical block no.
11062306a36Sopenharmony_ci *
11162306a36Sopenharmony_ci *   --	operations on a extent status tree
11262306a36Sopenharmony_ci *	There are three important operations on a delayed extent tree: find
11362306a36Sopenharmony_ci *	next extent, adding a extent(a range of blocks) and removing a extent.
11462306a36Sopenharmony_ci *
11562306a36Sopenharmony_ci *   --	race on a extent status tree
11662306a36Sopenharmony_ci *	Extent status tree is protected by inode->i_es_lock.
11762306a36Sopenharmony_ci *
11862306a36Sopenharmony_ci *   --	memory consumption
11962306a36Sopenharmony_ci *      Fragmented extent tree will make extent status tree cost too much
12062306a36Sopenharmony_ci *      memory.  Hence, we will reclaim written/unwritten/hole extents from
12162306a36Sopenharmony_ci *      the tree under a heavy memory pressure.
12262306a36Sopenharmony_ci *
12362306a36Sopenharmony_ci *
12462306a36Sopenharmony_ci * ==========================================================================
12562306a36Sopenharmony_ci * 3. Performance analysis
12662306a36Sopenharmony_ci *
12762306a36Sopenharmony_ci *   --	overhead
12862306a36Sopenharmony_ci *	1. There is a cache extent for write access, so if writes are
12962306a36Sopenharmony_ci *	not very random, adding space operaions are in O(1) time.
13062306a36Sopenharmony_ci *
13162306a36Sopenharmony_ci *   --	gain
13262306a36Sopenharmony_ci *	2. Code is much simpler, more readable, more maintainable and
13362306a36Sopenharmony_ci *	more efficient.
13462306a36Sopenharmony_ci *
13562306a36Sopenharmony_ci *
13662306a36Sopenharmony_ci * ==========================================================================
13762306a36Sopenharmony_ci * 4. TODO list
13862306a36Sopenharmony_ci *
13962306a36Sopenharmony_ci *   -- Refactor delayed space reservation
14062306a36Sopenharmony_ci *
14162306a36Sopenharmony_ci *   -- Extent-level locking
14262306a36Sopenharmony_ci */
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_cistatic struct kmem_cache *ext4_es_cachep;
14562306a36Sopenharmony_cistatic struct kmem_cache *ext4_pending_cachep;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_cistatic int __es_insert_extent(struct inode *inode, struct extent_status *newes,
14862306a36Sopenharmony_ci			      struct extent_status *prealloc);
14962306a36Sopenharmony_cistatic int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
15062306a36Sopenharmony_ci			      ext4_lblk_t end, int *reserved,
15162306a36Sopenharmony_ci			      struct extent_status *prealloc);
15262306a36Sopenharmony_cistatic int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
15362306a36Sopenharmony_cistatic int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
15462306a36Sopenharmony_ci		       struct ext4_inode_info *locked_ei);
15562306a36Sopenharmony_cistatic int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
15662306a36Sopenharmony_ci			    ext4_lblk_t len,
15762306a36Sopenharmony_ci			    struct pending_reservation **prealloc);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ciint __init ext4_init_es(void)
16062306a36Sopenharmony_ci{
16162306a36Sopenharmony_ci	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
16262306a36Sopenharmony_ci	if (ext4_es_cachep == NULL)
16362306a36Sopenharmony_ci		return -ENOMEM;
16462306a36Sopenharmony_ci	return 0;
16562306a36Sopenharmony_ci}
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_civoid ext4_exit_es(void)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	kmem_cache_destroy(ext4_es_cachep);
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_civoid ext4_es_init_tree(struct ext4_es_tree *tree)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	tree->root = RB_ROOT;
17562306a36Sopenharmony_ci	tree->cache_es = NULL;
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci#ifdef ES_DEBUG__
17962306a36Sopenharmony_cistatic void ext4_es_print_tree(struct inode *inode)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	struct ext4_es_tree *tree;
18262306a36Sopenharmony_ci	struct rb_node *node;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
18562306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_es_tree;
18662306a36Sopenharmony_ci	node = rb_first(&tree->root);
18762306a36Sopenharmony_ci	while (node) {
18862306a36Sopenharmony_ci		struct extent_status *es;
18962306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
19062306a36Sopenharmony_ci		printk(KERN_DEBUG " [%u/%u) %llu %x",
19162306a36Sopenharmony_ci		       es->es_lblk, es->es_len,
19262306a36Sopenharmony_ci		       ext4_es_pblock(es), ext4_es_status(es));
19362306a36Sopenharmony_ci		node = rb_next(node);
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci	printk(KERN_DEBUG "\n");
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci#else
19862306a36Sopenharmony_ci#define ext4_es_print_tree(inode)
19962306a36Sopenharmony_ci#endif
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_cistatic inline ext4_lblk_t ext4_es_end(struct extent_status *es)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
20462306a36Sopenharmony_ci	return es->es_lblk + es->es_len - 1;
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci/*
20862306a36Sopenharmony_ci * search through the tree for an delayed extent with a given offset.  If
20962306a36Sopenharmony_ci * it can't be found, try to find next extent.
21062306a36Sopenharmony_ci */
21162306a36Sopenharmony_cistatic struct extent_status *__es_tree_search(struct rb_root *root,
21262306a36Sopenharmony_ci					      ext4_lblk_t lblk)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	struct rb_node *node = root->rb_node;
21562306a36Sopenharmony_ci	struct extent_status *es = NULL;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	while (node) {
21862306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
21962306a36Sopenharmony_ci		if (lblk < es->es_lblk)
22062306a36Sopenharmony_ci			node = node->rb_left;
22162306a36Sopenharmony_ci		else if (lblk > ext4_es_end(es))
22262306a36Sopenharmony_ci			node = node->rb_right;
22362306a36Sopenharmony_ci		else
22462306a36Sopenharmony_ci			return es;
22562306a36Sopenharmony_ci	}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	if (es && lblk < es->es_lblk)
22862306a36Sopenharmony_ci		return es;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	if (es && lblk > ext4_es_end(es)) {
23162306a36Sopenharmony_ci		node = rb_next(&es->rb_node);
23262306a36Sopenharmony_ci		return node ? rb_entry(node, struct extent_status, rb_node) :
23362306a36Sopenharmony_ci			      NULL;
23462306a36Sopenharmony_ci	}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	return NULL;
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci/*
24062306a36Sopenharmony_ci * ext4_es_find_extent_range - find extent with specified status within block
24162306a36Sopenharmony_ci *                             range or next extent following block range in
24262306a36Sopenharmony_ci *                             extents status tree
24362306a36Sopenharmony_ci *
24462306a36Sopenharmony_ci * @inode - file containing the range
24562306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status
24662306a36Sopenharmony_ci * @lblk - logical block defining start of range
24762306a36Sopenharmony_ci * @end - logical block defining end of range
24862306a36Sopenharmony_ci * @es - extent found, if any
24962306a36Sopenharmony_ci *
25062306a36Sopenharmony_ci * Find the first extent within the block range specified by @lblk and @end
25162306a36Sopenharmony_ci * in the extents status tree that satisfies @matching_fn.  If a match
25262306a36Sopenharmony_ci * is found, it's returned in @es.  If not, and a matching extent is found
25362306a36Sopenharmony_ci * beyond the block range, it's returned in @es.  If no match is found, an
25462306a36Sopenharmony_ci * extent is returned in @es whose es_lblk, es_len, and es_pblk components
25562306a36Sopenharmony_ci * are 0.
25662306a36Sopenharmony_ci */
25762306a36Sopenharmony_cistatic void __es_find_extent_range(struct inode *inode,
25862306a36Sopenharmony_ci				   int (*matching_fn)(struct extent_status *es),
25962306a36Sopenharmony_ci				   ext4_lblk_t lblk, ext4_lblk_t end,
26062306a36Sopenharmony_ci				   struct extent_status *es)
26162306a36Sopenharmony_ci{
26262306a36Sopenharmony_ci	struct ext4_es_tree *tree = NULL;
26362306a36Sopenharmony_ci	struct extent_status *es1 = NULL;
26462306a36Sopenharmony_ci	struct rb_node *node;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	WARN_ON(es == NULL);
26762306a36Sopenharmony_ci	WARN_ON(end < lblk);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_es_tree;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	/* see if the extent has been cached */
27262306a36Sopenharmony_ci	es->es_lblk = es->es_len = es->es_pblk = 0;
27362306a36Sopenharmony_ci	es1 = READ_ONCE(tree->cache_es);
27462306a36Sopenharmony_ci	if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
27562306a36Sopenharmony_ci		es_debug("%u cached by [%u/%u) %llu %x\n",
27662306a36Sopenharmony_ci			 lblk, es1->es_lblk, es1->es_len,
27762306a36Sopenharmony_ci			 ext4_es_pblock(es1), ext4_es_status(es1));
27862306a36Sopenharmony_ci		goto out;
27962306a36Sopenharmony_ci	}
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	es1 = __es_tree_search(&tree->root, lblk);
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ciout:
28462306a36Sopenharmony_ci	if (es1 && !matching_fn(es1)) {
28562306a36Sopenharmony_ci		while ((node = rb_next(&es1->rb_node)) != NULL) {
28662306a36Sopenharmony_ci			es1 = rb_entry(node, struct extent_status, rb_node);
28762306a36Sopenharmony_ci			if (es1->es_lblk > end) {
28862306a36Sopenharmony_ci				es1 = NULL;
28962306a36Sopenharmony_ci				break;
29062306a36Sopenharmony_ci			}
29162306a36Sopenharmony_ci			if (matching_fn(es1))
29262306a36Sopenharmony_ci				break;
29362306a36Sopenharmony_ci		}
29462306a36Sopenharmony_ci	}
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	if (es1 && matching_fn(es1)) {
29762306a36Sopenharmony_ci		WRITE_ONCE(tree->cache_es, es1);
29862306a36Sopenharmony_ci		es->es_lblk = es1->es_lblk;
29962306a36Sopenharmony_ci		es->es_len = es1->es_len;
30062306a36Sopenharmony_ci		es->es_pblk = es1->es_pblk;
30162306a36Sopenharmony_ci	}
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci/*
30662306a36Sopenharmony_ci * Locking for __es_find_extent_range() for external use
30762306a36Sopenharmony_ci */
30862306a36Sopenharmony_civoid ext4_es_find_extent_range(struct inode *inode,
30962306a36Sopenharmony_ci			       int (*matching_fn)(struct extent_status *es),
31062306a36Sopenharmony_ci			       ext4_lblk_t lblk, ext4_lblk_t end,
31162306a36Sopenharmony_ci			       struct extent_status *es)
31262306a36Sopenharmony_ci{
31362306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
31462306a36Sopenharmony_ci		return;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	trace_ext4_es_find_extent_range_enter(inode, lblk);
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	read_lock(&EXT4_I(inode)->i_es_lock);
31962306a36Sopenharmony_ci	__es_find_extent_range(inode, matching_fn, lblk, end, es);
32062306a36Sopenharmony_ci	read_unlock(&EXT4_I(inode)->i_es_lock);
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	trace_ext4_es_find_extent_range_exit(inode, es);
32362306a36Sopenharmony_ci}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci/*
32662306a36Sopenharmony_ci * __es_scan_range - search block range for block with specified status
32762306a36Sopenharmony_ci *                   in extents status tree
32862306a36Sopenharmony_ci *
32962306a36Sopenharmony_ci * @inode - file containing the range
33062306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status
33162306a36Sopenharmony_ci * @lblk - logical block defining start of range
33262306a36Sopenharmony_ci * @end - logical block defining end of range
33362306a36Sopenharmony_ci *
33462306a36Sopenharmony_ci * Returns true if at least one block in the specified block range satisfies
33562306a36Sopenharmony_ci * the criterion specified by @matching_fn, and false if not.  If at least
33662306a36Sopenharmony_ci * one extent has the specified status, then there is at least one block
33762306a36Sopenharmony_ci * in the cluster with that status.  Should only be called by code that has
33862306a36Sopenharmony_ci * taken i_es_lock.
33962306a36Sopenharmony_ci */
34062306a36Sopenharmony_cistatic bool __es_scan_range(struct inode *inode,
34162306a36Sopenharmony_ci			    int (*matching_fn)(struct extent_status *es),
34262306a36Sopenharmony_ci			    ext4_lblk_t start, ext4_lblk_t end)
34362306a36Sopenharmony_ci{
34462306a36Sopenharmony_ci	struct extent_status es;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	__es_find_extent_range(inode, matching_fn, start, end, &es);
34762306a36Sopenharmony_ci	if (es.es_len == 0)
34862306a36Sopenharmony_ci		return false;   /* no matching extent in the tree */
34962306a36Sopenharmony_ci	else if (es.es_lblk <= start &&
35062306a36Sopenharmony_ci		 start < es.es_lblk + es.es_len)
35162306a36Sopenharmony_ci		return true;
35262306a36Sopenharmony_ci	else if (start <= es.es_lblk && es.es_lblk <= end)
35362306a36Sopenharmony_ci		return true;
35462306a36Sopenharmony_ci	else
35562306a36Sopenharmony_ci		return false;
35662306a36Sopenharmony_ci}
35762306a36Sopenharmony_ci/*
35862306a36Sopenharmony_ci * Locking for __es_scan_range() for external use
35962306a36Sopenharmony_ci */
36062306a36Sopenharmony_cibool ext4_es_scan_range(struct inode *inode,
36162306a36Sopenharmony_ci			int (*matching_fn)(struct extent_status *es),
36262306a36Sopenharmony_ci			ext4_lblk_t lblk, ext4_lblk_t end)
36362306a36Sopenharmony_ci{
36462306a36Sopenharmony_ci	bool ret;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
36762306a36Sopenharmony_ci		return false;
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	read_lock(&EXT4_I(inode)->i_es_lock);
37062306a36Sopenharmony_ci	ret = __es_scan_range(inode, matching_fn, lblk, end);
37162306a36Sopenharmony_ci	read_unlock(&EXT4_I(inode)->i_es_lock);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	return ret;
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci/*
37762306a36Sopenharmony_ci * __es_scan_clu - search cluster for block with specified status in
37862306a36Sopenharmony_ci *                 extents status tree
37962306a36Sopenharmony_ci *
38062306a36Sopenharmony_ci * @inode - file containing the cluster
38162306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status
38262306a36Sopenharmony_ci * @lblk - logical block in cluster to be searched
38362306a36Sopenharmony_ci *
38462306a36Sopenharmony_ci * Returns true if at least one extent in the cluster containing @lblk
38562306a36Sopenharmony_ci * satisfies the criterion specified by @matching_fn, and false if not.  If at
38662306a36Sopenharmony_ci * least one extent has the specified status, then there is at least one block
38762306a36Sopenharmony_ci * in the cluster with that status.  Should only be called by code that has
38862306a36Sopenharmony_ci * taken i_es_lock.
38962306a36Sopenharmony_ci */
39062306a36Sopenharmony_cistatic bool __es_scan_clu(struct inode *inode,
39162306a36Sopenharmony_ci			  int (*matching_fn)(struct extent_status *es),
39262306a36Sopenharmony_ci			  ext4_lblk_t lblk)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
39562306a36Sopenharmony_ci	ext4_lblk_t lblk_start, lblk_end;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
39862306a36Sopenharmony_ci	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
40162306a36Sopenharmony_ci}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci/*
40462306a36Sopenharmony_ci * Locking for __es_scan_clu() for external use
40562306a36Sopenharmony_ci */
40662306a36Sopenharmony_cibool ext4_es_scan_clu(struct inode *inode,
40762306a36Sopenharmony_ci		      int (*matching_fn)(struct extent_status *es),
40862306a36Sopenharmony_ci		      ext4_lblk_t lblk)
40962306a36Sopenharmony_ci{
41062306a36Sopenharmony_ci	bool ret;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
41362306a36Sopenharmony_ci		return false;
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	read_lock(&EXT4_I(inode)->i_es_lock);
41662306a36Sopenharmony_ci	ret = __es_scan_clu(inode, matching_fn, lblk);
41762306a36Sopenharmony_ci	read_unlock(&EXT4_I(inode)->i_es_lock);
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	return ret;
42062306a36Sopenharmony_ci}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_cistatic void ext4_es_list_add(struct inode *inode)
42362306a36Sopenharmony_ci{
42462306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
42562306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	if (!list_empty(&ei->i_es_list))
42862306a36Sopenharmony_ci		return;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	spin_lock(&sbi->s_es_lock);
43162306a36Sopenharmony_ci	if (list_empty(&ei->i_es_list)) {
43262306a36Sopenharmony_ci		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
43362306a36Sopenharmony_ci		sbi->s_es_nr_inode++;
43462306a36Sopenharmony_ci	}
43562306a36Sopenharmony_ci	spin_unlock(&sbi->s_es_lock);
43662306a36Sopenharmony_ci}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_cistatic void ext4_es_list_del(struct inode *inode)
43962306a36Sopenharmony_ci{
44062306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
44162306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	spin_lock(&sbi->s_es_lock);
44462306a36Sopenharmony_ci	if (!list_empty(&ei->i_es_list)) {
44562306a36Sopenharmony_ci		list_del_init(&ei->i_es_list);
44662306a36Sopenharmony_ci		sbi->s_es_nr_inode--;
44762306a36Sopenharmony_ci		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
44862306a36Sopenharmony_ci	}
44962306a36Sopenharmony_ci	spin_unlock(&sbi->s_es_lock);
45062306a36Sopenharmony_ci}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_cistatic inline struct pending_reservation *__alloc_pending(bool nofail)
45362306a36Sopenharmony_ci{
45462306a36Sopenharmony_ci	if (!nofail)
45562306a36Sopenharmony_ci		return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_cistatic inline void __free_pending(struct pending_reservation *pr)
46162306a36Sopenharmony_ci{
46262306a36Sopenharmony_ci	kmem_cache_free(ext4_pending_cachep, pr);
46362306a36Sopenharmony_ci}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci/*
46662306a36Sopenharmony_ci * Returns true if we cannot fail to allocate memory for this extent_status
46762306a36Sopenharmony_ci * entry and cannot reclaim it until its status changes.
46862306a36Sopenharmony_ci */
46962306a36Sopenharmony_cistatic inline bool ext4_es_must_keep(struct extent_status *es)
47062306a36Sopenharmony_ci{
47162306a36Sopenharmony_ci	/* fiemap, bigalloc, and seek_data/hole need to use it. */
47262306a36Sopenharmony_ci	if (ext4_es_is_delayed(es))
47362306a36Sopenharmony_ci		return true;
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	return false;
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_cistatic inline struct extent_status *__es_alloc_extent(bool nofail)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	if (!nofail)
48162306a36Sopenharmony_ci		return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL);
48462306a36Sopenharmony_ci}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_cistatic void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
48762306a36Sopenharmony_ci		ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
48862306a36Sopenharmony_ci{
48962306a36Sopenharmony_ci	es->es_lblk = lblk;
49062306a36Sopenharmony_ci	es->es_len = len;
49162306a36Sopenharmony_ci	es->es_pblk = pblk;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	/* We never try to reclaim a must kept extent, so we don't count it. */
49462306a36Sopenharmony_ci	if (!ext4_es_must_keep(es)) {
49562306a36Sopenharmony_ci		if (!EXT4_I(inode)->i_es_shk_nr++)
49662306a36Sopenharmony_ci			ext4_es_list_add(inode);
49762306a36Sopenharmony_ci		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
49862306a36Sopenharmony_ci					s_es_stats.es_stats_shk_cnt);
49962306a36Sopenharmony_ci	}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	EXT4_I(inode)->i_es_all_nr++;
50262306a36Sopenharmony_ci	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
50362306a36Sopenharmony_ci}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_cistatic inline void __es_free_extent(struct extent_status *es)
50662306a36Sopenharmony_ci{
50762306a36Sopenharmony_ci	kmem_cache_free(ext4_es_cachep, es);
50862306a36Sopenharmony_ci}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_cistatic void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	EXT4_I(inode)->i_es_all_nr--;
51362306a36Sopenharmony_ci	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	/* Decrease the shrink counter when we can reclaim the extent. */
51662306a36Sopenharmony_ci	if (!ext4_es_must_keep(es)) {
51762306a36Sopenharmony_ci		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
51862306a36Sopenharmony_ci		if (!--EXT4_I(inode)->i_es_shk_nr)
51962306a36Sopenharmony_ci			ext4_es_list_del(inode);
52062306a36Sopenharmony_ci		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
52162306a36Sopenharmony_ci					s_es_stats.es_stats_shk_cnt);
52262306a36Sopenharmony_ci	}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	__es_free_extent(es);
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci/*
52862306a36Sopenharmony_ci * Check whether or not two extents can be merged
52962306a36Sopenharmony_ci * Condition:
53062306a36Sopenharmony_ci *  - logical block number is contiguous
53162306a36Sopenharmony_ci *  - physical block number is contiguous
53262306a36Sopenharmony_ci *  - status is equal
53362306a36Sopenharmony_ci */
53462306a36Sopenharmony_cistatic int ext4_es_can_be_merged(struct extent_status *es1,
53562306a36Sopenharmony_ci				 struct extent_status *es2)
53662306a36Sopenharmony_ci{
53762306a36Sopenharmony_ci	if (ext4_es_type(es1) != ext4_es_type(es2))
53862306a36Sopenharmony_ci		return 0;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
54162306a36Sopenharmony_ci		pr_warn("ES assertion failed when merging extents. "
54262306a36Sopenharmony_ci			"The sum of lengths of es1 (%d) and es2 (%d) "
54362306a36Sopenharmony_ci			"is bigger than allowed file size (%d)\n",
54462306a36Sopenharmony_ci			es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
54562306a36Sopenharmony_ci		WARN_ON(1);
54662306a36Sopenharmony_ci		return 0;
54762306a36Sopenharmony_ci	}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
55062306a36Sopenharmony_ci		return 0;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
55362306a36Sopenharmony_ci	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
55462306a36Sopenharmony_ci		return 1;
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	if (ext4_es_is_hole(es1))
55762306a36Sopenharmony_ci		return 1;
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	/* we need to check delayed extent is without unwritten status */
56062306a36Sopenharmony_ci	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
56162306a36Sopenharmony_ci		return 1;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	return 0;
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_cistatic struct extent_status *
56762306a36Sopenharmony_ciext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
56862306a36Sopenharmony_ci{
56962306a36Sopenharmony_ci	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
57062306a36Sopenharmony_ci	struct extent_status *es1;
57162306a36Sopenharmony_ci	struct rb_node *node;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	node = rb_prev(&es->rb_node);
57462306a36Sopenharmony_ci	if (!node)
57562306a36Sopenharmony_ci		return es;
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	es1 = rb_entry(node, struct extent_status, rb_node);
57862306a36Sopenharmony_ci	if (ext4_es_can_be_merged(es1, es)) {
57962306a36Sopenharmony_ci		es1->es_len += es->es_len;
58062306a36Sopenharmony_ci		if (ext4_es_is_referenced(es))
58162306a36Sopenharmony_ci			ext4_es_set_referenced(es1);
58262306a36Sopenharmony_ci		rb_erase(&es->rb_node, &tree->root);
58362306a36Sopenharmony_ci		ext4_es_free_extent(inode, es);
58462306a36Sopenharmony_ci		es = es1;
58562306a36Sopenharmony_ci	}
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	return es;
58862306a36Sopenharmony_ci}
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_cistatic struct extent_status *
59162306a36Sopenharmony_ciext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
59262306a36Sopenharmony_ci{
59362306a36Sopenharmony_ci	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
59462306a36Sopenharmony_ci	struct extent_status *es1;
59562306a36Sopenharmony_ci	struct rb_node *node;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	node = rb_next(&es->rb_node);
59862306a36Sopenharmony_ci	if (!node)
59962306a36Sopenharmony_ci		return es;
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	es1 = rb_entry(node, struct extent_status, rb_node);
60262306a36Sopenharmony_ci	if (ext4_es_can_be_merged(es, es1)) {
60362306a36Sopenharmony_ci		es->es_len += es1->es_len;
60462306a36Sopenharmony_ci		if (ext4_es_is_referenced(es1))
60562306a36Sopenharmony_ci			ext4_es_set_referenced(es);
60662306a36Sopenharmony_ci		rb_erase(node, &tree->root);
60762306a36Sopenharmony_ci		ext4_es_free_extent(inode, es1);
60862306a36Sopenharmony_ci	}
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	return es;
61162306a36Sopenharmony_ci}
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST
61462306a36Sopenharmony_ci#include "ext4_extents.h"	/* Needed when ES_AGGRESSIVE_TEST is defined */
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_cistatic void ext4_es_insert_extent_ext_check(struct inode *inode,
61762306a36Sopenharmony_ci					    struct extent_status *es)
61862306a36Sopenharmony_ci{
61962306a36Sopenharmony_ci	struct ext4_ext_path *path = NULL;
62062306a36Sopenharmony_ci	struct ext4_extent *ex;
62162306a36Sopenharmony_ci	ext4_lblk_t ee_block;
62262306a36Sopenharmony_ci	ext4_fsblk_t ee_start;
62362306a36Sopenharmony_ci	unsigned short ee_len;
62462306a36Sopenharmony_ci	int depth, ee_status, es_status;
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
62762306a36Sopenharmony_ci	if (IS_ERR(path))
62862306a36Sopenharmony_ci		return;
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci	depth = ext_depth(inode);
63162306a36Sopenharmony_ci	ex = path[depth].p_ext;
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	if (ex) {
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci		ee_block = le32_to_cpu(ex->ee_block);
63662306a36Sopenharmony_ci		ee_start = ext4_ext_pblock(ex);
63762306a36Sopenharmony_ci		ee_len = ext4_ext_get_actual_len(ex);
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
64062306a36Sopenharmony_ci		es_status = ext4_es_is_unwritten(es) ? 1 : 0;
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci		/*
64362306a36Sopenharmony_ci		 * Make sure ex and es are not overlap when we try to insert
64462306a36Sopenharmony_ci		 * a delayed/hole extent.
64562306a36Sopenharmony_ci		 */
64662306a36Sopenharmony_ci		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
64762306a36Sopenharmony_ci			if (in_range(es->es_lblk, ee_block, ee_len)) {
64862306a36Sopenharmony_ci				pr_warn("ES insert assertion failed for "
64962306a36Sopenharmony_ci					"inode: %lu we can find an extent "
65062306a36Sopenharmony_ci					"at block [%d/%d/%llu/%c], but we "
65162306a36Sopenharmony_ci					"want to add a delayed/hole extent "
65262306a36Sopenharmony_ci					"[%d/%d/%llu/%x]\n",
65362306a36Sopenharmony_ci					inode->i_ino, ee_block, ee_len,
65462306a36Sopenharmony_ci					ee_start, ee_status ? 'u' : 'w',
65562306a36Sopenharmony_ci					es->es_lblk, es->es_len,
65662306a36Sopenharmony_ci					ext4_es_pblock(es), ext4_es_status(es));
65762306a36Sopenharmony_ci			}
65862306a36Sopenharmony_ci			goto out;
65962306a36Sopenharmony_ci		}
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci		/*
66262306a36Sopenharmony_ci		 * We don't check ee_block == es->es_lblk, etc. because es
66362306a36Sopenharmony_ci		 * might be a part of whole extent, vice versa.
66462306a36Sopenharmony_ci		 */
66562306a36Sopenharmony_ci		if (es->es_lblk < ee_block ||
66662306a36Sopenharmony_ci		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
66762306a36Sopenharmony_ci			pr_warn("ES insert assertion failed for inode: %lu "
66862306a36Sopenharmony_ci				"ex_status [%d/%d/%llu/%c] != "
66962306a36Sopenharmony_ci				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
67062306a36Sopenharmony_ci				ee_block, ee_len, ee_start,
67162306a36Sopenharmony_ci				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
67262306a36Sopenharmony_ci				ext4_es_pblock(es), es_status ? 'u' : 'w');
67362306a36Sopenharmony_ci			goto out;
67462306a36Sopenharmony_ci		}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci		if (ee_status ^ es_status) {
67762306a36Sopenharmony_ci			pr_warn("ES insert assertion failed for inode: %lu "
67862306a36Sopenharmony_ci				"ex_status [%d/%d/%llu/%c] != "
67962306a36Sopenharmony_ci				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
68062306a36Sopenharmony_ci				ee_block, ee_len, ee_start,
68162306a36Sopenharmony_ci				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
68262306a36Sopenharmony_ci				ext4_es_pblock(es), es_status ? 'u' : 'w');
68362306a36Sopenharmony_ci		}
68462306a36Sopenharmony_ci	} else {
68562306a36Sopenharmony_ci		/*
68662306a36Sopenharmony_ci		 * We can't find an extent on disk.  So we need to make sure
68762306a36Sopenharmony_ci		 * that we don't want to add an written/unwritten extent.
68862306a36Sopenharmony_ci		 */
68962306a36Sopenharmony_ci		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
69062306a36Sopenharmony_ci			pr_warn("ES insert assertion failed for inode: %lu "
69162306a36Sopenharmony_ci				"can't find an extent at block %d but we want "
69262306a36Sopenharmony_ci				"to add a written/unwritten extent "
69362306a36Sopenharmony_ci				"[%d/%d/%llu/%x]\n", inode->i_ino,
69462306a36Sopenharmony_ci				es->es_lblk, es->es_lblk, es->es_len,
69562306a36Sopenharmony_ci				ext4_es_pblock(es), ext4_es_status(es));
69662306a36Sopenharmony_ci		}
69762306a36Sopenharmony_ci	}
69862306a36Sopenharmony_ciout:
69962306a36Sopenharmony_ci	ext4_free_ext_path(path);
70062306a36Sopenharmony_ci}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_cistatic void ext4_es_insert_extent_ind_check(struct inode *inode,
70362306a36Sopenharmony_ci					    struct extent_status *es)
70462306a36Sopenharmony_ci{
70562306a36Sopenharmony_ci	struct ext4_map_blocks map;
70662306a36Sopenharmony_ci	int retval;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	/*
70962306a36Sopenharmony_ci	 * Here we call ext4_ind_map_blocks to lookup a block mapping because
71062306a36Sopenharmony_ci	 * 'Indirect' structure is defined in indirect.c.  So we couldn't
71162306a36Sopenharmony_ci	 * access direct/indirect tree from outside.  It is too dirty to define
71262306a36Sopenharmony_ci	 * this function in indirect.c file.
71362306a36Sopenharmony_ci	 */
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	map.m_lblk = es->es_lblk;
71662306a36Sopenharmony_ci	map.m_len = es->es_len;
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
71962306a36Sopenharmony_ci	if (retval > 0) {
72062306a36Sopenharmony_ci		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
72162306a36Sopenharmony_ci			/*
72262306a36Sopenharmony_ci			 * We want to add a delayed/hole extent but this
72362306a36Sopenharmony_ci			 * block has been allocated.
72462306a36Sopenharmony_ci			 */
72562306a36Sopenharmony_ci			pr_warn("ES insert assertion failed for inode: %lu "
72662306a36Sopenharmony_ci				"We can find blocks but we want to add a "
72762306a36Sopenharmony_ci				"delayed/hole extent [%d/%d/%llu/%x]\n",
72862306a36Sopenharmony_ci				inode->i_ino, es->es_lblk, es->es_len,
72962306a36Sopenharmony_ci				ext4_es_pblock(es), ext4_es_status(es));
73062306a36Sopenharmony_ci			return;
73162306a36Sopenharmony_ci		} else if (ext4_es_is_written(es)) {
73262306a36Sopenharmony_ci			if (retval != es->es_len) {
73362306a36Sopenharmony_ci				pr_warn("ES insert assertion failed for "
73462306a36Sopenharmony_ci					"inode: %lu retval %d != es_len %d\n",
73562306a36Sopenharmony_ci					inode->i_ino, retval, es->es_len);
73662306a36Sopenharmony_ci				return;
73762306a36Sopenharmony_ci			}
73862306a36Sopenharmony_ci			if (map.m_pblk != ext4_es_pblock(es)) {
73962306a36Sopenharmony_ci				pr_warn("ES insert assertion failed for "
74062306a36Sopenharmony_ci					"inode: %lu m_pblk %llu != "
74162306a36Sopenharmony_ci					"es_pblk %llu\n",
74262306a36Sopenharmony_ci					inode->i_ino, map.m_pblk,
74362306a36Sopenharmony_ci					ext4_es_pblock(es));
74462306a36Sopenharmony_ci				return;
74562306a36Sopenharmony_ci			}
74662306a36Sopenharmony_ci		} else {
74762306a36Sopenharmony_ci			/*
74862306a36Sopenharmony_ci			 * We don't need to check unwritten extent because
74962306a36Sopenharmony_ci			 * indirect-based file doesn't have it.
75062306a36Sopenharmony_ci			 */
75162306a36Sopenharmony_ci			BUG();
75262306a36Sopenharmony_ci		}
75362306a36Sopenharmony_ci	} else if (retval == 0) {
75462306a36Sopenharmony_ci		if (ext4_es_is_written(es)) {
75562306a36Sopenharmony_ci			pr_warn("ES insert assertion failed for inode: %lu "
75662306a36Sopenharmony_ci				"We can't find the block but we want to add "
75762306a36Sopenharmony_ci				"a written extent [%d/%d/%llu/%x]\n",
75862306a36Sopenharmony_ci				inode->i_ino, es->es_lblk, es->es_len,
75962306a36Sopenharmony_ci				ext4_es_pblock(es), ext4_es_status(es));
76062306a36Sopenharmony_ci			return;
76162306a36Sopenharmony_ci		}
76262306a36Sopenharmony_ci	}
76362306a36Sopenharmony_ci}
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_cistatic inline void ext4_es_insert_extent_check(struct inode *inode,
76662306a36Sopenharmony_ci					       struct extent_status *es)
76762306a36Sopenharmony_ci{
76862306a36Sopenharmony_ci	/*
76962306a36Sopenharmony_ci	 * We don't need to worry about the race condition because
77062306a36Sopenharmony_ci	 * caller takes i_data_sem locking.
77162306a36Sopenharmony_ci	 */
77262306a36Sopenharmony_ci	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
77362306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
77462306a36Sopenharmony_ci		ext4_es_insert_extent_ext_check(inode, es);
77562306a36Sopenharmony_ci	else
77662306a36Sopenharmony_ci		ext4_es_insert_extent_ind_check(inode, es);
77762306a36Sopenharmony_ci}
77862306a36Sopenharmony_ci#else
77962306a36Sopenharmony_cistatic inline void ext4_es_insert_extent_check(struct inode *inode,
78062306a36Sopenharmony_ci					       struct extent_status *es)
78162306a36Sopenharmony_ci{
78262306a36Sopenharmony_ci}
78362306a36Sopenharmony_ci#endif
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_cistatic int __es_insert_extent(struct inode *inode, struct extent_status *newes,
78662306a36Sopenharmony_ci			      struct extent_status *prealloc)
78762306a36Sopenharmony_ci{
78862306a36Sopenharmony_ci	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
78962306a36Sopenharmony_ci	struct rb_node **p = &tree->root.rb_node;
79062306a36Sopenharmony_ci	struct rb_node *parent = NULL;
79162306a36Sopenharmony_ci	struct extent_status *es;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	while (*p) {
79462306a36Sopenharmony_ci		parent = *p;
79562306a36Sopenharmony_ci		es = rb_entry(parent, struct extent_status, rb_node);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci		if (newes->es_lblk < es->es_lblk) {
79862306a36Sopenharmony_ci			if (ext4_es_can_be_merged(newes, es)) {
79962306a36Sopenharmony_ci				/*
80062306a36Sopenharmony_ci				 * Here we can modify es_lblk directly
80162306a36Sopenharmony_ci				 * because it isn't overlapped.
80262306a36Sopenharmony_ci				 */
80362306a36Sopenharmony_ci				es->es_lblk = newes->es_lblk;
80462306a36Sopenharmony_ci				es->es_len += newes->es_len;
80562306a36Sopenharmony_ci				if (ext4_es_is_written(es) ||
80662306a36Sopenharmony_ci				    ext4_es_is_unwritten(es))
80762306a36Sopenharmony_ci					ext4_es_store_pblock(es,
80862306a36Sopenharmony_ci							     newes->es_pblk);
80962306a36Sopenharmony_ci				es = ext4_es_try_to_merge_left(inode, es);
81062306a36Sopenharmony_ci				goto out;
81162306a36Sopenharmony_ci			}
81262306a36Sopenharmony_ci			p = &(*p)->rb_left;
81362306a36Sopenharmony_ci		} else if (newes->es_lblk > ext4_es_end(es)) {
81462306a36Sopenharmony_ci			if (ext4_es_can_be_merged(es, newes)) {
81562306a36Sopenharmony_ci				es->es_len += newes->es_len;
81662306a36Sopenharmony_ci				es = ext4_es_try_to_merge_right(inode, es);
81762306a36Sopenharmony_ci				goto out;
81862306a36Sopenharmony_ci			}
81962306a36Sopenharmony_ci			p = &(*p)->rb_right;
82062306a36Sopenharmony_ci		} else {
82162306a36Sopenharmony_ci			BUG();
82262306a36Sopenharmony_ci			return -EINVAL;
82362306a36Sopenharmony_ci		}
82462306a36Sopenharmony_ci	}
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	if (prealloc)
82762306a36Sopenharmony_ci		es = prealloc;
82862306a36Sopenharmony_ci	else
82962306a36Sopenharmony_ci		es = __es_alloc_extent(false);
83062306a36Sopenharmony_ci	if (!es)
83162306a36Sopenharmony_ci		return -ENOMEM;
83262306a36Sopenharmony_ci	ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
83362306a36Sopenharmony_ci			    newes->es_pblk);
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	rb_link_node(&es->rb_node, parent, p);
83662306a36Sopenharmony_ci	rb_insert_color(&es->rb_node, &tree->root);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ciout:
83962306a36Sopenharmony_ci	tree->cache_es = es;
84062306a36Sopenharmony_ci	return 0;
84162306a36Sopenharmony_ci}
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci/*
84462306a36Sopenharmony_ci * ext4_es_insert_extent() adds information to an inode's extent
84562306a36Sopenharmony_ci * status tree.
84662306a36Sopenharmony_ci */
84762306a36Sopenharmony_civoid ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
84862306a36Sopenharmony_ci			   ext4_lblk_t len, ext4_fsblk_t pblk,
84962306a36Sopenharmony_ci			   unsigned int status)
85062306a36Sopenharmony_ci{
85162306a36Sopenharmony_ci	struct extent_status newes;
85262306a36Sopenharmony_ci	ext4_lblk_t end = lblk + len - 1;
85362306a36Sopenharmony_ci	int err1 = 0, err2 = 0, err3 = 0;
85462306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
85562306a36Sopenharmony_ci	struct extent_status *es1 = NULL;
85662306a36Sopenharmony_ci	struct extent_status *es2 = NULL;
85762306a36Sopenharmony_ci	struct pending_reservation *pr = NULL;
85862306a36Sopenharmony_ci	bool revise_pending = false;
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
86162306a36Sopenharmony_ci		return;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
86462306a36Sopenharmony_ci		 lblk, len, pblk, status, inode->i_ino);
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	if (!len)
86762306a36Sopenharmony_ci		return;
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	BUG_ON(end < lblk);
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if ((status & EXTENT_STATUS_DELAYED) &&
87262306a36Sopenharmony_ci	    (status & EXTENT_STATUS_WRITTEN)) {
87362306a36Sopenharmony_ci		ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
87462306a36Sopenharmony_ci				" delayed and written which can potentially "
87562306a36Sopenharmony_ci				" cause data loss.", lblk, len);
87662306a36Sopenharmony_ci		WARN_ON(1);
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	newes.es_lblk = lblk;
88062306a36Sopenharmony_ci	newes.es_len = len;
88162306a36Sopenharmony_ci	ext4_es_store_pblock_status(&newes, pblk, status);
88262306a36Sopenharmony_ci	trace_ext4_es_insert_extent(inode, &newes);
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_ci	ext4_es_insert_extent_check(inode, &newes);
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	revise_pending = sbi->s_cluster_ratio > 1 &&
88762306a36Sopenharmony_ci			 test_opt(inode->i_sb, DELALLOC) &&
88862306a36Sopenharmony_ci			 (status & (EXTENT_STATUS_WRITTEN |
88962306a36Sopenharmony_ci				    EXTENT_STATUS_UNWRITTEN));
89062306a36Sopenharmony_ciretry:
89162306a36Sopenharmony_ci	if (err1 && !es1)
89262306a36Sopenharmony_ci		es1 = __es_alloc_extent(true);
89362306a36Sopenharmony_ci	if ((err1 || err2) && !es2)
89462306a36Sopenharmony_ci		es2 = __es_alloc_extent(true);
89562306a36Sopenharmony_ci	if ((err1 || err2 || err3) && revise_pending && !pr)
89662306a36Sopenharmony_ci		pr = __alloc_pending(true);
89762306a36Sopenharmony_ci	write_lock(&EXT4_I(inode)->i_es_lock);
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
90062306a36Sopenharmony_ci	if (err1 != 0)
90162306a36Sopenharmony_ci		goto error;
90262306a36Sopenharmony_ci	/* Free preallocated extent if it didn't get used. */
90362306a36Sopenharmony_ci	if (es1) {
90462306a36Sopenharmony_ci		if (!es1->es_len)
90562306a36Sopenharmony_ci			__es_free_extent(es1);
90662306a36Sopenharmony_ci		es1 = NULL;
90762306a36Sopenharmony_ci	}
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	err2 = __es_insert_extent(inode, &newes, es2);
91062306a36Sopenharmony_ci	if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
91162306a36Sopenharmony_ci		err2 = 0;
91262306a36Sopenharmony_ci	if (err2 != 0)
91362306a36Sopenharmony_ci		goto error;
91462306a36Sopenharmony_ci	/* Free preallocated extent if it didn't get used. */
91562306a36Sopenharmony_ci	if (es2) {
91662306a36Sopenharmony_ci		if (!es2->es_len)
91762306a36Sopenharmony_ci			__es_free_extent(es2);
91862306a36Sopenharmony_ci		es2 = NULL;
91962306a36Sopenharmony_ci	}
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	if (revise_pending) {
92262306a36Sopenharmony_ci		err3 = __revise_pending(inode, lblk, len, &pr);
92362306a36Sopenharmony_ci		if (err3 != 0)
92462306a36Sopenharmony_ci			goto error;
92562306a36Sopenharmony_ci		if (pr) {
92662306a36Sopenharmony_ci			__free_pending(pr);
92762306a36Sopenharmony_ci			pr = NULL;
92862306a36Sopenharmony_ci		}
92962306a36Sopenharmony_ci	}
93062306a36Sopenharmony_cierror:
93162306a36Sopenharmony_ci	write_unlock(&EXT4_I(inode)->i_es_lock);
93262306a36Sopenharmony_ci	if (err1 || err2 || err3)
93362306a36Sopenharmony_ci		goto retry;
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	ext4_es_print_tree(inode);
93662306a36Sopenharmony_ci	return;
93762306a36Sopenharmony_ci}
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci/*
94062306a36Sopenharmony_ci * ext4_es_cache_extent() inserts information into the extent status
94162306a36Sopenharmony_ci * tree if and only if there isn't information about the range in
94262306a36Sopenharmony_ci * question already.
94362306a36Sopenharmony_ci */
94462306a36Sopenharmony_civoid ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
94562306a36Sopenharmony_ci			  ext4_lblk_t len, ext4_fsblk_t pblk,
94662306a36Sopenharmony_ci			  unsigned int status)
94762306a36Sopenharmony_ci{
94862306a36Sopenharmony_ci	struct extent_status *es;
94962306a36Sopenharmony_ci	struct extent_status newes;
95062306a36Sopenharmony_ci	ext4_lblk_t end = lblk + len - 1;
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
95362306a36Sopenharmony_ci		return;
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	newes.es_lblk = lblk;
95662306a36Sopenharmony_ci	newes.es_len = len;
95762306a36Sopenharmony_ci	ext4_es_store_pblock_status(&newes, pblk, status);
95862306a36Sopenharmony_ci	trace_ext4_es_cache_extent(inode, &newes);
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	if (!len)
96162306a36Sopenharmony_ci		return;
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	BUG_ON(end < lblk);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	write_lock(&EXT4_I(inode)->i_es_lock);
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ci	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
96862306a36Sopenharmony_ci	if (!es || es->es_lblk > end)
96962306a36Sopenharmony_ci		__es_insert_extent(inode, &newes, NULL);
97062306a36Sopenharmony_ci	write_unlock(&EXT4_I(inode)->i_es_lock);
97162306a36Sopenharmony_ci}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci/*
97462306a36Sopenharmony_ci * ext4_es_lookup_extent() looks up an extent in extent status tree.
97562306a36Sopenharmony_ci *
97662306a36Sopenharmony_ci * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * Return: 1 on found, 0 on not
97962306a36Sopenharmony_ci */
98062306a36Sopenharmony_ciint ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
98162306a36Sopenharmony_ci			  ext4_lblk_t *next_lblk,
98262306a36Sopenharmony_ci			  struct extent_status *es)
98362306a36Sopenharmony_ci{
98462306a36Sopenharmony_ci	struct ext4_es_tree *tree;
98562306a36Sopenharmony_ci	struct ext4_es_stats *stats;
98662306a36Sopenharmony_ci	struct extent_status *es1 = NULL;
98762306a36Sopenharmony_ci	struct rb_node *node;
98862306a36Sopenharmony_ci	int found = 0;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
99162306a36Sopenharmony_ci		return 0;
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	trace_ext4_es_lookup_extent_enter(inode, lblk);
99462306a36Sopenharmony_ci	es_debug("lookup extent in block %u\n", lblk);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_es_tree;
99762306a36Sopenharmony_ci	read_lock(&EXT4_I(inode)->i_es_lock);
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	/* find extent in cache firstly */
100062306a36Sopenharmony_ci	es->es_lblk = es->es_len = es->es_pblk = 0;
100162306a36Sopenharmony_ci	es1 = READ_ONCE(tree->cache_es);
100262306a36Sopenharmony_ci	if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
100362306a36Sopenharmony_ci		es_debug("%u cached by [%u/%u)\n",
100462306a36Sopenharmony_ci			 lblk, es1->es_lblk, es1->es_len);
100562306a36Sopenharmony_ci		found = 1;
100662306a36Sopenharmony_ci		goto out;
100762306a36Sopenharmony_ci	}
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	node = tree->root.rb_node;
101062306a36Sopenharmony_ci	while (node) {
101162306a36Sopenharmony_ci		es1 = rb_entry(node, struct extent_status, rb_node);
101262306a36Sopenharmony_ci		if (lblk < es1->es_lblk)
101362306a36Sopenharmony_ci			node = node->rb_left;
101462306a36Sopenharmony_ci		else if (lblk > ext4_es_end(es1))
101562306a36Sopenharmony_ci			node = node->rb_right;
101662306a36Sopenharmony_ci		else {
101762306a36Sopenharmony_ci			found = 1;
101862306a36Sopenharmony_ci			break;
101962306a36Sopenharmony_ci		}
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ciout:
102362306a36Sopenharmony_ci	stats = &EXT4_SB(inode->i_sb)->s_es_stats;
102462306a36Sopenharmony_ci	if (found) {
102562306a36Sopenharmony_ci		BUG_ON(!es1);
102662306a36Sopenharmony_ci		es->es_lblk = es1->es_lblk;
102762306a36Sopenharmony_ci		es->es_len = es1->es_len;
102862306a36Sopenharmony_ci		es->es_pblk = es1->es_pblk;
102962306a36Sopenharmony_ci		if (!ext4_es_is_referenced(es1))
103062306a36Sopenharmony_ci			ext4_es_set_referenced(es1);
103162306a36Sopenharmony_ci		percpu_counter_inc(&stats->es_stats_cache_hits);
103262306a36Sopenharmony_ci		if (next_lblk) {
103362306a36Sopenharmony_ci			node = rb_next(&es1->rb_node);
103462306a36Sopenharmony_ci			if (node) {
103562306a36Sopenharmony_ci				es1 = rb_entry(node, struct extent_status,
103662306a36Sopenharmony_ci					       rb_node);
103762306a36Sopenharmony_ci				*next_lblk = es1->es_lblk;
103862306a36Sopenharmony_ci			} else
103962306a36Sopenharmony_ci				*next_lblk = 0;
104062306a36Sopenharmony_ci		}
104162306a36Sopenharmony_ci	} else {
104262306a36Sopenharmony_ci		percpu_counter_inc(&stats->es_stats_cache_misses);
104362306a36Sopenharmony_ci	}
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	read_unlock(&EXT4_I(inode)->i_es_lock);
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci	trace_ext4_es_lookup_extent_exit(inode, es, found);
104862306a36Sopenharmony_ci	return found;
104962306a36Sopenharmony_ci}
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_cistruct rsvd_count {
105262306a36Sopenharmony_ci	int ndelonly;
105362306a36Sopenharmony_ci	bool first_do_lblk_found;
105462306a36Sopenharmony_ci	ext4_lblk_t first_do_lblk;
105562306a36Sopenharmony_ci	ext4_lblk_t last_do_lblk;
105662306a36Sopenharmony_ci	struct extent_status *left_es;
105762306a36Sopenharmony_ci	bool partial;
105862306a36Sopenharmony_ci	ext4_lblk_t lclu;
105962306a36Sopenharmony_ci};
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci/*
106262306a36Sopenharmony_ci * init_rsvd - initialize reserved count data before removing block range
106362306a36Sopenharmony_ci *	       in file from extent status tree
106462306a36Sopenharmony_ci *
106562306a36Sopenharmony_ci * @inode - file containing range
106662306a36Sopenharmony_ci * @lblk - first block in range
106762306a36Sopenharmony_ci * @es - pointer to first extent in range
106862306a36Sopenharmony_ci * @rc - pointer to reserved count data
106962306a36Sopenharmony_ci *
107062306a36Sopenharmony_ci * Assumes es is not NULL
107162306a36Sopenharmony_ci */
107262306a36Sopenharmony_cistatic void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
107362306a36Sopenharmony_ci		      struct extent_status *es, struct rsvd_count *rc)
107462306a36Sopenharmony_ci{
107562306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
107662306a36Sopenharmony_ci	struct rb_node *node;
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci	rc->ndelonly = 0;
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci	/*
108162306a36Sopenharmony_ci	 * for bigalloc, note the first delonly block in the range has not
108262306a36Sopenharmony_ci	 * been found, record the extent containing the block to the left of
108362306a36Sopenharmony_ci	 * the region to be removed, if any, and note that there's no partial
108462306a36Sopenharmony_ci	 * cluster to track
108562306a36Sopenharmony_ci	 */
108662306a36Sopenharmony_ci	if (sbi->s_cluster_ratio > 1) {
108762306a36Sopenharmony_ci		rc->first_do_lblk_found = false;
108862306a36Sopenharmony_ci		if (lblk > es->es_lblk) {
108962306a36Sopenharmony_ci			rc->left_es = es;
109062306a36Sopenharmony_ci		} else {
109162306a36Sopenharmony_ci			node = rb_prev(&es->rb_node);
109262306a36Sopenharmony_ci			rc->left_es = node ? rb_entry(node,
109362306a36Sopenharmony_ci						      struct extent_status,
109462306a36Sopenharmony_ci						      rb_node) : NULL;
109562306a36Sopenharmony_ci		}
109662306a36Sopenharmony_ci		rc->partial = false;
109762306a36Sopenharmony_ci	}
109862306a36Sopenharmony_ci}
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci/*
110162306a36Sopenharmony_ci * count_rsvd - count the clusters containing delayed and not unwritten
110262306a36Sopenharmony_ci *		(delonly) blocks in a range within an extent and add to
110362306a36Sopenharmony_ci *	        the running tally in rsvd_count
110462306a36Sopenharmony_ci *
110562306a36Sopenharmony_ci * @inode - file containing extent
110662306a36Sopenharmony_ci * @lblk - first block in range
110762306a36Sopenharmony_ci * @len - length of range in blocks
110862306a36Sopenharmony_ci * @es - pointer to extent containing clusters to be counted
110962306a36Sopenharmony_ci * @rc - pointer to reserved count data
111062306a36Sopenharmony_ci *
111162306a36Sopenharmony_ci * Tracks partial clusters found at the beginning and end of extents so
111262306a36Sopenharmony_ci * they aren't overcounted when they span adjacent extents
111362306a36Sopenharmony_ci */
111462306a36Sopenharmony_cistatic void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
111562306a36Sopenharmony_ci		       struct extent_status *es, struct rsvd_count *rc)
111662306a36Sopenharmony_ci{
111762306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
111862306a36Sopenharmony_ci	ext4_lblk_t i, end, nclu;
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	if (!ext4_es_is_delonly(es))
112162306a36Sopenharmony_ci		return;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	WARN_ON(len <= 0);
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci	if (sbi->s_cluster_ratio == 1) {
112662306a36Sopenharmony_ci		rc->ndelonly += (int) len;
112762306a36Sopenharmony_ci		return;
112862306a36Sopenharmony_ci	}
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci	/* bigalloc */
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
113362306a36Sopenharmony_ci	end = lblk + (ext4_lblk_t) len - 1;
113462306a36Sopenharmony_ci	end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_ci	/* record the first block of the first delonly extent seen */
113762306a36Sopenharmony_ci	if (!rc->first_do_lblk_found) {
113862306a36Sopenharmony_ci		rc->first_do_lblk = i;
113962306a36Sopenharmony_ci		rc->first_do_lblk_found = true;
114062306a36Sopenharmony_ci	}
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	/* update the last lblk in the region seen so far */
114362306a36Sopenharmony_ci	rc->last_do_lblk = end;
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_ci	/*
114662306a36Sopenharmony_ci	 * if we're tracking a partial cluster and the current extent
114762306a36Sopenharmony_ci	 * doesn't start with it, count it and stop tracking
114862306a36Sopenharmony_ci	 */
114962306a36Sopenharmony_ci	if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
115062306a36Sopenharmony_ci		rc->ndelonly++;
115162306a36Sopenharmony_ci		rc->partial = false;
115262306a36Sopenharmony_ci	}
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci	/*
115562306a36Sopenharmony_ci	 * if the first cluster doesn't start on a cluster boundary but
115662306a36Sopenharmony_ci	 * ends on one, count it
115762306a36Sopenharmony_ci	 */
115862306a36Sopenharmony_ci	if (EXT4_LBLK_COFF(sbi, i) != 0) {
115962306a36Sopenharmony_ci		if (end >= EXT4_LBLK_CFILL(sbi, i)) {
116062306a36Sopenharmony_ci			rc->ndelonly++;
116162306a36Sopenharmony_ci			rc->partial = false;
116262306a36Sopenharmony_ci			i = EXT4_LBLK_CFILL(sbi, i) + 1;
116362306a36Sopenharmony_ci		}
116462306a36Sopenharmony_ci	}
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	/*
116762306a36Sopenharmony_ci	 * if the current cluster starts on a cluster boundary, count the
116862306a36Sopenharmony_ci	 * number of whole delonly clusters in the extent
116962306a36Sopenharmony_ci	 */
117062306a36Sopenharmony_ci	if ((i + sbi->s_cluster_ratio - 1) <= end) {
117162306a36Sopenharmony_ci		nclu = (end - i + 1) >> sbi->s_cluster_bits;
117262306a36Sopenharmony_ci		rc->ndelonly += nclu;
117362306a36Sopenharmony_ci		i += nclu << sbi->s_cluster_bits;
117462306a36Sopenharmony_ci	}
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	/*
117762306a36Sopenharmony_ci	 * start tracking a partial cluster if there's a partial at the end
117862306a36Sopenharmony_ci	 * of the current extent and we're not already tracking one
117962306a36Sopenharmony_ci	 */
118062306a36Sopenharmony_ci	if (!rc->partial && i <= end) {
118162306a36Sopenharmony_ci		rc->partial = true;
118262306a36Sopenharmony_ci		rc->lclu = EXT4_B2C(sbi, i);
118362306a36Sopenharmony_ci	}
118462306a36Sopenharmony_ci}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci/*
118762306a36Sopenharmony_ci * __pr_tree_search - search for a pending cluster reservation
118862306a36Sopenharmony_ci *
118962306a36Sopenharmony_ci * @root - root of pending reservation tree
119062306a36Sopenharmony_ci * @lclu - logical cluster to search for
119162306a36Sopenharmony_ci *
119262306a36Sopenharmony_ci * Returns the pending reservation for the cluster identified by @lclu
119362306a36Sopenharmony_ci * if found.  If not, returns a reservation for the next cluster if any,
119462306a36Sopenharmony_ci * and if not, returns NULL.
119562306a36Sopenharmony_ci */
119662306a36Sopenharmony_cistatic struct pending_reservation *__pr_tree_search(struct rb_root *root,
119762306a36Sopenharmony_ci						    ext4_lblk_t lclu)
119862306a36Sopenharmony_ci{
119962306a36Sopenharmony_ci	struct rb_node *node = root->rb_node;
120062306a36Sopenharmony_ci	struct pending_reservation *pr = NULL;
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_ci	while (node) {
120362306a36Sopenharmony_ci		pr = rb_entry(node, struct pending_reservation, rb_node);
120462306a36Sopenharmony_ci		if (lclu < pr->lclu)
120562306a36Sopenharmony_ci			node = node->rb_left;
120662306a36Sopenharmony_ci		else if (lclu > pr->lclu)
120762306a36Sopenharmony_ci			node = node->rb_right;
120862306a36Sopenharmony_ci		else
120962306a36Sopenharmony_ci			return pr;
121062306a36Sopenharmony_ci	}
121162306a36Sopenharmony_ci	if (pr && lclu < pr->lclu)
121262306a36Sopenharmony_ci		return pr;
121362306a36Sopenharmony_ci	if (pr && lclu > pr->lclu) {
121462306a36Sopenharmony_ci		node = rb_next(&pr->rb_node);
121562306a36Sopenharmony_ci		return node ? rb_entry(node, struct pending_reservation,
121662306a36Sopenharmony_ci				       rb_node) : NULL;
121762306a36Sopenharmony_ci	}
121862306a36Sopenharmony_ci	return NULL;
121962306a36Sopenharmony_ci}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci/*
122262306a36Sopenharmony_ci * get_rsvd - calculates and returns the number of cluster reservations to be
122362306a36Sopenharmony_ci *	      released when removing a block range from the extent status tree
122462306a36Sopenharmony_ci *	      and releases any pending reservations within the range
122562306a36Sopenharmony_ci *
122662306a36Sopenharmony_ci * @inode - file containing block range
122762306a36Sopenharmony_ci * @end - last block in range
122862306a36Sopenharmony_ci * @right_es - pointer to extent containing next block beyond end or NULL
122962306a36Sopenharmony_ci * @rc - pointer to reserved count data
123062306a36Sopenharmony_ci *
123162306a36Sopenharmony_ci * The number of reservations to be released is equal to the number of
123262306a36Sopenharmony_ci * clusters containing delayed and not unwritten (delonly) blocks within
123362306a36Sopenharmony_ci * the range, minus the number of clusters still containing delonly blocks
123462306a36Sopenharmony_ci * at the ends of the range, and minus the number of pending reservations
123562306a36Sopenharmony_ci * within the range.
123662306a36Sopenharmony_ci */
123762306a36Sopenharmony_cistatic unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
123862306a36Sopenharmony_ci			     struct extent_status *right_es,
123962306a36Sopenharmony_ci			     struct rsvd_count *rc)
124062306a36Sopenharmony_ci{
124162306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
124262306a36Sopenharmony_ci	struct pending_reservation *pr;
124362306a36Sopenharmony_ci	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
124462306a36Sopenharmony_ci	struct rb_node *node;
124562306a36Sopenharmony_ci	ext4_lblk_t first_lclu, last_lclu;
124662306a36Sopenharmony_ci	bool left_delonly, right_delonly, count_pending;
124762306a36Sopenharmony_ci	struct extent_status *es;
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_ci	if (sbi->s_cluster_ratio > 1) {
125062306a36Sopenharmony_ci		/* count any remaining partial cluster */
125162306a36Sopenharmony_ci		if (rc->partial)
125262306a36Sopenharmony_ci			rc->ndelonly++;
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci		if (rc->ndelonly == 0)
125562306a36Sopenharmony_ci			return 0;
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci		first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
125862306a36Sopenharmony_ci		last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci		/*
126162306a36Sopenharmony_ci		 * decrease the delonly count by the number of clusters at the
126262306a36Sopenharmony_ci		 * ends of the range that still contain delonly blocks -
126362306a36Sopenharmony_ci		 * these clusters still need to be reserved
126462306a36Sopenharmony_ci		 */
126562306a36Sopenharmony_ci		left_delonly = right_delonly = false;
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci		es = rc->left_es;
126862306a36Sopenharmony_ci		while (es && ext4_es_end(es) >=
126962306a36Sopenharmony_ci		       EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
127062306a36Sopenharmony_ci			if (ext4_es_is_delonly(es)) {
127162306a36Sopenharmony_ci				rc->ndelonly--;
127262306a36Sopenharmony_ci				left_delonly = true;
127362306a36Sopenharmony_ci				break;
127462306a36Sopenharmony_ci			}
127562306a36Sopenharmony_ci			node = rb_prev(&es->rb_node);
127662306a36Sopenharmony_ci			if (!node)
127762306a36Sopenharmony_ci				break;
127862306a36Sopenharmony_ci			es = rb_entry(node, struct extent_status, rb_node);
127962306a36Sopenharmony_ci		}
128062306a36Sopenharmony_ci		if (right_es && (!left_delonly || first_lclu != last_lclu)) {
128162306a36Sopenharmony_ci			if (end < ext4_es_end(right_es)) {
128262306a36Sopenharmony_ci				es = right_es;
128362306a36Sopenharmony_ci			} else {
128462306a36Sopenharmony_ci				node = rb_next(&right_es->rb_node);
128562306a36Sopenharmony_ci				es = node ? rb_entry(node, struct extent_status,
128662306a36Sopenharmony_ci						     rb_node) : NULL;
128762306a36Sopenharmony_ci			}
128862306a36Sopenharmony_ci			while (es && es->es_lblk <=
128962306a36Sopenharmony_ci			       EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
129062306a36Sopenharmony_ci				if (ext4_es_is_delonly(es)) {
129162306a36Sopenharmony_ci					rc->ndelonly--;
129262306a36Sopenharmony_ci					right_delonly = true;
129362306a36Sopenharmony_ci					break;
129462306a36Sopenharmony_ci				}
129562306a36Sopenharmony_ci				node = rb_next(&es->rb_node);
129662306a36Sopenharmony_ci				if (!node)
129762306a36Sopenharmony_ci					break;
129862306a36Sopenharmony_ci				es = rb_entry(node, struct extent_status,
129962306a36Sopenharmony_ci					      rb_node);
130062306a36Sopenharmony_ci			}
130162306a36Sopenharmony_ci		}
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci		/*
130462306a36Sopenharmony_ci		 * Determine the block range that should be searched for
130562306a36Sopenharmony_ci		 * pending reservations, if any.  Clusters on the ends of the
130662306a36Sopenharmony_ci		 * original removed range containing delonly blocks are
130762306a36Sopenharmony_ci		 * excluded.  They've already been accounted for and it's not
130862306a36Sopenharmony_ci		 * possible to determine if an associated pending reservation
130962306a36Sopenharmony_ci		 * should be released with the information available in the
131062306a36Sopenharmony_ci		 * extents status tree.
131162306a36Sopenharmony_ci		 */
131262306a36Sopenharmony_ci		if (first_lclu == last_lclu) {
131362306a36Sopenharmony_ci			if (left_delonly | right_delonly)
131462306a36Sopenharmony_ci				count_pending = false;
131562306a36Sopenharmony_ci			else
131662306a36Sopenharmony_ci				count_pending = true;
131762306a36Sopenharmony_ci		} else {
131862306a36Sopenharmony_ci			if (left_delonly)
131962306a36Sopenharmony_ci				first_lclu++;
132062306a36Sopenharmony_ci			if (right_delonly)
132162306a36Sopenharmony_ci				last_lclu--;
132262306a36Sopenharmony_ci			if (first_lclu <= last_lclu)
132362306a36Sopenharmony_ci				count_pending = true;
132462306a36Sopenharmony_ci			else
132562306a36Sopenharmony_ci				count_pending = false;
132662306a36Sopenharmony_ci		}
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci		/*
132962306a36Sopenharmony_ci		 * a pending reservation found between first_lclu and last_lclu
133062306a36Sopenharmony_ci		 * represents an allocated cluster that contained at least one
133162306a36Sopenharmony_ci		 * delonly block, so the delonly total must be reduced by one
133262306a36Sopenharmony_ci		 * for each pending reservation found and released
133362306a36Sopenharmony_ci		 */
133462306a36Sopenharmony_ci		if (count_pending) {
133562306a36Sopenharmony_ci			pr = __pr_tree_search(&tree->root, first_lclu);
133662306a36Sopenharmony_ci			while (pr && pr->lclu <= last_lclu) {
133762306a36Sopenharmony_ci				rc->ndelonly--;
133862306a36Sopenharmony_ci				node = rb_next(&pr->rb_node);
133962306a36Sopenharmony_ci				rb_erase(&pr->rb_node, &tree->root);
134062306a36Sopenharmony_ci				__free_pending(pr);
134162306a36Sopenharmony_ci				if (!node)
134262306a36Sopenharmony_ci					break;
134362306a36Sopenharmony_ci				pr = rb_entry(node, struct pending_reservation,
134462306a36Sopenharmony_ci					      rb_node);
134562306a36Sopenharmony_ci			}
134662306a36Sopenharmony_ci		}
134762306a36Sopenharmony_ci	}
134862306a36Sopenharmony_ci	return rc->ndelonly;
134962306a36Sopenharmony_ci}
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci/*
135362306a36Sopenharmony_ci * __es_remove_extent - removes block range from extent status tree
135462306a36Sopenharmony_ci *
135562306a36Sopenharmony_ci * @inode - file containing range
135662306a36Sopenharmony_ci * @lblk - first block in range
135762306a36Sopenharmony_ci * @end - last block in range
135862306a36Sopenharmony_ci * @reserved - number of cluster reservations released
135962306a36Sopenharmony_ci * @prealloc - pre-allocated es to avoid memory allocation failures
136062306a36Sopenharmony_ci *
136162306a36Sopenharmony_ci * If @reserved is not NULL and delayed allocation is enabled, counts
136262306a36Sopenharmony_ci * block/cluster reservations freed by removing range and if bigalloc
136362306a36Sopenharmony_ci * enabled cancels pending reservations as needed. Returns 0 on success,
136462306a36Sopenharmony_ci * error code on failure.
136562306a36Sopenharmony_ci */
136662306a36Sopenharmony_cistatic int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
136762306a36Sopenharmony_ci			      ext4_lblk_t end, int *reserved,
136862306a36Sopenharmony_ci			      struct extent_status *prealloc)
136962306a36Sopenharmony_ci{
137062306a36Sopenharmony_ci	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
137162306a36Sopenharmony_ci	struct rb_node *node;
137262306a36Sopenharmony_ci	struct extent_status *es;
137362306a36Sopenharmony_ci	struct extent_status orig_es;
137462306a36Sopenharmony_ci	ext4_lblk_t len1, len2;
137562306a36Sopenharmony_ci	ext4_fsblk_t block;
137662306a36Sopenharmony_ci	int err = 0;
137762306a36Sopenharmony_ci	bool count_reserved = true;
137862306a36Sopenharmony_ci	struct rsvd_count rc;
137962306a36Sopenharmony_ci
138062306a36Sopenharmony_ci	if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
138162306a36Sopenharmony_ci		count_reserved = false;
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci	es = __es_tree_search(&tree->root, lblk);
138462306a36Sopenharmony_ci	if (!es)
138562306a36Sopenharmony_ci		goto out;
138662306a36Sopenharmony_ci	if (es->es_lblk > end)
138762306a36Sopenharmony_ci		goto out;
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	/* Simply invalidate cache_es. */
139062306a36Sopenharmony_ci	tree->cache_es = NULL;
139162306a36Sopenharmony_ci	if (count_reserved)
139262306a36Sopenharmony_ci		init_rsvd(inode, lblk, es, &rc);
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_ci	orig_es.es_lblk = es->es_lblk;
139562306a36Sopenharmony_ci	orig_es.es_len = es->es_len;
139662306a36Sopenharmony_ci	orig_es.es_pblk = es->es_pblk;
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
139962306a36Sopenharmony_ci	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
140062306a36Sopenharmony_ci	if (len1 > 0)
140162306a36Sopenharmony_ci		es->es_len = len1;
140262306a36Sopenharmony_ci	if (len2 > 0) {
140362306a36Sopenharmony_ci		if (len1 > 0) {
140462306a36Sopenharmony_ci			struct extent_status newes;
140562306a36Sopenharmony_ci
140662306a36Sopenharmony_ci			newes.es_lblk = end + 1;
140762306a36Sopenharmony_ci			newes.es_len = len2;
140862306a36Sopenharmony_ci			block = 0x7FDEADBEEFULL;
140962306a36Sopenharmony_ci			if (ext4_es_is_written(&orig_es) ||
141062306a36Sopenharmony_ci			    ext4_es_is_unwritten(&orig_es))
141162306a36Sopenharmony_ci				block = ext4_es_pblock(&orig_es) +
141262306a36Sopenharmony_ci					orig_es.es_len - len2;
141362306a36Sopenharmony_ci			ext4_es_store_pblock_status(&newes, block,
141462306a36Sopenharmony_ci						    ext4_es_status(&orig_es));
141562306a36Sopenharmony_ci			err = __es_insert_extent(inode, &newes, prealloc);
141662306a36Sopenharmony_ci			if (err) {
141762306a36Sopenharmony_ci				if (!ext4_es_must_keep(&newes))
141862306a36Sopenharmony_ci					return 0;
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci				es->es_lblk = orig_es.es_lblk;
142162306a36Sopenharmony_ci				es->es_len = orig_es.es_len;
142262306a36Sopenharmony_ci				goto out;
142362306a36Sopenharmony_ci			}
142462306a36Sopenharmony_ci		} else {
142562306a36Sopenharmony_ci			es->es_lblk = end + 1;
142662306a36Sopenharmony_ci			es->es_len = len2;
142762306a36Sopenharmony_ci			if (ext4_es_is_written(es) ||
142862306a36Sopenharmony_ci			    ext4_es_is_unwritten(es)) {
142962306a36Sopenharmony_ci				block = orig_es.es_pblk + orig_es.es_len - len2;
143062306a36Sopenharmony_ci				ext4_es_store_pblock(es, block);
143162306a36Sopenharmony_ci			}
143262306a36Sopenharmony_ci		}
143362306a36Sopenharmony_ci		if (count_reserved)
143462306a36Sopenharmony_ci			count_rsvd(inode, orig_es.es_lblk + len1,
143562306a36Sopenharmony_ci				   orig_es.es_len - len1 - len2, &orig_es, &rc);
143662306a36Sopenharmony_ci		goto out_get_reserved;
143762306a36Sopenharmony_ci	}
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	if (len1 > 0) {
144062306a36Sopenharmony_ci		if (count_reserved)
144162306a36Sopenharmony_ci			count_rsvd(inode, lblk, orig_es.es_len - len1,
144262306a36Sopenharmony_ci				   &orig_es, &rc);
144362306a36Sopenharmony_ci		node = rb_next(&es->rb_node);
144462306a36Sopenharmony_ci		if (node)
144562306a36Sopenharmony_ci			es = rb_entry(node, struct extent_status, rb_node);
144662306a36Sopenharmony_ci		else
144762306a36Sopenharmony_ci			es = NULL;
144862306a36Sopenharmony_ci	}
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	while (es && ext4_es_end(es) <= end) {
145162306a36Sopenharmony_ci		if (count_reserved)
145262306a36Sopenharmony_ci			count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
145362306a36Sopenharmony_ci		node = rb_next(&es->rb_node);
145462306a36Sopenharmony_ci		rb_erase(&es->rb_node, &tree->root);
145562306a36Sopenharmony_ci		ext4_es_free_extent(inode, es);
145662306a36Sopenharmony_ci		if (!node) {
145762306a36Sopenharmony_ci			es = NULL;
145862306a36Sopenharmony_ci			break;
145962306a36Sopenharmony_ci		}
146062306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
146162306a36Sopenharmony_ci	}
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	if (es && es->es_lblk < end + 1) {
146462306a36Sopenharmony_ci		ext4_lblk_t orig_len = es->es_len;
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci		len1 = ext4_es_end(es) - end;
146762306a36Sopenharmony_ci		if (count_reserved)
146862306a36Sopenharmony_ci			count_rsvd(inode, es->es_lblk, orig_len - len1,
146962306a36Sopenharmony_ci				   es, &rc);
147062306a36Sopenharmony_ci		es->es_lblk = end + 1;
147162306a36Sopenharmony_ci		es->es_len = len1;
147262306a36Sopenharmony_ci		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
147362306a36Sopenharmony_ci			block = es->es_pblk + orig_len - len1;
147462306a36Sopenharmony_ci			ext4_es_store_pblock(es, block);
147562306a36Sopenharmony_ci		}
147662306a36Sopenharmony_ci	}
147762306a36Sopenharmony_ci
147862306a36Sopenharmony_ciout_get_reserved:
147962306a36Sopenharmony_ci	if (count_reserved)
148062306a36Sopenharmony_ci		*reserved = get_rsvd(inode, end, es, &rc);
148162306a36Sopenharmony_ciout:
148262306a36Sopenharmony_ci	return err;
148362306a36Sopenharmony_ci}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci/*
148662306a36Sopenharmony_ci * ext4_es_remove_extent - removes block range from extent status tree
148762306a36Sopenharmony_ci *
148862306a36Sopenharmony_ci * @inode - file containing range
148962306a36Sopenharmony_ci * @lblk - first block in range
149062306a36Sopenharmony_ci * @len - number of blocks to remove
149162306a36Sopenharmony_ci *
149262306a36Sopenharmony_ci * Reduces block/cluster reservation count and for bigalloc cancels pending
149362306a36Sopenharmony_ci * reservations as needed.
149462306a36Sopenharmony_ci */
149562306a36Sopenharmony_civoid ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
149662306a36Sopenharmony_ci			   ext4_lblk_t len)
149762306a36Sopenharmony_ci{
149862306a36Sopenharmony_ci	ext4_lblk_t end;
149962306a36Sopenharmony_ci	int err = 0;
150062306a36Sopenharmony_ci	int reserved = 0;
150162306a36Sopenharmony_ci	struct extent_status *es = NULL;
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
150462306a36Sopenharmony_ci		return;
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_ci	trace_ext4_es_remove_extent(inode, lblk, len);
150762306a36Sopenharmony_ci	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
150862306a36Sopenharmony_ci		 lblk, len, inode->i_ino);
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_ci	if (!len)
151162306a36Sopenharmony_ci		return;
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	end = lblk + len - 1;
151462306a36Sopenharmony_ci	BUG_ON(end < lblk);
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ciretry:
151762306a36Sopenharmony_ci	if (err && !es)
151862306a36Sopenharmony_ci		es = __es_alloc_extent(true);
151962306a36Sopenharmony_ci	/*
152062306a36Sopenharmony_ci	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
152162306a36Sopenharmony_ci	 * so that we are sure __es_shrink() is done with the inode before it
152262306a36Sopenharmony_ci	 * is reclaimed.
152362306a36Sopenharmony_ci	 */
152462306a36Sopenharmony_ci	write_lock(&EXT4_I(inode)->i_es_lock);
152562306a36Sopenharmony_ci	err = __es_remove_extent(inode, lblk, end, &reserved, es);
152662306a36Sopenharmony_ci	/* Free preallocated extent if it didn't get used. */
152762306a36Sopenharmony_ci	if (es) {
152862306a36Sopenharmony_ci		if (!es->es_len)
152962306a36Sopenharmony_ci			__es_free_extent(es);
153062306a36Sopenharmony_ci		es = NULL;
153162306a36Sopenharmony_ci	}
153262306a36Sopenharmony_ci	write_unlock(&EXT4_I(inode)->i_es_lock);
153362306a36Sopenharmony_ci	if (err)
153462306a36Sopenharmony_ci		goto retry;
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ci	ext4_es_print_tree(inode);
153762306a36Sopenharmony_ci	ext4_da_release_space(inode, reserved);
153862306a36Sopenharmony_ci	return;
153962306a36Sopenharmony_ci}
154062306a36Sopenharmony_ci
154162306a36Sopenharmony_cistatic int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
154262306a36Sopenharmony_ci		       struct ext4_inode_info *locked_ei)
154362306a36Sopenharmony_ci{
154462306a36Sopenharmony_ci	struct ext4_inode_info *ei;
154562306a36Sopenharmony_ci	struct ext4_es_stats *es_stats;
154662306a36Sopenharmony_ci	ktime_t start_time;
154762306a36Sopenharmony_ci	u64 scan_time;
154862306a36Sopenharmony_ci	int nr_to_walk;
154962306a36Sopenharmony_ci	int nr_shrunk = 0;
155062306a36Sopenharmony_ci	int retried = 0, nr_skipped = 0;
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci	es_stats = &sbi->s_es_stats;
155362306a36Sopenharmony_ci	start_time = ktime_get();
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ciretry:
155662306a36Sopenharmony_ci	spin_lock(&sbi->s_es_lock);
155762306a36Sopenharmony_ci	nr_to_walk = sbi->s_es_nr_inode;
155862306a36Sopenharmony_ci	while (nr_to_walk-- > 0) {
155962306a36Sopenharmony_ci		if (list_empty(&sbi->s_es_list)) {
156062306a36Sopenharmony_ci			spin_unlock(&sbi->s_es_lock);
156162306a36Sopenharmony_ci			goto out;
156262306a36Sopenharmony_ci		}
156362306a36Sopenharmony_ci		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
156462306a36Sopenharmony_ci				      i_es_list);
156562306a36Sopenharmony_ci		/* Move the inode to the tail */
156662306a36Sopenharmony_ci		list_move_tail(&ei->i_es_list, &sbi->s_es_list);
156762306a36Sopenharmony_ci
156862306a36Sopenharmony_ci		/*
156962306a36Sopenharmony_ci		 * Normally we try hard to avoid shrinking precached inodes,
157062306a36Sopenharmony_ci		 * but we will as a last resort.
157162306a36Sopenharmony_ci		 */
157262306a36Sopenharmony_ci		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
157362306a36Sopenharmony_ci						EXT4_STATE_EXT_PRECACHED)) {
157462306a36Sopenharmony_ci			nr_skipped++;
157562306a36Sopenharmony_ci			continue;
157662306a36Sopenharmony_ci		}
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
157962306a36Sopenharmony_ci			nr_skipped++;
158062306a36Sopenharmony_ci			continue;
158162306a36Sopenharmony_ci		}
158262306a36Sopenharmony_ci		/*
158362306a36Sopenharmony_ci		 * Now we hold i_es_lock which protects us from inode reclaim
158462306a36Sopenharmony_ci		 * freeing inode under us
158562306a36Sopenharmony_ci		 */
158662306a36Sopenharmony_ci		spin_unlock(&sbi->s_es_lock);
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
158962306a36Sopenharmony_ci		write_unlock(&ei->i_es_lock);
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci		if (nr_to_scan <= 0)
159262306a36Sopenharmony_ci			goto out;
159362306a36Sopenharmony_ci		spin_lock(&sbi->s_es_lock);
159462306a36Sopenharmony_ci	}
159562306a36Sopenharmony_ci	spin_unlock(&sbi->s_es_lock);
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	/*
159862306a36Sopenharmony_ci	 * If we skipped any inodes, and we weren't able to make any
159962306a36Sopenharmony_ci	 * forward progress, try again to scan precached inodes.
160062306a36Sopenharmony_ci	 */
160162306a36Sopenharmony_ci	if ((nr_shrunk == 0) && nr_skipped && !retried) {
160262306a36Sopenharmony_ci		retried++;
160362306a36Sopenharmony_ci		goto retry;
160462306a36Sopenharmony_ci	}
160562306a36Sopenharmony_ci
160662306a36Sopenharmony_ci	if (locked_ei && nr_shrunk == 0)
160762306a36Sopenharmony_ci		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ciout:
161062306a36Sopenharmony_ci	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
161162306a36Sopenharmony_ci	if (likely(es_stats->es_stats_scan_time))
161262306a36Sopenharmony_ci		es_stats->es_stats_scan_time = (scan_time +
161362306a36Sopenharmony_ci				es_stats->es_stats_scan_time*3) / 4;
161462306a36Sopenharmony_ci	else
161562306a36Sopenharmony_ci		es_stats->es_stats_scan_time = scan_time;
161662306a36Sopenharmony_ci	if (scan_time > es_stats->es_stats_max_scan_time)
161762306a36Sopenharmony_ci		es_stats->es_stats_max_scan_time = scan_time;
161862306a36Sopenharmony_ci	if (likely(es_stats->es_stats_shrunk))
161962306a36Sopenharmony_ci		es_stats->es_stats_shrunk = (nr_shrunk +
162062306a36Sopenharmony_ci				es_stats->es_stats_shrunk*3) / 4;
162162306a36Sopenharmony_ci	else
162262306a36Sopenharmony_ci		es_stats->es_stats_shrunk = nr_shrunk;
162362306a36Sopenharmony_ci
162462306a36Sopenharmony_ci	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
162562306a36Sopenharmony_ci			     nr_skipped, retried);
162662306a36Sopenharmony_ci	return nr_shrunk;
162762306a36Sopenharmony_ci}
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_cistatic unsigned long ext4_es_count(struct shrinker *shrink,
163062306a36Sopenharmony_ci				   struct shrink_control *sc)
163162306a36Sopenharmony_ci{
163262306a36Sopenharmony_ci	unsigned long nr;
163362306a36Sopenharmony_ci	struct ext4_sb_info *sbi;
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
163662306a36Sopenharmony_ci	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
163762306a36Sopenharmony_ci	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
163862306a36Sopenharmony_ci	return nr;
163962306a36Sopenharmony_ci}
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_cistatic unsigned long ext4_es_scan(struct shrinker *shrink,
164262306a36Sopenharmony_ci				  struct shrink_control *sc)
164362306a36Sopenharmony_ci{
164462306a36Sopenharmony_ci	struct ext4_sb_info *sbi = container_of(shrink,
164562306a36Sopenharmony_ci					struct ext4_sb_info, s_es_shrinker);
164662306a36Sopenharmony_ci	int nr_to_scan = sc->nr_to_scan;
164762306a36Sopenharmony_ci	int ret, nr_shrunk;
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
165062306a36Sopenharmony_ci	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
165562306a36Sopenharmony_ci	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
165662306a36Sopenharmony_ci	return nr_shrunk;
165762306a36Sopenharmony_ci}
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ciint ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
166062306a36Sopenharmony_ci{
166162306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
166262306a36Sopenharmony_ci	struct ext4_es_stats *es_stats = &sbi->s_es_stats;
166362306a36Sopenharmony_ci	struct ext4_inode_info *ei, *max = NULL;
166462306a36Sopenharmony_ci	unsigned int inode_cnt = 0;
166562306a36Sopenharmony_ci
166662306a36Sopenharmony_ci	if (v != SEQ_START_TOKEN)
166762306a36Sopenharmony_ci		return 0;
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci	/* here we just find an inode that has the max nr. of objects */
167062306a36Sopenharmony_ci	spin_lock(&sbi->s_es_lock);
167162306a36Sopenharmony_ci	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
167262306a36Sopenharmony_ci		inode_cnt++;
167362306a36Sopenharmony_ci		if (max && max->i_es_all_nr < ei->i_es_all_nr)
167462306a36Sopenharmony_ci			max = ei;
167562306a36Sopenharmony_ci		else if (!max)
167662306a36Sopenharmony_ci			max = ei;
167762306a36Sopenharmony_ci	}
167862306a36Sopenharmony_ci	spin_unlock(&sbi->s_es_lock);
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
168162306a36Sopenharmony_ci		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
168262306a36Sopenharmony_ci		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
168362306a36Sopenharmony_ci	seq_printf(seq, "  %lld/%lld cache hits/misses\n",
168462306a36Sopenharmony_ci		   percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
168562306a36Sopenharmony_ci		   percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
168662306a36Sopenharmony_ci	if (inode_cnt)
168762306a36Sopenharmony_ci		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_ci	seq_printf(seq, "average:\n  %llu us scan time\n",
169062306a36Sopenharmony_ci	    div_u64(es_stats->es_stats_scan_time, 1000));
169162306a36Sopenharmony_ci	seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
169262306a36Sopenharmony_ci	if (inode_cnt)
169362306a36Sopenharmony_ci		seq_printf(seq,
169462306a36Sopenharmony_ci		    "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
169562306a36Sopenharmony_ci		    "  %llu us max scan time\n",
169662306a36Sopenharmony_ci		    max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
169762306a36Sopenharmony_ci		    div_u64(es_stats->es_stats_max_scan_time, 1000));
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci	return 0;
170062306a36Sopenharmony_ci}
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ciint ext4_es_register_shrinker(struct ext4_sb_info *sbi)
170362306a36Sopenharmony_ci{
170462306a36Sopenharmony_ci	int err;
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	/* Make sure we have enough bits for physical block number */
170762306a36Sopenharmony_ci	BUILD_BUG_ON(ES_SHIFT < 48);
170862306a36Sopenharmony_ci	INIT_LIST_HEAD(&sbi->s_es_list);
170962306a36Sopenharmony_ci	sbi->s_es_nr_inode = 0;
171062306a36Sopenharmony_ci	spin_lock_init(&sbi->s_es_lock);
171162306a36Sopenharmony_ci	sbi->s_es_stats.es_stats_shrunk = 0;
171262306a36Sopenharmony_ci	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
171362306a36Sopenharmony_ci				  GFP_KERNEL);
171462306a36Sopenharmony_ci	if (err)
171562306a36Sopenharmony_ci		return err;
171662306a36Sopenharmony_ci	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
171762306a36Sopenharmony_ci				  GFP_KERNEL);
171862306a36Sopenharmony_ci	if (err)
171962306a36Sopenharmony_ci		goto err1;
172062306a36Sopenharmony_ci	sbi->s_es_stats.es_stats_scan_time = 0;
172162306a36Sopenharmony_ci	sbi->s_es_stats.es_stats_max_scan_time = 0;
172262306a36Sopenharmony_ci	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
172362306a36Sopenharmony_ci	if (err)
172462306a36Sopenharmony_ci		goto err2;
172562306a36Sopenharmony_ci	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
172662306a36Sopenharmony_ci	if (err)
172762306a36Sopenharmony_ci		goto err3;
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ci	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
173062306a36Sopenharmony_ci	sbi->s_es_shrinker.count_objects = ext4_es_count;
173162306a36Sopenharmony_ci	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
173262306a36Sopenharmony_ci	err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s",
173362306a36Sopenharmony_ci				sbi->s_sb->s_id);
173462306a36Sopenharmony_ci	if (err)
173562306a36Sopenharmony_ci		goto err4;
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci	return 0;
173862306a36Sopenharmony_cierr4:
173962306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
174062306a36Sopenharmony_cierr3:
174162306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
174262306a36Sopenharmony_cierr2:
174362306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
174462306a36Sopenharmony_cierr1:
174562306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
174662306a36Sopenharmony_ci	return err;
174762306a36Sopenharmony_ci}
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_civoid ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
175062306a36Sopenharmony_ci{
175162306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
175262306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
175362306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
175462306a36Sopenharmony_ci	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
175562306a36Sopenharmony_ci	unregister_shrinker(&sbi->s_es_shrinker);
175662306a36Sopenharmony_ci}
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_ci/*
175962306a36Sopenharmony_ci * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
176062306a36Sopenharmony_ci * most *nr_to_scan extents, update *nr_to_scan accordingly.
176162306a36Sopenharmony_ci *
176262306a36Sopenharmony_ci * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
176362306a36Sopenharmony_ci * Increment *nr_shrunk by the number of reclaimed extents. Also update
176462306a36Sopenharmony_ci * ei->i_es_shrink_lblk to where we should continue scanning.
176562306a36Sopenharmony_ci */
176662306a36Sopenharmony_cistatic int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
176762306a36Sopenharmony_ci				 int *nr_to_scan, int *nr_shrunk)
176862306a36Sopenharmony_ci{
176962306a36Sopenharmony_ci	struct inode *inode = &ei->vfs_inode;
177062306a36Sopenharmony_ci	struct ext4_es_tree *tree = &ei->i_es_tree;
177162306a36Sopenharmony_ci	struct extent_status *es;
177262306a36Sopenharmony_ci	struct rb_node *node;
177362306a36Sopenharmony_ci
177462306a36Sopenharmony_ci	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
177562306a36Sopenharmony_ci	if (!es)
177662306a36Sopenharmony_ci		goto out_wrap;
177762306a36Sopenharmony_ci
177862306a36Sopenharmony_ci	while (*nr_to_scan > 0) {
177962306a36Sopenharmony_ci		if (es->es_lblk > end) {
178062306a36Sopenharmony_ci			ei->i_es_shrink_lblk = end + 1;
178162306a36Sopenharmony_ci			return 0;
178262306a36Sopenharmony_ci		}
178362306a36Sopenharmony_ci
178462306a36Sopenharmony_ci		(*nr_to_scan)--;
178562306a36Sopenharmony_ci		node = rb_next(&es->rb_node);
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci		if (ext4_es_must_keep(es))
178862306a36Sopenharmony_ci			goto next;
178962306a36Sopenharmony_ci		if (ext4_es_is_referenced(es)) {
179062306a36Sopenharmony_ci			ext4_es_clear_referenced(es);
179162306a36Sopenharmony_ci			goto next;
179262306a36Sopenharmony_ci		}
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci		rb_erase(&es->rb_node, &tree->root);
179562306a36Sopenharmony_ci		ext4_es_free_extent(inode, es);
179662306a36Sopenharmony_ci		(*nr_shrunk)++;
179762306a36Sopenharmony_cinext:
179862306a36Sopenharmony_ci		if (!node)
179962306a36Sopenharmony_ci			goto out_wrap;
180062306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
180162306a36Sopenharmony_ci	}
180262306a36Sopenharmony_ci	ei->i_es_shrink_lblk = es->es_lblk;
180362306a36Sopenharmony_ci	return 1;
180462306a36Sopenharmony_ciout_wrap:
180562306a36Sopenharmony_ci	ei->i_es_shrink_lblk = 0;
180662306a36Sopenharmony_ci	return 0;
180762306a36Sopenharmony_ci}
180862306a36Sopenharmony_ci
180962306a36Sopenharmony_cistatic int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
181062306a36Sopenharmony_ci{
181162306a36Sopenharmony_ci	struct inode *inode = &ei->vfs_inode;
181262306a36Sopenharmony_ci	int nr_shrunk = 0;
181362306a36Sopenharmony_ci	ext4_lblk_t start = ei->i_es_shrink_lblk;
181462306a36Sopenharmony_ci	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
181562306a36Sopenharmony_ci				      DEFAULT_RATELIMIT_BURST);
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_ci	if (ei->i_es_shk_nr == 0)
181862306a36Sopenharmony_ci		return 0;
181962306a36Sopenharmony_ci
182062306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
182162306a36Sopenharmony_ci	    __ratelimit(&_rs))
182262306a36Sopenharmony_ci		ext4_warning(inode->i_sb, "forced shrink of precached extents");
182362306a36Sopenharmony_ci
182462306a36Sopenharmony_ci	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
182562306a36Sopenharmony_ci	    start != 0)
182662306a36Sopenharmony_ci		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
182762306a36Sopenharmony_ci
182862306a36Sopenharmony_ci	ei->i_es_tree.cache_es = NULL;
182962306a36Sopenharmony_ci	return nr_shrunk;
183062306a36Sopenharmony_ci}
183162306a36Sopenharmony_ci
183262306a36Sopenharmony_ci/*
183362306a36Sopenharmony_ci * Called to support EXT4_IOC_CLEAR_ES_CACHE.  We can only remove
183462306a36Sopenharmony_ci * discretionary entries from the extent status cache.  (Some entries
183562306a36Sopenharmony_ci * must be present for proper operations.)
183662306a36Sopenharmony_ci */
183762306a36Sopenharmony_civoid ext4_clear_inode_es(struct inode *inode)
183862306a36Sopenharmony_ci{
183962306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
184062306a36Sopenharmony_ci	struct extent_status *es;
184162306a36Sopenharmony_ci	struct ext4_es_tree *tree;
184262306a36Sopenharmony_ci	struct rb_node *node;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci	write_lock(&ei->i_es_lock);
184562306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_es_tree;
184662306a36Sopenharmony_ci	tree->cache_es = NULL;
184762306a36Sopenharmony_ci	node = rb_first(&tree->root);
184862306a36Sopenharmony_ci	while (node) {
184962306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
185062306a36Sopenharmony_ci		node = rb_next(node);
185162306a36Sopenharmony_ci		if (!ext4_es_must_keep(es)) {
185262306a36Sopenharmony_ci			rb_erase(&es->rb_node, &tree->root);
185362306a36Sopenharmony_ci			ext4_es_free_extent(inode, es);
185462306a36Sopenharmony_ci		}
185562306a36Sopenharmony_ci	}
185662306a36Sopenharmony_ci	ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
185762306a36Sopenharmony_ci	write_unlock(&ei->i_es_lock);
185862306a36Sopenharmony_ci}
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci#ifdef ES_DEBUG__
186162306a36Sopenharmony_cistatic void ext4_print_pending_tree(struct inode *inode)
186262306a36Sopenharmony_ci{
186362306a36Sopenharmony_ci	struct ext4_pending_tree *tree;
186462306a36Sopenharmony_ci	struct rb_node *node;
186562306a36Sopenharmony_ci	struct pending_reservation *pr;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
186862306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_pending_tree;
186962306a36Sopenharmony_ci	node = rb_first(&tree->root);
187062306a36Sopenharmony_ci	while (node) {
187162306a36Sopenharmony_ci		pr = rb_entry(node, struct pending_reservation, rb_node);
187262306a36Sopenharmony_ci		printk(KERN_DEBUG " %u", pr->lclu);
187362306a36Sopenharmony_ci		node = rb_next(node);
187462306a36Sopenharmony_ci	}
187562306a36Sopenharmony_ci	printk(KERN_DEBUG "\n");
187662306a36Sopenharmony_ci}
187762306a36Sopenharmony_ci#else
187862306a36Sopenharmony_ci#define ext4_print_pending_tree(inode)
187962306a36Sopenharmony_ci#endif
188062306a36Sopenharmony_ci
188162306a36Sopenharmony_ciint __init ext4_init_pending(void)
188262306a36Sopenharmony_ci{
188362306a36Sopenharmony_ci	ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT);
188462306a36Sopenharmony_ci	if (ext4_pending_cachep == NULL)
188562306a36Sopenharmony_ci		return -ENOMEM;
188662306a36Sopenharmony_ci	return 0;
188762306a36Sopenharmony_ci}
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_civoid ext4_exit_pending(void)
189062306a36Sopenharmony_ci{
189162306a36Sopenharmony_ci	kmem_cache_destroy(ext4_pending_cachep);
189262306a36Sopenharmony_ci}
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_civoid ext4_init_pending_tree(struct ext4_pending_tree *tree)
189562306a36Sopenharmony_ci{
189662306a36Sopenharmony_ci	tree->root = RB_ROOT;
189762306a36Sopenharmony_ci}
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci/*
190062306a36Sopenharmony_ci * __get_pending - retrieve a pointer to a pending reservation
190162306a36Sopenharmony_ci *
190262306a36Sopenharmony_ci * @inode - file containing the pending cluster reservation
190362306a36Sopenharmony_ci * @lclu - logical cluster of interest
190462306a36Sopenharmony_ci *
190562306a36Sopenharmony_ci * Returns a pointer to a pending reservation if it's a member of
190662306a36Sopenharmony_ci * the set, and NULL if not.  Must be called holding i_es_lock.
190762306a36Sopenharmony_ci */
190862306a36Sopenharmony_cistatic struct pending_reservation *__get_pending(struct inode *inode,
190962306a36Sopenharmony_ci						 ext4_lblk_t lclu)
191062306a36Sopenharmony_ci{
191162306a36Sopenharmony_ci	struct ext4_pending_tree *tree;
191262306a36Sopenharmony_ci	struct rb_node *node;
191362306a36Sopenharmony_ci	struct pending_reservation *pr = NULL;
191462306a36Sopenharmony_ci
191562306a36Sopenharmony_ci	tree = &EXT4_I(inode)->i_pending_tree;
191662306a36Sopenharmony_ci	node = (&tree->root)->rb_node;
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci	while (node) {
191962306a36Sopenharmony_ci		pr = rb_entry(node, struct pending_reservation, rb_node);
192062306a36Sopenharmony_ci		if (lclu < pr->lclu)
192162306a36Sopenharmony_ci			node = node->rb_left;
192262306a36Sopenharmony_ci		else if (lclu > pr->lclu)
192362306a36Sopenharmony_ci			node = node->rb_right;
192462306a36Sopenharmony_ci		else if (lclu == pr->lclu)
192562306a36Sopenharmony_ci			return pr;
192662306a36Sopenharmony_ci	}
192762306a36Sopenharmony_ci	return NULL;
192862306a36Sopenharmony_ci}
192962306a36Sopenharmony_ci
193062306a36Sopenharmony_ci/*
193162306a36Sopenharmony_ci * __insert_pending - adds a pending cluster reservation to the set of
193262306a36Sopenharmony_ci *                    pending reservations
193362306a36Sopenharmony_ci *
193462306a36Sopenharmony_ci * @inode - file containing the cluster
193562306a36Sopenharmony_ci * @lblk - logical block in the cluster to be added
193662306a36Sopenharmony_ci * @prealloc - preallocated pending entry
193762306a36Sopenharmony_ci *
193862306a36Sopenharmony_ci * Returns 0 on successful insertion and -ENOMEM on failure.  If the
193962306a36Sopenharmony_ci * pending reservation is already in the set, returns successfully.
194062306a36Sopenharmony_ci */
194162306a36Sopenharmony_cistatic int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
194262306a36Sopenharmony_ci			    struct pending_reservation **prealloc)
194362306a36Sopenharmony_ci{
194462306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
194562306a36Sopenharmony_ci	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
194662306a36Sopenharmony_ci	struct rb_node **p = &tree->root.rb_node;
194762306a36Sopenharmony_ci	struct rb_node *parent = NULL;
194862306a36Sopenharmony_ci	struct pending_reservation *pr;
194962306a36Sopenharmony_ci	ext4_lblk_t lclu;
195062306a36Sopenharmony_ci	int ret = 0;
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_ci	lclu = EXT4_B2C(sbi, lblk);
195362306a36Sopenharmony_ci	/* search to find parent for insertion */
195462306a36Sopenharmony_ci	while (*p) {
195562306a36Sopenharmony_ci		parent = *p;
195662306a36Sopenharmony_ci		pr = rb_entry(parent, struct pending_reservation, rb_node);
195762306a36Sopenharmony_ci
195862306a36Sopenharmony_ci		if (lclu < pr->lclu) {
195962306a36Sopenharmony_ci			p = &(*p)->rb_left;
196062306a36Sopenharmony_ci		} else if (lclu > pr->lclu) {
196162306a36Sopenharmony_ci			p = &(*p)->rb_right;
196262306a36Sopenharmony_ci		} else {
196362306a36Sopenharmony_ci			/* pending reservation already inserted */
196462306a36Sopenharmony_ci			goto out;
196562306a36Sopenharmony_ci		}
196662306a36Sopenharmony_ci	}
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	if (likely(*prealloc == NULL)) {
196962306a36Sopenharmony_ci		pr = __alloc_pending(false);
197062306a36Sopenharmony_ci		if (!pr) {
197162306a36Sopenharmony_ci			ret = -ENOMEM;
197262306a36Sopenharmony_ci			goto out;
197362306a36Sopenharmony_ci		}
197462306a36Sopenharmony_ci	} else {
197562306a36Sopenharmony_ci		pr = *prealloc;
197662306a36Sopenharmony_ci		*prealloc = NULL;
197762306a36Sopenharmony_ci	}
197862306a36Sopenharmony_ci	pr->lclu = lclu;
197962306a36Sopenharmony_ci
198062306a36Sopenharmony_ci	rb_link_node(&pr->rb_node, parent, p);
198162306a36Sopenharmony_ci	rb_insert_color(&pr->rb_node, &tree->root);
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ciout:
198462306a36Sopenharmony_ci	return ret;
198562306a36Sopenharmony_ci}
198662306a36Sopenharmony_ci
198762306a36Sopenharmony_ci/*
198862306a36Sopenharmony_ci * __remove_pending - removes a pending cluster reservation from the set
198962306a36Sopenharmony_ci *                    of pending reservations
199062306a36Sopenharmony_ci *
199162306a36Sopenharmony_ci * @inode - file containing the cluster
199262306a36Sopenharmony_ci * @lblk - logical block in the pending cluster reservation to be removed
199362306a36Sopenharmony_ci *
199462306a36Sopenharmony_ci * Returns successfully if pending reservation is not a member of the set.
199562306a36Sopenharmony_ci */
199662306a36Sopenharmony_cistatic void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
199762306a36Sopenharmony_ci{
199862306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
199962306a36Sopenharmony_ci	struct pending_reservation *pr;
200062306a36Sopenharmony_ci	struct ext4_pending_tree *tree;
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci	pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
200362306a36Sopenharmony_ci	if (pr != NULL) {
200462306a36Sopenharmony_ci		tree = &EXT4_I(inode)->i_pending_tree;
200562306a36Sopenharmony_ci		rb_erase(&pr->rb_node, &tree->root);
200662306a36Sopenharmony_ci		__free_pending(pr);
200762306a36Sopenharmony_ci	}
200862306a36Sopenharmony_ci}
200962306a36Sopenharmony_ci
201062306a36Sopenharmony_ci/*
201162306a36Sopenharmony_ci * ext4_remove_pending - removes a pending cluster reservation from the set
201262306a36Sopenharmony_ci *                       of pending reservations
201362306a36Sopenharmony_ci *
201462306a36Sopenharmony_ci * @inode - file containing the cluster
201562306a36Sopenharmony_ci * @lblk - logical block in the pending cluster reservation to be removed
201662306a36Sopenharmony_ci *
201762306a36Sopenharmony_ci * Locking for external use of __remove_pending.
201862306a36Sopenharmony_ci */
201962306a36Sopenharmony_civoid ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
202062306a36Sopenharmony_ci{
202162306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
202262306a36Sopenharmony_ci
202362306a36Sopenharmony_ci	write_lock(&ei->i_es_lock);
202462306a36Sopenharmony_ci	__remove_pending(inode, lblk);
202562306a36Sopenharmony_ci	write_unlock(&ei->i_es_lock);
202662306a36Sopenharmony_ci}
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci/*
202962306a36Sopenharmony_ci * ext4_is_pending - determine whether a cluster has a pending reservation
203062306a36Sopenharmony_ci *                   on it
203162306a36Sopenharmony_ci *
203262306a36Sopenharmony_ci * @inode - file containing the cluster
203362306a36Sopenharmony_ci * @lblk - logical block in the cluster
203462306a36Sopenharmony_ci *
203562306a36Sopenharmony_ci * Returns true if there's a pending reservation for the cluster in the
203662306a36Sopenharmony_ci * set of pending reservations, and false if not.
203762306a36Sopenharmony_ci */
203862306a36Sopenharmony_cibool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
203962306a36Sopenharmony_ci{
204062306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
204162306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
204262306a36Sopenharmony_ci	bool ret;
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_ci	read_lock(&ei->i_es_lock);
204562306a36Sopenharmony_ci	ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
204662306a36Sopenharmony_ci	read_unlock(&ei->i_es_lock);
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci	return ret;
204962306a36Sopenharmony_ci}
205062306a36Sopenharmony_ci
205162306a36Sopenharmony_ci/*
205262306a36Sopenharmony_ci * ext4_es_insert_delayed_block - adds a delayed block to the extents status
205362306a36Sopenharmony_ci *                                tree, adding a pending reservation where
205462306a36Sopenharmony_ci *                                needed
205562306a36Sopenharmony_ci *
205662306a36Sopenharmony_ci * @inode - file containing the newly added block
205762306a36Sopenharmony_ci * @lblk - logical block to be added
205862306a36Sopenharmony_ci * @allocated - indicates whether a physical cluster has been allocated for
205962306a36Sopenharmony_ci *              the logical cluster that contains the block
206062306a36Sopenharmony_ci */
206162306a36Sopenharmony_civoid ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
206262306a36Sopenharmony_ci				  bool allocated)
206362306a36Sopenharmony_ci{
206462306a36Sopenharmony_ci	struct extent_status newes;
206562306a36Sopenharmony_ci	int err1 = 0, err2 = 0, err3 = 0;
206662306a36Sopenharmony_ci	struct extent_status *es1 = NULL;
206762306a36Sopenharmony_ci	struct extent_status *es2 = NULL;
206862306a36Sopenharmony_ci	struct pending_reservation *pr = NULL;
206962306a36Sopenharmony_ci
207062306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
207162306a36Sopenharmony_ci		return;
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_ci	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
207462306a36Sopenharmony_ci		 lblk, inode->i_ino);
207562306a36Sopenharmony_ci
207662306a36Sopenharmony_ci	newes.es_lblk = lblk;
207762306a36Sopenharmony_ci	newes.es_len = 1;
207862306a36Sopenharmony_ci	ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
207962306a36Sopenharmony_ci	trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci	ext4_es_insert_extent_check(inode, &newes);
208262306a36Sopenharmony_ci
208362306a36Sopenharmony_ciretry:
208462306a36Sopenharmony_ci	if (err1 && !es1)
208562306a36Sopenharmony_ci		es1 = __es_alloc_extent(true);
208662306a36Sopenharmony_ci	if ((err1 || err2) && !es2)
208762306a36Sopenharmony_ci		es2 = __es_alloc_extent(true);
208862306a36Sopenharmony_ci	if ((err1 || err2 || err3) && allocated && !pr)
208962306a36Sopenharmony_ci		pr = __alloc_pending(true);
209062306a36Sopenharmony_ci	write_lock(&EXT4_I(inode)->i_es_lock);
209162306a36Sopenharmony_ci
209262306a36Sopenharmony_ci	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
209362306a36Sopenharmony_ci	if (err1 != 0)
209462306a36Sopenharmony_ci		goto error;
209562306a36Sopenharmony_ci	/* Free preallocated extent if it didn't get used. */
209662306a36Sopenharmony_ci	if (es1) {
209762306a36Sopenharmony_ci		if (!es1->es_len)
209862306a36Sopenharmony_ci			__es_free_extent(es1);
209962306a36Sopenharmony_ci		es1 = NULL;
210062306a36Sopenharmony_ci	}
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci	err2 = __es_insert_extent(inode, &newes, es2);
210362306a36Sopenharmony_ci	if (err2 != 0)
210462306a36Sopenharmony_ci		goto error;
210562306a36Sopenharmony_ci	/* Free preallocated extent if it didn't get used. */
210662306a36Sopenharmony_ci	if (es2) {
210762306a36Sopenharmony_ci		if (!es2->es_len)
210862306a36Sopenharmony_ci			__es_free_extent(es2);
210962306a36Sopenharmony_ci		es2 = NULL;
211062306a36Sopenharmony_ci	}
211162306a36Sopenharmony_ci
211262306a36Sopenharmony_ci	if (allocated) {
211362306a36Sopenharmony_ci		err3 = __insert_pending(inode, lblk, &pr);
211462306a36Sopenharmony_ci		if (err3 != 0)
211562306a36Sopenharmony_ci			goto error;
211662306a36Sopenharmony_ci		if (pr) {
211762306a36Sopenharmony_ci			__free_pending(pr);
211862306a36Sopenharmony_ci			pr = NULL;
211962306a36Sopenharmony_ci		}
212062306a36Sopenharmony_ci	}
212162306a36Sopenharmony_cierror:
212262306a36Sopenharmony_ci	write_unlock(&EXT4_I(inode)->i_es_lock);
212362306a36Sopenharmony_ci	if (err1 || err2 || err3)
212462306a36Sopenharmony_ci		goto retry;
212562306a36Sopenharmony_ci
212662306a36Sopenharmony_ci	ext4_es_print_tree(inode);
212762306a36Sopenharmony_ci	ext4_print_pending_tree(inode);
212862306a36Sopenharmony_ci	return;
212962306a36Sopenharmony_ci}
213062306a36Sopenharmony_ci
213162306a36Sopenharmony_ci/*
213262306a36Sopenharmony_ci * __es_delayed_clu - count number of clusters containing blocks that
213362306a36Sopenharmony_ci *                    are delayed only
213462306a36Sopenharmony_ci *
213562306a36Sopenharmony_ci * @inode - file containing block range
213662306a36Sopenharmony_ci * @start - logical block defining start of range
213762306a36Sopenharmony_ci * @end - logical block defining end of range
213862306a36Sopenharmony_ci *
213962306a36Sopenharmony_ci * Returns the number of clusters containing only delayed (not delayed
214062306a36Sopenharmony_ci * and unwritten) blocks in the range specified by @start and @end.  Any
214162306a36Sopenharmony_ci * cluster or part of a cluster within the range and containing a delayed
214262306a36Sopenharmony_ci * and not unwritten block within the range is counted as a whole cluster.
214362306a36Sopenharmony_ci */
214462306a36Sopenharmony_cistatic unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
214562306a36Sopenharmony_ci				     ext4_lblk_t end)
214662306a36Sopenharmony_ci{
214762306a36Sopenharmony_ci	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
214862306a36Sopenharmony_ci	struct extent_status *es;
214962306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
215062306a36Sopenharmony_ci	struct rb_node *node;
215162306a36Sopenharmony_ci	ext4_lblk_t first_lclu, last_lclu;
215262306a36Sopenharmony_ci	unsigned long long last_counted_lclu;
215362306a36Sopenharmony_ci	unsigned int n = 0;
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci	/* guaranteed to be unequal to any ext4_lblk_t value */
215662306a36Sopenharmony_ci	last_counted_lclu = ~0ULL;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci	es = __es_tree_search(&tree->root, start);
215962306a36Sopenharmony_ci
216062306a36Sopenharmony_ci	while (es && (es->es_lblk <= end)) {
216162306a36Sopenharmony_ci		if (ext4_es_is_delonly(es)) {
216262306a36Sopenharmony_ci			if (es->es_lblk <= start)
216362306a36Sopenharmony_ci				first_lclu = EXT4_B2C(sbi, start);
216462306a36Sopenharmony_ci			else
216562306a36Sopenharmony_ci				first_lclu = EXT4_B2C(sbi, es->es_lblk);
216662306a36Sopenharmony_ci
216762306a36Sopenharmony_ci			if (ext4_es_end(es) >= end)
216862306a36Sopenharmony_ci				last_lclu = EXT4_B2C(sbi, end);
216962306a36Sopenharmony_ci			else
217062306a36Sopenharmony_ci				last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
217162306a36Sopenharmony_ci
217262306a36Sopenharmony_ci			if (first_lclu == last_counted_lclu)
217362306a36Sopenharmony_ci				n += last_lclu - first_lclu;
217462306a36Sopenharmony_ci			else
217562306a36Sopenharmony_ci				n += last_lclu - first_lclu + 1;
217662306a36Sopenharmony_ci			last_counted_lclu = last_lclu;
217762306a36Sopenharmony_ci		}
217862306a36Sopenharmony_ci		node = rb_next(&es->rb_node);
217962306a36Sopenharmony_ci		if (!node)
218062306a36Sopenharmony_ci			break;
218162306a36Sopenharmony_ci		es = rb_entry(node, struct extent_status, rb_node);
218262306a36Sopenharmony_ci	}
218362306a36Sopenharmony_ci
218462306a36Sopenharmony_ci	return n;
218562306a36Sopenharmony_ci}
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_ci/*
218862306a36Sopenharmony_ci * ext4_es_delayed_clu - count number of clusters containing blocks that
218962306a36Sopenharmony_ci *                       are both delayed and unwritten
219062306a36Sopenharmony_ci *
219162306a36Sopenharmony_ci * @inode - file containing block range
219262306a36Sopenharmony_ci * @lblk - logical block defining start of range
219362306a36Sopenharmony_ci * @len - number of blocks in range
219462306a36Sopenharmony_ci *
219562306a36Sopenharmony_ci * Locking for external use of __es_delayed_clu().
219662306a36Sopenharmony_ci */
219762306a36Sopenharmony_ciunsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
219862306a36Sopenharmony_ci				 ext4_lblk_t len)
219962306a36Sopenharmony_ci{
220062306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
220162306a36Sopenharmony_ci	ext4_lblk_t end;
220262306a36Sopenharmony_ci	unsigned int n;
220362306a36Sopenharmony_ci
220462306a36Sopenharmony_ci	if (len == 0)
220562306a36Sopenharmony_ci		return 0;
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	end = lblk + len - 1;
220862306a36Sopenharmony_ci	WARN_ON(end < lblk);
220962306a36Sopenharmony_ci
221062306a36Sopenharmony_ci	read_lock(&ei->i_es_lock);
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_ci	n = __es_delayed_clu(inode, lblk, end);
221362306a36Sopenharmony_ci
221462306a36Sopenharmony_ci	read_unlock(&ei->i_es_lock);
221562306a36Sopenharmony_ci
221662306a36Sopenharmony_ci	return n;
221762306a36Sopenharmony_ci}
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_ci/*
222062306a36Sopenharmony_ci * __revise_pending - makes, cancels, or leaves unchanged pending cluster
222162306a36Sopenharmony_ci *                    reservations for a specified block range depending
222262306a36Sopenharmony_ci *                    upon the presence or absence of delayed blocks
222362306a36Sopenharmony_ci *                    outside the range within clusters at the ends of the
222462306a36Sopenharmony_ci *                    range
222562306a36Sopenharmony_ci *
222662306a36Sopenharmony_ci * @inode - file containing the range
222762306a36Sopenharmony_ci * @lblk - logical block defining the start of range
222862306a36Sopenharmony_ci * @len  - length of range in blocks
222962306a36Sopenharmony_ci * @prealloc - preallocated pending entry
223062306a36Sopenharmony_ci *
223162306a36Sopenharmony_ci * Used after a newly allocated extent is added to the extents status tree.
223262306a36Sopenharmony_ci * Requires that the extents in the range have either written or unwritten
223362306a36Sopenharmony_ci * status.  Must be called while holding i_es_lock.
223462306a36Sopenharmony_ci */
223562306a36Sopenharmony_cistatic int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
223662306a36Sopenharmony_ci			    ext4_lblk_t len,
223762306a36Sopenharmony_ci			    struct pending_reservation **prealloc)
223862306a36Sopenharmony_ci{
223962306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
224062306a36Sopenharmony_ci	ext4_lblk_t end = lblk + len - 1;
224162306a36Sopenharmony_ci	ext4_lblk_t first, last;
224262306a36Sopenharmony_ci	bool f_del = false, l_del = false;
224362306a36Sopenharmony_ci	int ret = 0;
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ci	if (len == 0)
224662306a36Sopenharmony_ci		return 0;
224762306a36Sopenharmony_ci
224862306a36Sopenharmony_ci	/*
224962306a36Sopenharmony_ci	 * Two cases - block range within single cluster and block range
225062306a36Sopenharmony_ci	 * spanning two or more clusters.  Note that a cluster belonging
225162306a36Sopenharmony_ci	 * to a range starting and/or ending on a cluster boundary is treated
225262306a36Sopenharmony_ci	 * as if it does not contain a delayed extent.  The new range may
225362306a36Sopenharmony_ci	 * have allocated space for previously delayed blocks out to the
225462306a36Sopenharmony_ci	 * cluster boundary, requiring that any pre-existing pending
225562306a36Sopenharmony_ci	 * reservation be canceled.  Because this code only looks at blocks
225662306a36Sopenharmony_ci	 * outside the range, it should revise pending reservations
225762306a36Sopenharmony_ci	 * correctly even if the extent represented by the range can't be
225862306a36Sopenharmony_ci	 * inserted in the extents status tree due to ENOSPC.
225962306a36Sopenharmony_ci	 */
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_ci	if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
226262306a36Sopenharmony_ci		first = EXT4_LBLK_CMASK(sbi, lblk);
226362306a36Sopenharmony_ci		if (first != lblk)
226462306a36Sopenharmony_ci			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
226562306a36Sopenharmony_ci						first, lblk - 1);
226662306a36Sopenharmony_ci		if (f_del) {
226762306a36Sopenharmony_ci			ret = __insert_pending(inode, first, prealloc);
226862306a36Sopenharmony_ci			if (ret < 0)
226962306a36Sopenharmony_ci				goto out;
227062306a36Sopenharmony_ci		} else {
227162306a36Sopenharmony_ci			last = EXT4_LBLK_CMASK(sbi, end) +
227262306a36Sopenharmony_ci			       sbi->s_cluster_ratio - 1;
227362306a36Sopenharmony_ci			if (last != end)
227462306a36Sopenharmony_ci				l_del = __es_scan_range(inode,
227562306a36Sopenharmony_ci							&ext4_es_is_delonly,
227662306a36Sopenharmony_ci							end + 1, last);
227762306a36Sopenharmony_ci			if (l_del) {
227862306a36Sopenharmony_ci				ret = __insert_pending(inode, last, prealloc);
227962306a36Sopenharmony_ci				if (ret < 0)
228062306a36Sopenharmony_ci					goto out;
228162306a36Sopenharmony_ci			} else
228262306a36Sopenharmony_ci				__remove_pending(inode, last);
228362306a36Sopenharmony_ci		}
228462306a36Sopenharmony_ci	} else {
228562306a36Sopenharmony_ci		first = EXT4_LBLK_CMASK(sbi, lblk);
228662306a36Sopenharmony_ci		if (first != lblk)
228762306a36Sopenharmony_ci			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
228862306a36Sopenharmony_ci						first, lblk - 1);
228962306a36Sopenharmony_ci		if (f_del) {
229062306a36Sopenharmony_ci			ret = __insert_pending(inode, first, prealloc);
229162306a36Sopenharmony_ci			if (ret < 0)
229262306a36Sopenharmony_ci				goto out;
229362306a36Sopenharmony_ci		} else
229462306a36Sopenharmony_ci			__remove_pending(inode, first);
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_ci		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
229762306a36Sopenharmony_ci		if (last != end)
229862306a36Sopenharmony_ci			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
229962306a36Sopenharmony_ci						end + 1, last);
230062306a36Sopenharmony_ci		if (l_del) {
230162306a36Sopenharmony_ci			ret = __insert_pending(inode, last, prealloc);
230262306a36Sopenharmony_ci			if (ret < 0)
230362306a36Sopenharmony_ci				goto out;
230462306a36Sopenharmony_ci		} else
230562306a36Sopenharmony_ci			__remove_pending(inode, last);
230662306a36Sopenharmony_ci	}
230762306a36Sopenharmony_ciout:
230862306a36Sopenharmony_ci	return ret;
230962306a36Sopenharmony_ci}
2310