162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * fs/ext4/extents_status.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Written by Yongqiang Yang <xiaoqiangnk@gmail.com> 662306a36Sopenharmony_ci * Modified by 762306a36Sopenharmony_ci * Allison Henderson <achender@linux.vnet.ibm.com> 862306a36Sopenharmony_ci * Hugh Dickins <hughd@google.com> 962306a36Sopenharmony_ci * Zheng Liu <wenqing.lz@taobao.com> 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * Ext4 extents status tree core functions. 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci#include <linux/list_sort.h> 1462306a36Sopenharmony_ci#include <linux/proc_fs.h> 1562306a36Sopenharmony_ci#include <linux/seq_file.h> 1662306a36Sopenharmony_ci#include "ext4.h" 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <trace/events/ext4.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci/* 2162306a36Sopenharmony_ci * According to previous discussion in Ext4 Developer Workshop, we 2262306a36Sopenharmony_ci * will introduce a new structure called io tree to track all extent 2362306a36Sopenharmony_ci * status in order to solve some problems that we have met 2462306a36Sopenharmony_ci * (e.g. Reservation space warning), and provide extent-level locking. 2562306a36Sopenharmony_ci * Delay extent tree is the first step to achieve this goal. It is 2662306a36Sopenharmony_ci * original built by Yongqiang Yang. At that time it is called delay 2762306a36Sopenharmony_ci * extent tree, whose goal is only track delayed extents in memory to 2862306a36Sopenharmony_ci * simplify the implementation of fiemap and bigalloc, and introduce 2962306a36Sopenharmony_ci * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called 3062306a36Sopenharmony_ci * delay extent tree at the first commit. But for better understand 3162306a36Sopenharmony_ci * what it does, it has been rename to extent status tree. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * Step1: 3462306a36Sopenharmony_ci * Currently the first step has been done. All delayed extents are 3562306a36Sopenharmony_ci * tracked in the tree. It maintains the delayed extent when a delayed 3662306a36Sopenharmony_ci * allocation is issued, and the delayed extent is written out or 3762306a36Sopenharmony_ci * invalidated. Therefore the implementation of fiemap and bigalloc 3862306a36Sopenharmony_ci * are simplified, and SEEK_DATA/SEEK_HOLE are introduced. 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * The following comment describes the implemenmtation of extent 4162306a36Sopenharmony_ci * status tree and future works. 4262306a36Sopenharmony_ci * 4362306a36Sopenharmony_ci * Step2: 4462306a36Sopenharmony_ci * In this step all extent status are tracked by extent status tree. 4562306a36Sopenharmony_ci * Thus, we can first try to lookup a block mapping in this tree before 4662306a36Sopenharmony_ci * finding it in extent tree. Hence, single extent cache can be removed 4762306a36Sopenharmony_ci * because extent status tree can do a better job. Extents in status 4862306a36Sopenharmony_ci * tree are loaded on-demand. Therefore, the extent status tree may not 4962306a36Sopenharmony_ci * contain all of the extents in a file. Meanwhile we define a shrinker 5062306a36Sopenharmony_ci * to reclaim memory from extent status tree because fragmented extent 5162306a36Sopenharmony_ci * tree will make status tree cost too much memory. written/unwritten/- 5262306a36Sopenharmony_ci * hole extents in the tree will be reclaimed by this shrinker when we 5362306a36Sopenharmony_ci * are under high memory pressure. Delayed extents will not be 5462306a36Sopenharmony_ci * reclimed because fiemap, bigalloc, and seek_data/hole need it. 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci/* 5862306a36Sopenharmony_ci * Extent status tree implementation for ext4. 5962306a36Sopenharmony_ci * 6062306a36Sopenharmony_ci * 6162306a36Sopenharmony_ci * ========================================================================== 6262306a36Sopenharmony_ci * Extent status tree tracks all extent status. 6362306a36Sopenharmony_ci * 6462306a36Sopenharmony_ci * 1. Why we need to implement extent status tree? 6562306a36Sopenharmony_ci * 6662306a36Sopenharmony_ci * Without extent status tree, ext4 identifies a delayed extent by looking 6762306a36Sopenharmony_ci * up page cache, this has several deficiencies - complicated, buggy, 6862306a36Sopenharmony_ci * and inefficient code. 6962306a36Sopenharmony_ci * 7062306a36Sopenharmony_ci * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a 7162306a36Sopenharmony_ci * block or a range of blocks are belonged to a delayed extent. 7262306a36Sopenharmony_ci * 7362306a36Sopenharmony_ci * Let us have a look at how they do without extent status tree. 7462306a36Sopenharmony_ci * -- FIEMAP 7562306a36Sopenharmony_ci * FIEMAP looks up page cache to identify delayed allocations from holes. 7662306a36Sopenharmony_ci * 7762306a36Sopenharmony_ci * -- SEEK_HOLE/DATA 7862306a36Sopenharmony_ci * SEEK_HOLE/DATA has the same problem as FIEMAP. 7962306a36Sopenharmony_ci * 8062306a36Sopenharmony_ci * -- bigalloc 8162306a36Sopenharmony_ci * bigalloc looks up page cache to figure out if a block is 8262306a36Sopenharmony_ci * already under delayed allocation or not to determine whether 8362306a36Sopenharmony_ci * quota reserving is needed for the cluster. 8462306a36Sopenharmony_ci * 8562306a36Sopenharmony_ci * -- writeout 8662306a36Sopenharmony_ci * Writeout looks up whole page cache to see if a buffer is 8762306a36Sopenharmony_ci * mapped, If there are not very many delayed buffers, then it is 8862306a36Sopenharmony_ci * time consuming. 8962306a36Sopenharmony_ci * 9062306a36Sopenharmony_ci * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA, 9162306a36Sopenharmony_ci * bigalloc and writeout can figure out if a block or a range of 9262306a36Sopenharmony_ci * blocks is under delayed allocation(belonged to a delayed extent) or 9362306a36Sopenharmony_ci * not by searching the extent tree. 9462306a36Sopenharmony_ci * 9562306a36Sopenharmony_ci * 9662306a36Sopenharmony_ci * ========================================================================== 9762306a36Sopenharmony_ci * 2. Ext4 extent status tree impelmentation 9862306a36Sopenharmony_ci * 9962306a36Sopenharmony_ci * -- extent 10062306a36Sopenharmony_ci * A extent is a range of blocks which are contiguous logically and 10162306a36Sopenharmony_ci * physically. Unlike extent in extent tree, this extent in ext4 is 10262306a36Sopenharmony_ci * a in-memory struct, there is no corresponding on-disk data. There 10362306a36Sopenharmony_ci * is no limit on length of extent, so an extent can contain as many 10462306a36Sopenharmony_ci * blocks as they are contiguous logically and physically. 10562306a36Sopenharmony_ci * 10662306a36Sopenharmony_ci * -- extent status tree 10762306a36Sopenharmony_ci * Every inode has an extent status tree and all allocation blocks 10862306a36Sopenharmony_ci * are added to the tree with different status. The extent in the 10962306a36Sopenharmony_ci * tree are ordered by logical block no. 11062306a36Sopenharmony_ci * 11162306a36Sopenharmony_ci * -- operations on a extent status tree 11262306a36Sopenharmony_ci * There are three important operations on a delayed extent tree: find 11362306a36Sopenharmony_ci * next extent, adding a extent(a range of blocks) and removing a extent. 11462306a36Sopenharmony_ci * 11562306a36Sopenharmony_ci * -- race on a extent status tree 11662306a36Sopenharmony_ci * Extent status tree is protected by inode->i_es_lock. 11762306a36Sopenharmony_ci * 11862306a36Sopenharmony_ci * -- memory consumption 11962306a36Sopenharmony_ci * Fragmented extent tree will make extent status tree cost too much 12062306a36Sopenharmony_ci * memory. Hence, we will reclaim written/unwritten/hole extents from 12162306a36Sopenharmony_ci * the tree under a heavy memory pressure. 12262306a36Sopenharmony_ci * 12362306a36Sopenharmony_ci * 12462306a36Sopenharmony_ci * ========================================================================== 12562306a36Sopenharmony_ci * 3. Performance analysis 12662306a36Sopenharmony_ci * 12762306a36Sopenharmony_ci * -- overhead 12862306a36Sopenharmony_ci * 1. There is a cache extent for write access, so if writes are 12962306a36Sopenharmony_ci * not very random, adding space operaions are in O(1) time. 13062306a36Sopenharmony_ci * 13162306a36Sopenharmony_ci * -- gain 13262306a36Sopenharmony_ci * 2. Code is much simpler, more readable, more maintainable and 13362306a36Sopenharmony_ci * more efficient. 13462306a36Sopenharmony_ci * 13562306a36Sopenharmony_ci * 13662306a36Sopenharmony_ci * ========================================================================== 13762306a36Sopenharmony_ci * 4. TODO list 13862306a36Sopenharmony_ci * 13962306a36Sopenharmony_ci * -- Refactor delayed space reservation 14062306a36Sopenharmony_ci * 14162306a36Sopenharmony_ci * -- Extent-level locking 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistatic struct kmem_cache *ext4_es_cachep; 14562306a36Sopenharmony_cistatic struct kmem_cache *ext4_pending_cachep; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic int __es_insert_extent(struct inode *inode, struct extent_status *newes, 14862306a36Sopenharmony_ci struct extent_status *prealloc); 14962306a36Sopenharmony_cistatic int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 15062306a36Sopenharmony_ci ext4_lblk_t end, int *reserved, 15162306a36Sopenharmony_ci struct extent_status *prealloc); 15262306a36Sopenharmony_cistatic int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); 15362306a36Sopenharmony_cistatic int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, 15462306a36Sopenharmony_ci struct ext4_inode_info *locked_ei); 15562306a36Sopenharmony_cistatic int __revise_pending(struct inode *inode, ext4_lblk_t lblk, 15662306a36Sopenharmony_ci ext4_lblk_t len, 15762306a36Sopenharmony_ci struct pending_reservation **prealloc); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ciint __init ext4_init_es(void) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); 16262306a36Sopenharmony_ci if (ext4_es_cachep == NULL) 16362306a36Sopenharmony_ci return -ENOMEM; 16462306a36Sopenharmony_ci return 0; 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_civoid ext4_exit_es(void) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci kmem_cache_destroy(ext4_es_cachep); 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_civoid ext4_es_init_tree(struct ext4_es_tree *tree) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci tree->root = RB_ROOT; 17562306a36Sopenharmony_ci tree->cache_es = NULL; 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci#ifdef ES_DEBUG__ 17962306a36Sopenharmony_cistatic void ext4_es_print_tree(struct inode *inode) 18062306a36Sopenharmony_ci{ 18162306a36Sopenharmony_ci struct ext4_es_tree *tree; 18262306a36Sopenharmony_ci struct rb_node *node; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino); 18562306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_es_tree; 18662306a36Sopenharmony_ci node = rb_first(&tree->root); 18762306a36Sopenharmony_ci while (node) { 18862306a36Sopenharmony_ci struct extent_status *es; 18962306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 19062306a36Sopenharmony_ci printk(KERN_DEBUG " [%u/%u) %llu %x", 19162306a36Sopenharmony_ci es->es_lblk, es->es_len, 19262306a36Sopenharmony_ci ext4_es_pblock(es), ext4_es_status(es)); 19362306a36Sopenharmony_ci node = rb_next(node); 19462306a36Sopenharmony_ci } 19562306a36Sopenharmony_ci printk(KERN_DEBUG "\n"); 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ci#else 19862306a36Sopenharmony_ci#define ext4_es_print_tree(inode) 19962306a36Sopenharmony_ci#endif 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_cistatic inline ext4_lblk_t ext4_es_end(struct extent_status *es) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci BUG_ON(es->es_lblk + es->es_len < es->es_lblk); 20462306a36Sopenharmony_ci return es->es_lblk + es->es_len - 1; 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci/* 20862306a36Sopenharmony_ci * search through the tree for an delayed extent with a given offset. If 20962306a36Sopenharmony_ci * it can't be found, try to find next extent. 21062306a36Sopenharmony_ci */ 21162306a36Sopenharmony_cistatic struct extent_status *__es_tree_search(struct rb_root *root, 21262306a36Sopenharmony_ci ext4_lblk_t lblk) 21362306a36Sopenharmony_ci{ 21462306a36Sopenharmony_ci struct rb_node *node = root->rb_node; 21562306a36Sopenharmony_ci struct extent_status *es = NULL; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci while (node) { 21862306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 21962306a36Sopenharmony_ci if (lblk < es->es_lblk) 22062306a36Sopenharmony_ci node = node->rb_left; 22162306a36Sopenharmony_ci else if (lblk > ext4_es_end(es)) 22262306a36Sopenharmony_ci node = node->rb_right; 22362306a36Sopenharmony_ci else 22462306a36Sopenharmony_ci return es; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci if (es && lblk < es->es_lblk) 22862306a36Sopenharmony_ci return es; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci if (es && lblk > ext4_es_end(es)) { 23162306a36Sopenharmony_ci node = rb_next(&es->rb_node); 23262306a36Sopenharmony_ci return node ? rb_entry(node, struct extent_status, rb_node) : 23362306a36Sopenharmony_ci NULL; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return NULL; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * ext4_es_find_extent_range - find extent with specified status within block 24162306a36Sopenharmony_ci * range or next extent following block range in 24262306a36Sopenharmony_ci * extents status tree 24362306a36Sopenharmony_ci * 24462306a36Sopenharmony_ci * @inode - file containing the range 24562306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status 24662306a36Sopenharmony_ci * @lblk - logical block defining start of range 24762306a36Sopenharmony_ci * @end - logical block defining end of range 24862306a36Sopenharmony_ci * @es - extent found, if any 24962306a36Sopenharmony_ci * 25062306a36Sopenharmony_ci * Find the first extent within the block range specified by @lblk and @end 25162306a36Sopenharmony_ci * in the extents status tree that satisfies @matching_fn. If a match 25262306a36Sopenharmony_ci * is found, it's returned in @es. If not, and a matching extent is found 25362306a36Sopenharmony_ci * beyond the block range, it's returned in @es. If no match is found, an 25462306a36Sopenharmony_ci * extent is returned in @es whose es_lblk, es_len, and es_pblk components 25562306a36Sopenharmony_ci * are 0. 25662306a36Sopenharmony_ci */ 25762306a36Sopenharmony_cistatic void __es_find_extent_range(struct inode *inode, 25862306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 25962306a36Sopenharmony_ci ext4_lblk_t lblk, ext4_lblk_t end, 26062306a36Sopenharmony_ci struct extent_status *es) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci struct ext4_es_tree *tree = NULL; 26362306a36Sopenharmony_ci struct extent_status *es1 = NULL; 26462306a36Sopenharmony_ci struct rb_node *node; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci WARN_ON(es == NULL); 26762306a36Sopenharmony_ci WARN_ON(end < lblk); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_es_tree; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci /* see if the extent has been cached */ 27262306a36Sopenharmony_ci es->es_lblk = es->es_len = es->es_pblk = 0; 27362306a36Sopenharmony_ci es1 = READ_ONCE(tree->cache_es); 27462306a36Sopenharmony_ci if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) { 27562306a36Sopenharmony_ci es_debug("%u cached by [%u/%u) %llu %x\n", 27662306a36Sopenharmony_ci lblk, es1->es_lblk, es1->es_len, 27762306a36Sopenharmony_ci ext4_es_pblock(es1), ext4_es_status(es1)); 27862306a36Sopenharmony_ci goto out; 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci es1 = __es_tree_search(&tree->root, lblk); 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ciout: 28462306a36Sopenharmony_ci if (es1 && !matching_fn(es1)) { 28562306a36Sopenharmony_ci while ((node = rb_next(&es1->rb_node)) != NULL) { 28662306a36Sopenharmony_ci es1 = rb_entry(node, struct extent_status, rb_node); 28762306a36Sopenharmony_ci if (es1->es_lblk > end) { 28862306a36Sopenharmony_ci es1 = NULL; 28962306a36Sopenharmony_ci break; 29062306a36Sopenharmony_ci } 29162306a36Sopenharmony_ci if (matching_fn(es1)) 29262306a36Sopenharmony_ci break; 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci } 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci if (es1 && matching_fn(es1)) { 29762306a36Sopenharmony_ci WRITE_ONCE(tree->cache_es, es1); 29862306a36Sopenharmony_ci es->es_lblk = es1->es_lblk; 29962306a36Sopenharmony_ci es->es_len = es1->es_len; 30062306a36Sopenharmony_ci es->es_pblk = es1->es_pblk; 30162306a36Sopenharmony_ci } 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci} 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci/* 30662306a36Sopenharmony_ci * Locking for __es_find_extent_range() for external use 30762306a36Sopenharmony_ci */ 30862306a36Sopenharmony_civoid ext4_es_find_extent_range(struct inode *inode, 30962306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 31062306a36Sopenharmony_ci ext4_lblk_t lblk, ext4_lblk_t end, 31162306a36Sopenharmony_ci struct extent_status *es) 31262306a36Sopenharmony_ci{ 31362306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 31462306a36Sopenharmony_ci return; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci trace_ext4_es_find_extent_range_enter(inode, lblk); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci read_lock(&EXT4_I(inode)->i_es_lock); 31962306a36Sopenharmony_ci __es_find_extent_range(inode, matching_fn, lblk, end, es); 32062306a36Sopenharmony_ci read_unlock(&EXT4_I(inode)->i_es_lock); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci trace_ext4_es_find_extent_range_exit(inode, es); 32362306a36Sopenharmony_ci} 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci/* 32662306a36Sopenharmony_ci * __es_scan_range - search block range for block with specified status 32762306a36Sopenharmony_ci * in extents status tree 32862306a36Sopenharmony_ci * 32962306a36Sopenharmony_ci * @inode - file containing the range 33062306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status 33162306a36Sopenharmony_ci * @lblk - logical block defining start of range 33262306a36Sopenharmony_ci * @end - logical block defining end of range 33362306a36Sopenharmony_ci * 33462306a36Sopenharmony_ci * Returns true if at least one block in the specified block range satisfies 33562306a36Sopenharmony_ci * the criterion specified by @matching_fn, and false if not. If at least 33662306a36Sopenharmony_ci * one extent has the specified status, then there is at least one block 33762306a36Sopenharmony_ci * in the cluster with that status. Should only be called by code that has 33862306a36Sopenharmony_ci * taken i_es_lock. 33962306a36Sopenharmony_ci */ 34062306a36Sopenharmony_cistatic bool __es_scan_range(struct inode *inode, 34162306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 34262306a36Sopenharmony_ci ext4_lblk_t start, ext4_lblk_t end) 34362306a36Sopenharmony_ci{ 34462306a36Sopenharmony_ci struct extent_status es; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci __es_find_extent_range(inode, matching_fn, start, end, &es); 34762306a36Sopenharmony_ci if (es.es_len == 0) 34862306a36Sopenharmony_ci return false; /* no matching extent in the tree */ 34962306a36Sopenharmony_ci else if (es.es_lblk <= start && 35062306a36Sopenharmony_ci start < es.es_lblk + es.es_len) 35162306a36Sopenharmony_ci return true; 35262306a36Sopenharmony_ci else if (start <= es.es_lblk && es.es_lblk <= end) 35362306a36Sopenharmony_ci return true; 35462306a36Sopenharmony_ci else 35562306a36Sopenharmony_ci return false; 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci/* 35862306a36Sopenharmony_ci * Locking for __es_scan_range() for external use 35962306a36Sopenharmony_ci */ 36062306a36Sopenharmony_cibool ext4_es_scan_range(struct inode *inode, 36162306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 36262306a36Sopenharmony_ci ext4_lblk_t lblk, ext4_lblk_t end) 36362306a36Sopenharmony_ci{ 36462306a36Sopenharmony_ci bool ret; 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 36762306a36Sopenharmony_ci return false; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci read_lock(&EXT4_I(inode)->i_es_lock); 37062306a36Sopenharmony_ci ret = __es_scan_range(inode, matching_fn, lblk, end); 37162306a36Sopenharmony_ci read_unlock(&EXT4_I(inode)->i_es_lock); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci return ret; 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci/* 37762306a36Sopenharmony_ci * __es_scan_clu - search cluster for block with specified status in 37862306a36Sopenharmony_ci * extents status tree 37962306a36Sopenharmony_ci * 38062306a36Sopenharmony_ci * @inode - file containing the cluster 38162306a36Sopenharmony_ci * @matching_fn - pointer to function that matches extents with desired status 38262306a36Sopenharmony_ci * @lblk - logical block in cluster to be searched 38362306a36Sopenharmony_ci * 38462306a36Sopenharmony_ci * Returns true if at least one extent in the cluster containing @lblk 38562306a36Sopenharmony_ci * satisfies the criterion specified by @matching_fn, and false if not. If at 38662306a36Sopenharmony_ci * least one extent has the specified status, then there is at least one block 38762306a36Sopenharmony_ci * in the cluster with that status. Should only be called by code that has 38862306a36Sopenharmony_ci * taken i_es_lock. 38962306a36Sopenharmony_ci */ 39062306a36Sopenharmony_cistatic bool __es_scan_clu(struct inode *inode, 39162306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 39262306a36Sopenharmony_ci ext4_lblk_t lblk) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 39562306a36Sopenharmony_ci ext4_lblk_t lblk_start, lblk_end; 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci lblk_start = EXT4_LBLK_CMASK(sbi, lblk); 39862306a36Sopenharmony_ci lblk_end = lblk_start + sbi->s_cluster_ratio - 1; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci return __es_scan_range(inode, matching_fn, lblk_start, lblk_end); 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci/* 40462306a36Sopenharmony_ci * Locking for __es_scan_clu() for external use 40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_cibool ext4_es_scan_clu(struct inode *inode, 40762306a36Sopenharmony_ci int (*matching_fn)(struct extent_status *es), 40862306a36Sopenharmony_ci ext4_lblk_t lblk) 40962306a36Sopenharmony_ci{ 41062306a36Sopenharmony_ci bool ret; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 41362306a36Sopenharmony_ci return false; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci read_lock(&EXT4_I(inode)->i_es_lock); 41662306a36Sopenharmony_ci ret = __es_scan_clu(inode, matching_fn, lblk); 41762306a36Sopenharmony_ci read_unlock(&EXT4_I(inode)->i_es_lock); 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci return ret; 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_cistatic void ext4_es_list_add(struct inode *inode) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 42562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci if (!list_empty(&ei->i_es_list)) 42862306a36Sopenharmony_ci return; 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci spin_lock(&sbi->s_es_lock); 43162306a36Sopenharmony_ci if (list_empty(&ei->i_es_list)) { 43262306a36Sopenharmony_ci list_add_tail(&ei->i_es_list, &sbi->s_es_list); 43362306a36Sopenharmony_ci sbi->s_es_nr_inode++; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 43662306a36Sopenharmony_ci} 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_cistatic void ext4_es_list_del(struct inode *inode) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 44162306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci spin_lock(&sbi->s_es_lock); 44462306a36Sopenharmony_ci if (!list_empty(&ei->i_es_list)) { 44562306a36Sopenharmony_ci list_del_init(&ei->i_es_list); 44662306a36Sopenharmony_ci sbi->s_es_nr_inode--; 44762306a36Sopenharmony_ci WARN_ON_ONCE(sbi->s_es_nr_inode < 0); 44862306a36Sopenharmony_ci } 44962306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 45062306a36Sopenharmony_ci} 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_cistatic inline struct pending_reservation *__alloc_pending(bool nofail) 45362306a36Sopenharmony_ci{ 45462306a36Sopenharmony_ci if (!nofail) 45562306a36Sopenharmony_ci return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL); 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic inline void __free_pending(struct pending_reservation *pr) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci kmem_cache_free(ext4_pending_cachep, pr); 46362306a36Sopenharmony_ci} 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci/* 46662306a36Sopenharmony_ci * Returns true if we cannot fail to allocate memory for this extent_status 46762306a36Sopenharmony_ci * entry and cannot reclaim it until its status changes. 46862306a36Sopenharmony_ci */ 46962306a36Sopenharmony_cistatic inline bool ext4_es_must_keep(struct extent_status *es) 47062306a36Sopenharmony_ci{ 47162306a36Sopenharmony_ci /* fiemap, bigalloc, and seek_data/hole need to use it. */ 47262306a36Sopenharmony_ci if (ext4_es_is_delayed(es)) 47362306a36Sopenharmony_ci return true; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci return false; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic inline struct extent_status *__es_alloc_extent(bool nofail) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci if (!nofail) 48162306a36Sopenharmony_ci return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL); 48462306a36Sopenharmony_ci} 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_cistatic void ext4_es_init_extent(struct inode *inode, struct extent_status *es, 48762306a36Sopenharmony_ci ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci es->es_lblk = lblk; 49062306a36Sopenharmony_ci es->es_len = len; 49162306a36Sopenharmony_ci es->es_pblk = pblk; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci /* We never try to reclaim a must kept extent, so we don't count it. */ 49462306a36Sopenharmony_ci if (!ext4_es_must_keep(es)) { 49562306a36Sopenharmony_ci if (!EXT4_I(inode)->i_es_shk_nr++) 49662306a36Sopenharmony_ci ext4_es_list_add(inode); 49762306a36Sopenharmony_ci percpu_counter_inc(&EXT4_SB(inode->i_sb)-> 49862306a36Sopenharmony_ci s_es_stats.es_stats_shk_cnt); 49962306a36Sopenharmony_ci } 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci EXT4_I(inode)->i_es_all_nr++; 50262306a36Sopenharmony_ci percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); 50362306a36Sopenharmony_ci} 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_cistatic inline void __es_free_extent(struct extent_status *es) 50662306a36Sopenharmony_ci{ 50762306a36Sopenharmony_ci kmem_cache_free(ext4_es_cachep, es); 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_cistatic void ext4_es_free_extent(struct inode *inode, struct extent_status *es) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci EXT4_I(inode)->i_es_all_nr--; 51362306a36Sopenharmony_ci percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci /* Decrease the shrink counter when we can reclaim the extent. */ 51662306a36Sopenharmony_ci if (!ext4_es_must_keep(es)) { 51762306a36Sopenharmony_ci BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); 51862306a36Sopenharmony_ci if (!--EXT4_I(inode)->i_es_shk_nr) 51962306a36Sopenharmony_ci ext4_es_list_del(inode); 52062306a36Sopenharmony_ci percpu_counter_dec(&EXT4_SB(inode->i_sb)-> 52162306a36Sopenharmony_ci s_es_stats.es_stats_shk_cnt); 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci __es_free_extent(es); 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci/* 52862306a36Sopenharmony_ci * Check whether or not two extents can be merged 52962306a36Sopenharmony_ci * Condition: 53062306a36Sopenharmony_ci * - logical block number is contiguous 53162306a36Sopenharmony_ci * - physical block number is contiguous 53262306a36Sopenharmony_ci * - status is equal 53362306a36Sopenharmony_ci */ 53462306a36Sopenharmony_cistatic int ext4_es_can_be_merged(struct extent_status *es1, 53562306a36Sopenharmony_ci struct extent_status *es2) 53662306a36Sopenharmony_ci{ 53762306a36Sopenharmony_ci if (ext4_es_type(es1) != ext4_es_type(es2)) 53862306a36Sopenharmony_ci return 0; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { 54162306a36Sopenharmony_ci pr_warn("ES assertion failed when merging extents. " 54262306a36Sopenharmony_ci "The sum of lengths of es1 (%d) and es2 (%d) " 54362306a36Sopenharmony_ci "is bigger than allowed file size (%d)\n", 54462306a36Sopenharmony_ci es1->es_len, es2->es_len, EXT_MAX_BLOCKS); 54562306a36Sopenharmony_ci WARN_ON(1); 54662306a36Sopenharmony_ci return 0; 54762306a36Sopenharmony_ci } 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) 55062306a36Sopenharmony_ci return 0; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && 55362306a36Sopenharmony_ci (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) 55462306a36Sopenharmony_ci return 1; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci if (ext4_es_is_hole(es1)) 55762306a36Sopenharmony_ci return 1; 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci /* we need to check delayed extent is without unwritten status */ 56062306a36Sopenharmony_ci if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) 56162306a36Sopenharmony_ci return 1; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci return 0; 56462306a36Sopenharmony_ci} 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_cistatic struct extent_status * 56762306a36Sopenharmony_ciext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 57062306a36Sopenharmony_ci struct extent_status *es1; 57162306a36Sopenharmony_ci struct rb_node *node; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci node = rb_prev(&es->rb_node); 57462306a36Sopenharmony_ci if (!node) 57562306a36Sopenharmony_ci return es; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci es1 = rb_entry(node, struct extent_status, rb_node); 57862306a36Sopenharmony_ci if (ext4_es_can_be_merged(es1, es)) { 57962306a36Sopenharmony_ci es1->es_len += es->es_len; 58062306a36Sopenharmony_ci if (ext4_es_is_referenced(es)) 58162306a36Sopenharmony_ci ext4_es_set_referenced(es1); 58262306a36Sopenharmony_ci rb_erase(&es->rb_node, &tree->root); 58362306a36Sopenharmony_ci ext4_es_free_extent(inode, es); 58462306a36Sopenharmony_ci es = es1; 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci return es; 58862306a36Sopenharmony_ci} 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_cistatic struct extent_status * 59162306a36Sopenharmony_ciext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 59462306a36Sopenharmony_ci struct extent_status *es1; 59562306a36Sopenharmony_ci struct rb_node *node; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci node = rb_next(&es->rb_node); 59862306a36Sopenharmony_ci if (!node) 59962306a36Sopenharmony_ci return es; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci es1 = rb_entry(node, struct extent_status, rb_node); 60262306a36Sopenharmony_ci if (ext4_es_can_be_merged(es, es1)) { 60362306a36Sopenharmony_ci es->es_len += es1->es_len; 60462306a36Sopenharmony_ci if (ext4_es_is_referenced(es1)) 60562306a36Sopenharmony_ci ext4_es_set_referenced(es); 60662306a36Sopenharmony_ci rb_erase(node, &tree->root); 60762306a36Sopenharmony_ci ext4_es_free_extent(inode, es1); 60862306a36Sopenharmony_ci } 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci return es; 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 61462306a36Sopenharmony_ci#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */ 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_cistatic void ext4_es_insert_extent_ext_check(struct inode *inode, 61762306a36Sopenharmony_ci struct extent_status *es) 61862306a36Sopenharmony_ci{ 61962306a36Sopenharmony_ci struct ext4_ext_path *path = NULL; 62062306a36Sopenharmony_ci struct ext4_extent *ex; 62162306a36Sopenharmony_ci ext4_lblk_t ee_block; 62262306a36Sopenharmony_ci ext4_fsblk_t ee_start; 62362306a36Sopenharmony_ci unsigned short ee_len; 62462306a36Sopenharmony_ci int depth, ee_status, es_status; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); 62762306a36Sopenharmony_ci if (IS_ERR(path)) 62862306a36Sopenharmony_ci return; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci depth = ext_depth(inode); 63162306a36Sopenharmony_ci ex = path[depth].p_ext; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci if (ex) { 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci ee_block = le32_to_cpu(ex->ee_block); 63662306a36Sopenharmony_ci ee_start = ext4_ext_pblock(ex); 63762306a36Sopenharmony_ci ee_len = ext4_ext_get_actual_len(ex); 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0; 64062306a36Sopenharmony_ci es_status = ext4_es_is_unwritten(es) ? 1 : 0; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci /* 64362306a36Sopenharmony_ci * Make sure ex and es are not overlap when we try to insert 64462306a36Sopenharmony_ci * a delayed/hole extent. 64562306a36Sopenharmony_ci */ 64662306a36Sopenharmony_ci if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { 64762306a36Sopenharmony_ci if (in_range(es->es_lblk, ee_block, ee_len)) { 64862306a36Sopenharmony_ci pr_warn("ES insert assertion failed for " 64962306a36Sopenharmony_ci "inode: %lu we can find an extent " 65062306a36Sopenharmony_ci "at block [%d/%d/%llu/%c], but we " 65162306a36Sopenharmony_ci "want to add a delayed/hole extent " 65262306a36Sopenharmony_ci "[%d/%d/%llu/%x]\n", 65362306a36Sopenharmony_ci inode->i_ino, ee_block, ee_len, 65462306a36Sopenharmony_ci ee_start, ee_status ? 'u' : 'w', 65562306a36Sopenharmony_ci es->es_lblk, es->es_len, 65662306a36Sopenharmony_ci ext4_es_pblock(es), ext4_es_status(es)); 65762306a36Sopenharmony_ci } 65862306a36Sopenharmony_ci goto out; 65962306a36Sopenharmony_ci } 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci /* 66262306a36Sopenharmony_ci * We don't check ee_block == es->es_lblk, etc. because es 66362306a36Sopenharmony_ci * might be a part of whole extent, vice versa. 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci if (es->es_lblk < ee_block || 66662306a36Sopenharmony_ci ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { 66762306a36Sopenharmony_ci pr_warn("ES insert assertion failed for inode: %lu " 66862306a36Sopenharmony_ci "ex_status [%d/%d/%llu/%c] != " 66962306a36Sopenharmony_ci "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 67062306a36Sopenharmony_ci ee_block, ee_len, ee_start, 67162306a36Sopenharmony_ci ee_status ? 'u' : 'w', es->es_lblk, es->es_len, 67262306a36Sopenharmony_ci ext4_es_pblock(es), es_status ? 'u' : 'w'); 67362306a36Sopenharmony_ci goto out; 67462306a36Sopenharmony_ci } 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci if (ee_status ^ es_status) { 67762306a36Sopenharmony_ci pr_warn("ES insert assertion failed for inode: %lu " 67862306a36Sopenharmony_ci "ex_status [%d/%d/%llu/%c] != " 67962306a36Sopenharmony_ci "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 68062306a36Sopenharmony_ci ee_block, ee_len, ee_start, 68162306a36Sopenharmony_ci ee_status ? 'u' : 'w', es->es_lblk, es->es_len, 68262306a36Sopenharmony_ci ext4_es_pblock(es), es_status ? 'u' : 'w'); 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci } else { 68562306a36Sopenharmony_ci /* 68662306a36Sopenharmony_ci * We can't find an extent on disk. So we need to make sure 68762306a36Sopenharmony_ci * that we don't want to add an written/unwritten extent. 68862306a36Sopenharmony_ci */ 68962306a36Sopenharmony_ci if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { 69062306a36Sopenharmony_ci pr_warn("ES insert assertion failed for inode: %lu " 69162306a36Sopenharmony_ci "can't find an extent at block %d but we want " 69262306a36Sopenharmony_ci "to add a written/unwritten extent " 69362306a36Sopenharmony_ci "[%d/%d/%llu/%x]\n", inode->i_ino, 69462306a36Sopenharmony_ci es->es_lblk, es->es_lblk, es->es_len, 69562306a36Sopenharmony_ci ext4_es_pblock(es), ext4_es_status(es)); 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci } 69862306a36Sopenharmony_ciout: 69962306a36Sopenharmony_ci ext4_free_ext_path(path); 70062306a36Sopenharmony_ci} 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_cistatic void ext4_es_insert_extent_ind_check(struct inode *inode, 70362306a36Sopenharmony_ci struct extent_status *es) 70462306a36Sopenharmony_ci{ 70562306a36Sopenharmony_ci struct ext4_map_blocks map; 70662306a36Sopenharmony_ci int retval; 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci /* 70962306a36Sopenharmony_ci * Here we call ext4_ind_map_blocks to lookup a block mapping because 71062306a36Sopenharmony_ci * 'Indirect' structure is defined in indirect.c. So we couldn't 71162306a36Sopenharmony_ci * access direct/indirect tree from outside. It is too dirty to define 71262306a36Sopenharmony_ci * this function in indirect.c file. 71362306a36Sopenharmony_ci */ 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci map.m_lblk = es->es_lblk; 71662306a36Sopenharmony_ci map.m_len = es->es_len; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci retval = ext4_ind_map_blocks(NULL, inode, &map, 0); 71962306a36Sopenharmony_ci if (retval > 0) { 72062306a36Sopenharmony_ci if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { 72162306a36Sopenharmony_ci /* 72262306a36Sopenharmony_ci * We want to add a delayed/hole extent but this 72362306a36Sopenharmony_ci * block has been allocated. 72462306a36Sopenharmony_ci */ 72562306a36Sopenharmony_ci pr_warn("ES insert assertion failed for inode: %lu " 72662306a36Sopenharmony_ci "We can find blocks but we want to add a " 72762306a36Sopenharmony_ci "delayed/hole extent [%d/%d/%llu/%x]\n", 72862306a36Sopenharmony_ci inode->i_ino, es->es_lblk, es->es_len, 72962306a36Sopenharmony_ci ext4_es_pblock(es), ext4_es_status(es)); 73062306a36Sopenharmony_ci return; 73162306a36Sopenharmony_ci } else if (ext4_es_is_written(es)) { 73262306a36Sopenharmony_ci if (retval != es->es_len) { 73362306a36Sopenharmony_ci pr_warn("ES insert assertion failed for " 73462306a36Sopenharmony_ci "inode: %lu retval %d != es_len %d\n", 73562306a36Sopenharmony_ci inode->i_ino, retval, es->es_len); 73662306a36Sopenharmony_ci return; 73762306a36Sopenharmony_ci } 73862306a36Sopenharmony_ci if (map.m_pblk != ext4_es_pblock(es)) { 73962306a36Sopenharmony_ci pr_warn("ES insert assertion failed for " 74062306a36Sopenharmony_ci "inode: %lu m_pblk %llu != " 74162306a36Sopenharmony_ci "es_pblk %llu\n", 74262306a36Sopenharmony_ci inode->i_ino, map.m_pblk, 74362306a36Sopenharmony_ci ext4_es_pblock(es)); 74462306a36Sopenharmony_ci return; 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci } else { 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * We don't need to check unwritten extent because 74962306a36Sopenharmony_ci * indirect-based file doesn't have it. 75062306a36Sopenharmony_ci */ 75162306a36Sopenharmony_ci BUG(); 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci } else if (retval == 0) { 75462306a36Sopenharmony_ci if (ext4_es_is_written(es)) { 75562306a36Sopenharmony_ci pr_warn("ES insert assertion failed for inode: %lu " 75662306a36Sopenharmony_ci "We can't find the block but we want to add " 75762306a36Sopenharmony_ci "a written extent [%d/%d/%llu/%x]\n", 75862306a36Sopenharmony_ci inode->i_ino, es->es_lblk, es->es_len, 75962306a36Sopenharmony_ci ext4_es_pblock(es), ext4_es_status(es)); 76062306a36Sopenharmony_ci return; 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci } 76362306a36Sopenharmony_ci} 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_cistatic inline void ext4_es_insert_extent_check(struct inode *inode, 76662306a36Sopenharmony_ci struct extent_status *es) 76762306a36Sopenharmony_ci{ 76862306a36Sopenharmony_ci /* 76962306a36Sopenharmony_ci * We don't need to worry about the race condition because 77062306a36Sopenharmony_ci * caller takes i_data_sem locking. 77162306a36Sopenharmony_ci */ 77262306a36Sopenharmony_ci BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); 77362306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 77462306a36Sopenharmony_ci ext4_es_insert_extent_ext_check(inode, es); 77562306a36Sopenharmony_ci else 77662306a36Sopenharmony_ci ext4_es_insert_extent_ind_check(inode, es); 77762306a36Sopenharmony_ci} 77862306a36Sopenharmony_ci#else 77962306a36Sopenharmony_cistatic inline void ext4_es_insert_extent_check(struct inode *inode, 78062306a36Sopenharmony_ci struct extent_status *es) 78162306a36Sopenharmony_ci{ 78262306a36Sopenharmony_ci} 78362306a36Sopenharmony_ci#endif 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_cistatic int __es_insert_extent(struct inode *inode, struct extent_status *newes, 78662306a36Sopenharmony_ci struct extent_status *prealloc) 78762306a36Sopenharmony_ci{ 78862306a36Sopenharmony_ci struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 78962306a36Sopenharmony_ci struct rb_node **p = &tree->root.rb_node; 79062306a36Sopenharmony_ci struct rb_node *parent = NULL; 79162306a36Sopenharmony_ci struct extent_status *es; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci while (*p) { 79462306a36Sopenharmony_ci parent = *p; 79562306a36Sopenharmony_ci es = rb_entry(parent, struct extent_status, rb_node); 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci if (newes->es_lblk < es->es_lblk) { 79862306a36Sopenharmony_ci if (ext4_es_can_be_merged(newes, es)) { 79962306a36Sopenharmony_ci /* 80062306a36Sopenharmony_ci * Here we can modify es_lblk directly 80162306a36Sopenharmony_ci * because it isn't overlapped. 80262306a36Sopenharmony_ci */ 80362306a36Sopenharmony_ci es->es_lblk = newes->es_lblk; 80462306a36Sopenharmony_ci es->es_len += newes->es_len; 80562306a36Sopenharmony_ci if (ext4_es_is_written(es) || 80662306a36Sopenharmony_ci ext4_es_is_unwritten(es)) 80762306a36Sopenharmony_ci ext4_es_store_pblock(es, 80862306a36Sopenharmony_ci newes->es_pblk); 80962306a36Sopenharmony_ci es = ext4_es_try_to_merge_left(inode, es); 81062306a36Sopenharmony_ci goto out; 81162306a36Sopenharmony_ci } 81262306a36Sopenharmony_ci p = &(*p)->rb_left; 81362306a36Sopenharmony_ci } else if (newes->es_lblk > ext4_es_end(es)) { 81462306a36Sopenharmony_ci if (ext4_es_can_be_merged(es, newes)) { 81562306a36Sopenharmony_ci es->es_len += newes->es_len; 81662306a36Sopenharmony_ci es = ext4_es_try_to_merge_right(inode, es); 81762306a36Sopenharmony_ci goto out; 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci p = &(*p)->rb_right; 82062306a36Sopenharmony_ci } else { 82162306a36Sopenharmony_ci BUG(); 82262306a36Sopenharmony_ci return -EINVAL; 82362306a36Sopenharmony_ci } 82462306a36Sopenharmony_ci } 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci if (prealloc) 82762306a36Sopenharmony_ci es = prealloc; 82862306a36Sopenharmony_ci else 82962306a36Sopenharmony_ci es = __es_alloc_extent(false); 83062306a36Sopenharmony_ci if (!es) 83162306a36Sopenharmony_ci return -ENOMEM; 83262306a36Sopenharmony_ci ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len, 83362306a36Sopenharmony_ci newes->es_pblk); 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci rb_link_node(&es->rb_node, parent, p); 83662306a36Sopenharmony_ci rb_insert_color(&es->rb_node, &tree->root); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ciout: 83962306a36Sopenharmony_ci tree->cache_es = es; 84062306a36Sopenharmony_ci return 0; 84162306a36Sopenharmony_ci} 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci/* 84462306a36Sopenharmony_ci * ext4_es_insert_extent() adds information to an inode's extent 84562306a36Sopenharmony_ci * status tree. 84662306a36Sopenharmony_ci */ 84762306a36Sopenharmony_civoid ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 84862306a36Sopenharmony_ci ext4_lblk_t len, ext4_fsblk_t pblk, 84962306a36Sopenharmony_ci unsigned int status) 85062306a36Sopenharmony_ci{ 85162306a36Sopenharmony_ci struct extent_status newes; 85262306a36Sopenharmony_ci ext4_lblk_t end = lblk + len - 1; 85362306a36Sopenharmony_ci int err1 = 0, err2 = 0, err3 = 0; 85462306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 85562306a36Sopenharmony_ci struct extent_status *es1 = NULL; 85662306a36Sopenharmony_ci struct extent_status *es2 = NULL; 85762306a36Sopenharmony_ci struct pending_reservation *pr = NULL; 85862306a36Sopenharmony_ci bool revise_pending = false; 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 86162306a36Sopenharmony_ci return; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", 86462306a36Sopenharmony_ci lblk, len, pblk, status, inode->i_ino); 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci if (!len) 86762306a36Sopenharmony_ci return; 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci BUG_ON(end < lblk); 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci if ((status & EXTENT_STATUS_DELAYED) && 87262306a36Sopenharmony_ci (status & EXTENT_STATUS_WRITTEN)) { 87362306a36Sopenharmony_ci ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " 87462306a36Sopenharmony_ci " delayed and written which can potentially " 87562306a36Sopenharmony_ci " cause data loss.", lblk, len); 87662306a36Sopenharmony_ci WARN_ON(1); 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci newes.es_lblk = lblk; 88062306a36Sopenharmony_ci newes.es_len = len; 88162306a36Sopenharmony_ci ext4_es_store_pblock_status(&newes, pblk, status); 88262306a36Sopenharmony_ci trace_ext4_es_insert_extent(inode, &newes); 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci ext4_es_insert_extent_check(inode, &newes); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci revise_pending = sbi->s_cluster_ratio > 1 && 88762306a36Sopenharmony_ci test_opt(inode->i_sb, DELALLOC) && 88862306a36Sopenharmony_ci (status & (EXTENT_STATUS_WRITTEN | 88962306a36Sopenharmony_ci EXTENT_STATUS_UNWRITTEN)); 89062306a36Sopenharmony_ciretry: 89162306a36Sopenharmony_ci if (err1 && !es1) 89262306a36Sopenharmony_ci es1 = __es_alloc_extent(true); 89362306a36Sopenharmony_ci if ((err1 || err2) && !es2) 89462306a36Sopenharmony_ci es2 = __es_alloc_extent(true); 89562306a36Sopenharmony_ci if ((err1 || err2 || err3) && revise_pending && !pr) 89662306a36Sopenharmony_ci pr = __alloc_pending(true); 89762306a36Sopenharmony_ci write_lock(&EXT4_I(inode)->i_es_lock); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci err1 = __es_remove_extent(inode, lblk, end, NULL, es1); 90062306a36Sopenharmony_ci if (err1 != 0) 90162306a36Sopenharmony_ci goto error; 90262306a36Sopenharmony_ci /* Free preallocated extent if it didn't get used. */ 90362306a36Sopenharmony_ci if (es1) { 90462306a36Sopenharmony_ci if (!es1->es_len) 90562306a36Sopenharmony_ci __es_free_extent(es1); 90662306a36Sopenharmony_ci es1 = NULL; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci err2 = __es_insert_extent(inode, &newes, es2); 91062306a36Sopenharmony_ci if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) 91162306a36Sopenharmony_ci err2 = 0; 91262306a36Sopenharmony_ci if (err2 != 0) 91362306a36Sopenharmony_ci goto error; 91462306a36Sopenharmony_ci /* Free preallocated extent if it didn't get used. */ 91562306a36Sopenharmony_ci if (es2) { 91662306a36Sopenharmony_ci if (!es2->es_len) 91762306a36Sopenharmony_ci __es_free_extent(es2); 91862306a36Sopenharmony_ci es2 = NULL; 91962306a36Sopenharmony_ci } 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci if (revise_pending) { 92262306a36Sopenharmony_ci err3 = __revise_pending(inode, lblk, len, &pr); 92362306a36Sopenharmony_ci if (err3 != 0) 92462306a36Sopenharmony_ci goto error; 92562306a36Sopenharmony_ci if (pr) { 92662306a36Sopenharmony_ci __free_pending(pr); 92762306a36Sopenharmony_ci pr = NULL; 92862306a36Sopenharmony_ci } 92962306a36Sopenharmony_ci } 93062306a36Sopenharmony_cierror: 93162306a36Sopenharmony_ci write_unlock(&EXT4_I(inode)->i_es_lock); 93262306a36Sopenharmony_ci if (err1 || err2 || err3) 93362306a36Sopenharmony_ci goto retry; 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci ext4_es_print_tree(inode); 93662306a36Sopenharmony_ci return; 93762306a36Sopenharmony_ci} 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci/* 94062306a36Sopenharmony_ci * ext4_es_cache_extent() inserts information into the extent status 94162306a36Sopenharmony_ci * tree if and only if there isn't information about the range in 94262306a36Sopenharmony_ci * question already. 94362306a36Sopenharmony_ci */ 94462306a36Sopenharmony_civoid ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, 94562306a36Sopenharmony_ci ext4_lblk_t len, ext4_fsblk_t pblk, 94662306a36Sopenharmony_ci unsigned int status) 94762306a36Sopenharmony_ci{ 94862306a36Sopenharmony_ci struct extent_status *es; 94962306a36Sopenharmony_ci struct extent_status newes; 95062306a36Sopenharmony_ci ext4_lblk_t end = lblk + len - 1; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 95362306a36Sopenharmony_ci return; 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci newes.es_lblk = lblk; 95662306a36Sopenharmony_ci newes.es_len = len; 95762306a36Sopenharmony_ci ext4_es_store_pblock_status(&newes, pblk, status); 95862306a36Sopenharmony_ci trace_ext4_es_cache_extent(inode, &newes); 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci if (!len) 96162306a36Sopenharmony_ci return; 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci BUG_ON(end < lblk); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci write_lock(&EXT4_I(inode)->i_es_lock); 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); 96862306a36Sopenharmony_ci if (!es || es->es_lblk > end) 96962306a36Sopenharmony_ci __es_insert_extent(inode, &newes, NULL); 97062306a36Sopenharmony_ci write_unlock(&EXT4_I(inode)->i_es_lock); 97162306a36Sopenharmony_ci} 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci/* 97462306a36Sopenharmony_ci * ext4_es_lookup_extent() looks up an extent in extent status tree. 97562306a36Sopenharmony_ci * 97662306a36Sopenharmony_ci * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. 97762306a36Sopenharmony_ci * 97862306a36Sopenharmony_ci * Return: 1 on found, 0 on not 97962306a36Sopenharmony_ci */ 98062306a36Sopenharmony_ciint ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 98162306a36Sopenharmony_ci ext4_lblk_t *next_lblk, 98262306a36Sopenharmony_ci struct extent_status *es) 98362306a36Sopenharmony_ci{ 98462306a36Sopenharmony_ci struct ext4_es_tree *tree; 98562306a36Sopenharmony_ci struct ext4_es_stats *stats; 98662306a36Sopenharmony_ci struct extent_status *es1 = NULL; 98762306a36Sopenharmony_ci struct rb_node *node; 98862306a36Sopenharmony_ci int found = 0; 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 99162306a36Sopenharmony_ci return 0; 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci trace_ext4_es_lookup_extent_enter(inode, lblk); 99462306a36Sopenharmony_ci es_debug("lookup extent in block %u\n", lblk); 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_es_tree; 99762306a36Sopenharmony_ci read_lock(&EXT4_I(inode)->i_es_lock); 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci /* find extent in cache firstly */ 100062306a36Sopenharmony_ci es->es_lblk = es->es_len = es->es_pblk = 0; 100162306a36Sopenharmony_ci es1 = READ_ONCE(tree->cache_es); 100262306a36Sopenharmony_ci if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) { 100362306a36Sopenharmony_ci es_debug("%u cached by [%u/%u)\n", 100462306a36Sopenharmony_ci lblk, es1->es_lblk, es1->es_len); 100562306a36Sopenharmony_ci found = 1; 100662306a36Sopenharmony_ci goto out; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci node = tree->root.rb_node; 101062306a36Sopenharmony_ci while (node) { 101162306a36Sopenharmony_ci es1 = rb_entry(node, struct extent_status, rb_node); 101262306a36Sopenharmony_ci if (lblk < es1->es_lblk) 101362306a36Sopenharmony_ci node = node->rb_left; 101462306a36Sopenharmony_ci else if (lblk > ext4_es_end(es1)) 101562306a36Sopenharmony_ci node = node->rb_right; 101662306a36Sopenharmony_ci else { 101762306a36Sopenharmony_ci found = 1; 101862306a36Sopenharmony_ci break; 101962306a36Sopenharmony_ci } 102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ciout: 102362306a36Sopenharmony_ci stats = &EXT4_SB(inode->i_sb)->s_es_stats; 102462306a36Sopenharmony_ci if (found) { 102562306a36Sopenharmony_ci BUG_ON(!es1); 102662306a36Sopenharmony_ci es->es_lblk = es1->es_lblk; 102762306a36Sopenharmony_ci es->es_len = es1->es_len; 102862306a36Sopenharmony_ci es->es_pblk = es1->es_pblk; 102962306a36Sopenharmony_ci if (!ext4_es_is_referenced(es1)) 103062306a36Sopenharmony_ci ext4_es_set_referenced(es1); 103162306a36Sopenharmony_ci percpu_counter_inc(&stats->es_stats_cache_hits); 103262306a36Sopenharmony_ci if (next_lblk) { 103362306a36Sopenharmony_ci node = rb_next(&es1->rb_node); 103462306a36Sopenharmony_ci if (node) { 103562306a36Sopenharmony_ci es1 = rb_entry(node, struct extent_status, 103662306a36Sopenharmony_ci rb_node); 103762306a36Sopenharmony_ci *next_lblk = es1->es_lblk; 103862306a36Sopenharmony_ci } else 103962306a36Sopenharmony_ci *next_lblk = 0; 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci } else { 104262306a36Sopenharmony_ci percpu_counter_inc(&stats->es_stats_cache_misses); 104362306a36Sopenharmony_ci } 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci read_unlock(&EXT4_I(inode)->i_es_lock); 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci trace_ext4_es_lookup_extent_exit(inode, es, found); 104862306a36Sopenharmony_ci return found; 104962306a36Sopenharmony_ci} 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_cistruct rsvd_count { 105262306a36Sopenharmony_ci int ndelonly; 105362306a36Sopenharmony_ci bool first_do_lblk_found; 105462306a36Sopenharmony_ci ext4_lblk_t first_do_lblk; 105562306a36Sopenharmony_ci ext4_lblk_t last_do_lblk; 105662306a36Sopenharmony_ci struct extent_status *left_es; 105762306a36Sopenharmony_ci bool partial; 105862306a36Sopenharmony_ci ext4_lblk_t lclu; 105962306a36Sopenharmony_ci}; 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci/* 106262306a36Sopenharmony_ci * init_rsvd - initialize reserved count data before removing block range 106362306a36Sopenharmony_ci * in file from extent status tree 106462306a36Sopenharmony_ci * 106562306a36Sopenharmony_ci * @inode - file containing range 106662306a36Sopenharmony_ci * @lblk - first block in range 106762306a36Sopenharmony_ci * @es - pointer to first extent in range 106862306a36Sopenharmony_ci * @rc - pointer to reserved count data 106962306a36Sopenharmony_ci * 107062306a36Sopenharmony_ci * Assumes es is not NULL 107162306a36Sopenharmony_ci */ 107262306a36Sopenharmony_cistatic void init_rsvd(struct inode *inode, ext4_lblk_t lblk, 107362306a36Sopenharmony_ci struct extent_status *es, struct rsvd_count *rc) 107462306a36Sopenharmony_ci{ 107562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 107662306a36Sopenharmony_ci struct rb_node *node; 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci rc->ndelonly = 0; 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci /* 108162306a36Sopenharmony_ci * for bigalloc, note the first delonly block in the range has not 108262306a36Sopenharmony_ci * been found, record the extent containing the block to the left of 108362306a36Sopenharmony_ci * the region to be removed, if any, and note that there's no partial 108462306a36Sopenharmony_ci * cluster to track 108562306a36Sopenharmony_ci */ 108662306a36Sopenharmony_ci if (sbi->s_cluster_ratio > 1) { 108762306a36Sopenharmony_ci rc->first_do_lblk_found = false; 108862306a36Sopenharmony_ci if (lblk > es->es_lblk) { 108962306a36Sopenharmony_ci rc->left_es = es; 109062306a36Sopenharmony_ci } else { 109162306a36Sopenharmony_ci node = rb_prev(&es->rb_node); 109262306a36Sopenharmony_ci rc->left_es = node ? rb_entry(node, 109362306a36Sopenharmony_ci struct extent_status, 109462306a36Sopenharmony_ci rb_node) : NULL; 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci rc->partial = false; 109762306a36Sopenharmony_ci } 109862306a36Sopenharmony_ci} 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci/* 110162306a36Sopenharmony_ci * count_rsvd - count the clusters containing delayed and not unwritten 110262306a36Sopenharmony_ci * (delonly) blocks in a range within an extent and add to 110362306a36Sopenharmony_ci * the running tally in rsvd_count 110462306a36Sopenharmony_ci * 110562306a36Sopenharmony_ci * @inode - file containing extent 110662306a36Sopenharmony_ci * @lblk - first block in range 110762306a36Sopenharmony_ci * @len - length of range in blocks 110862306a36Sopenharmony_ci * @es - pointer to extent containing clusters to be counted 110962306a36Sopenharmony_ci * @rc - pointer to reserved count data 111062306a36Sopenharmony_ci * 111162306a36Sopenharmony_ci * Tracks partial clusters found at the beginning and end of extents so 111262306a36Sopenharmony_ci * they aren't overcounted when they span adjacent extents 111362306a36Sopenharmony_ci */ 111462306a36Sopenharmony_cistatic void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len, 111562306a36Sopenharmony_ci struct extent_status *es, struct rsvd_count *rc) 111662306a36Sopenharmony_ci{ 111762306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 111862306a36Sopenharmony_ci ext4_lblk_t i, end, nclu; 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci if (!ext4_es_is_delonly(es)) 112162306a36Sopenharmony_ci return; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci WARN_ON(len <= 0); 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci if (sbi->s_cluster_ratio == 1) { 112662306a36Sopenharmony_ci rc->ndelonly += (int) len; 112762306a36Sopenharmony_ci return; 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci /* bigalloc */ 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci i = (lblk < es->es_lblk) ? es->es_lblk : lblk; 113362306a36Sopenharmony_ci end = lblk + (ext4_lblk_t) len - 1; 113462306a36Sopenharmony_ci end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end; 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci /* record the first block of the first delonly extent seen */ 113762306a36Sopenharmony_ci if (!rc->first_do_lblk_found) { 113862306a36Sopenharmony_ci rc->first_do_lblk = i; 113962306a36Sopenharmony_ci rc->first_do_lblk_found = true; 114062306a36Sopenharmony_ci } 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci /* update the last lblk in the region seen so far */ 114362306a36Sopenharmony_ci rc->last_do_lblk = end; 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci /* 114662306a36Sopenharmony_ci * if we're tracking a partial cluster and the current extent 114762306a36Sopenharmony_ci * doesn't start with it, count it and stop tracking 114862306a36Sopenharmony_ci */ 114962306a36Sopenharmony_ci if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) { 115062306a36Sopenharmony_ci rc->ndelonly++; 115162306a36Sopenharmony_ci rc->partial = false; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci /* 115562306a36Sopenharmony_ci * if the first cluster doesn't start on a cluster boundary but 115662306a36Sopenharmony_ci * ends on one, count it 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_ci if (EXT4_LBLK_COFF(sbi, i) != 0) { 115962306a36Sopenharmony_ci if (end >= EXT4_LBLK_CFILL(sbi, i)) { 116062306a36Sopenharmony_ci rc->ndelonly++; 116162306a36Sopenharmony_ci rc->partial = false; 116262306a36Sopenharmony_ci i = EXT4_LBLK_CFILL(sbi, i) + 1; 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci } 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci /* 116762306a36Sopenharmony_ci * if the current cluster starts on a cluster boundary, count the 116862306a36Sopenharmony_ci * number of whole delonly clusters in the extent 116962306a36Sopenharmony_ci */ 117062306a36Sopenharmony_ci if ((i + sbi->s_cluster_ratio - 1) <= end) { 117162306a36Sopenharmony_ci nclu = (end - i + 1) >> sbi->s_cluster_bits; 117262306a36Sopenharmony_ci rc->ndelonly += nclu; 117362306a36Sopenharmony_ci i += nclu << sbi->s_cluster_bits; 117462306a36Sopenharmony_ci } 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci /* 117762306a36Sopenharmony_ci * start tracking a partial cluster if there's a partial at the end 117862306a36Sopenharmony_ci * of the current extent and we're not already tracking one 117962306a36Sopenharmony_ci */ 118062306a36Sopenharmony_ci if (!rc->partial && i <= end) { 118162306a36Sopenharmony_ci rc->partial = true; 118262306a36Sopenharmony_ci rc->lclu = EXT4_B2C(sbi, i); 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci} 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci/* 118762306a36Sopenharmony_ci * __pr_tree_search - search for a pending cluster reservation 118862306a36Sopenharmony_ci * 118962306a36Sopenharmony_ci * @root - root of pending reservation tree 119062306a36Sopenharmony_ci * @lclu - logical cluster to search for 119162306a36Sopenharmony_ci * 119262306a36Sopenharmony_ci * Returns the pending reservation for the cluster identified by @lclu 119362306a36Sopenharmony_ci * if found. If not, returns a reservation for the next cluster if any, 119462306a36Sopenharmony_ci * and if not, returns NULL. 119562306a36Sopenharmony_ci */ 119662306a36Sopenharmony_cistatic struct pending_reservation *__pr_tree_search(struct rb_root *root, 119762306a36Sopenharmony_ci ext4_lblk_t lclu) 119862306a36Sopenharmony_ci{ 119962306a36Sopenharmony_ci struct rb_node *node = root->rb_node; 120062306a36Sopenharmony_ci struct pending_reservation *pr = NULL; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci while (node) { 120362306a36Sopenharmony_ci pr = rb_entry(node, struct pending_reservation, rb_node); 120462306a36Sopenharmony_ci if (lclu < pr->lclu) 120562306a36Sopenharmony_ci node = node->rb_left; 120662306a36Sopenharmony_ci else if (lclu > pr->lclu) 120762306a36Sopenharmony_ci node = node->rb_right; 120862306a36Sopenharmony_ci else 120962306a36Sopenharmony_ci return pr; 121062306a36Sopenharmony_ci } 121162306a36Sopenharmony_ci if (pr && lclu < pr->lclu) 121262306a36Sopenharmony_ci return pr; 121362306a36Sopenharmony_ci if (pr && lclu > pr->lclu) { 121462306a36Sopenharmony_ci node = rb_next(&pr->rb_node); 121562306a36Sopenharmony_ci return node ? rb_entry(node, struct pending_reservation, 121662306a36Sopenharmony_ci rb_node) : NULL; 121762306a36Sopenharmony_ci } 121862306a36Sopenharmony_ci return NULL; 121962306a36Sopenharmony_ci} 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_ci/* 122262306a36Sopenharmony_ci * get_rsvd - calculates and returns the number of cluster reservations to be 122362306a36Sopenharmony_ci * released when removing a block range from the extent status tree 122462306a36Sopenharmony_ci * and releases any pending reservations within the range 122562306a36Sopenharmony_ci * 122662306a36Sopenharmony_ci * @inode - file containing block range 122762306a36Sopenharmony_ci * @end - last block in range 122862306a36Sopenharmony_ci * @right_es - pointer to extent containing next block beyond end or NULL 122962306a36Sopenharmony_ci * @rc - pointer to reserved count data 123062306a36Sopenharmony_ci * 123162306a36Sopenharmony_ci * The number of reservations to be released is equal to the number of 123262306a36Sopenharmony_ci * clusters containing delayed and not unwritten (delonly) blocks within 123362306a36Sopenharmony_ci * the range, minus the number of clusters still containing delonly blocks 123462306a36Sopenharmony_ci * at the ends of the range, and minus the number of pending reservations 123562306a36Sopenharmony_ci * within the range. 123662306a36Sopenharmony_ci */ 123762306a36Sopenharmony_cistatic unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, 123862306a36Sopenharmony_ci struct extent_status *right_es, 123962306a36Sopenharmony_ci struct rsvd_count *rc) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 124262306a36Sopenharmony_ci struct pending_reservation *pr; 124362306a36Sopenharmony_ci struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; 124462306a36Sopenharmony_ci struct rb_node *node; 124562306a36Sopenharmony_ci ext4_lblk_t first_lclu, last_lclu; 124662306a36Sopenharmony_ci bool left_delonly, right_delonly, count_pending; 124762306a36Sopenharmony_ci struct extent_status *es; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci if (sbi->s_cluster_ratio > 1) { 125062306a36Sopenharmony_ci /* count any remaining partial cluster */ 125162306a36Sopenharmony_ci if (rc->partial) 125262306a36Sopenharmony_ci rc->ndelonly++; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci if (rc->ndelonly == 0) 125562306a36Sopenharmony_ci return 0; 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci first_lclu = EXT4_B2C(sbi, rc->first_do_lblk); 125862306a36Sopenharmony_ci last_lclu = EXT4_B2C(sbi, rc->last_do_lblk); 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci /* 126162306a36Sopenharmony_ci * decrease the delonly count by the number of clusters at the 126262306a36Sopenharmony_ci * ends of the range that still contain delonly blocks - 126362306a36Sopenharmony_ci * these clusters still need to be reserved 126462306a36Sopenharmony_ci */ 126562306a36Sopenharmony_ci left_delonly = right_delonly = false; 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci es = rc->left_es; 126862306a36Sopenharmony_ci while (es && ext4_es_end(es) >= 126962306a36Sopenharmony_ci EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) { 127062306a36Sopenharmony_ci if (ext4_es_is_delonly(es)) { 127162306a36Sopenharmony_ci rc->ndelonly--; 127262306a36Sopenharmony_ci left_delonly = true; 127362306a36Sopenharmony_ci break; 127462306a36Sopenharmony_ci } 127562306a36Sopenharmony_ci node = rb_prev(&es->rb_node); 127662306a36Sopenharmony_ci if (!node) 127762306a36Sopenharmony_ci break; 127862306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci if (right_es && (!left_delonly || first_lclu != last_lclu)) { 128162306a36Sopenharmony_ci if (end < ext4_es_end(right_es)) { 128262306a36Sopenharmony_ci es = right_es; 128362306a36Sopenharmony_ci } else { 128462306a36Sopenharmony_ci node = rb_next(&right_es->rb_node); 128562306a36Sopenharmony_ci es = node ? rb_entry(node, struct extent_status, 128662306a36Sopenharmony_ci rb_node) : NULL; 128762306a36Sopenharmony_ci } 128862306a36Sopenharmony_ci while (es && es->es_lblk <= 128962306a36Sopenharmony_ci EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) { 129062306a36Sopenharmony_ci if (ext4_es_is_delonly(es)) { 129162306a36Sopenharmony_ci rc->ndelonly--; 129262306a36Sopenharmony_ci right_delonly = true; 129362306a36Sopenharmony_ci break; 129462306a36Sopenharmony_ci } 129562306a36Sopenharmony_ci node = rb_next(&es->rb_node); 129662306a36Sopenharmony_ci if (!node) 129762306a36Sopenharmony_ci break; 129862306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, 129962306a36Sopenharmony_ci rb_node); 130062306a36Sopenharmony_ci } 130162306a36Sopenharmony_ci } 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci /* 130462306a36Sopenharmony_ci * Determine the block range that should be searched for 130562306a36Sopenharmony_ci * pending reservations, if any. Clusters on the ends of the 130662306a36Sopenharmony_ci * original removed range containing delonly blocks are 130762306a36Sopenharmony_ci * excluded. They've already been accounted for and it's not 130862306a36Sopenharmony_ci * possible to determine if an associated pending reservation 130962306a36Sopenharmony_ci * should be released with the information available in the 131062306a36Sopenharmony_ci * extents status tree. 131162306a36Sopenharmony_ci */ 131262306a36Sopenharmony_ci if (first_lclu == last_lclu) { 131362306a36Sopenharmony_ci if (left_delonly | right_delonly) 131462306a36Sopenharmony_ci count_pending = false; 131562306a36Sopenharmony_ci else 131662306a36Sopenharmony_ci count_pending = true; 131762306a36Sopenharmony_ci } else { 131862306a36Sopenharmony_ci if (left_delonly) 131962306a36Sopenharmony_ci first_lclu++; 132062306a36Sopenharmony_ci if (right_delonly) 132162306a36Sopenharmony_ci last_lclu--; 132262306a36Sopenharmony_ci if (first_lclu <= last_lclu) 132362306a36Sopenharmony_ci count_pending = true; 132462306a36Sopenharmony_ci else 132562306a36Sopenharmony_ci count_pending = false; 132662306a36Sopenharmony_ci } 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci /* 132962306a36Sopenharmony_ci * a pending reservation found between first_lclu and last_lclu 133062306a36Sopenharmony_ci * represents an allocated cluster that contained at least one 133162306a36Sopenharmony_ci * delonly block, so the delonly total must be reduced by one 133262306a36Sopenharmony_ci * for each pending reservation found and released 133362306a36Sopenharmony_ci */ 133462306a36Sopenharmony_ci if (count_pending) { 133562306a36Sopenharmony_ci pr = __pr_tree_search(&tree->root, first_lclu); 133662306a36Sopenharmony_ci while (pr && pr->lclu <= last_lclu) { 133762306a36Sopenharmony_ci rc->ndelonly--; 133862306a36Sopenharmony_ci node = rb_next(&pr->rb_node); 133962306a36Sopenharmony_ci rb_erase(&pr->rb_node, &tree->root); 134062306a36Sopenharmony_ci __free_pending(pr); 134162306a36Sopenharmony_ci if (!node) 134262306a36Sopenharmony_ci break; 134362306a36Sopenharmony_ci pr = rb_entry(node, struct pending_reservation, 134462306a36Sopenharmony_ci rb_node); 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci } 134762306a36Sopenharmony_ci } 134862306a36Sopenharmony_ci return rc->ndelonly; 134962306a36Sopenharmony_ci} 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci/* 135362306a36Sopenharmony_ci * __es_remove_extent - removes block range from extent status tree 135462306a36Sopenharmony_ci * 135562306a36Sopenharmony_ci * @inode - file containing range 135662306a36Sopenharmony_ci * @lblk - first block in range 135762306a36Sopenharmony_ci * @end - last block in range 135862306a36Sopenharmony_ci * @reserved - number of cluster reservations released 135962306a36Sopenharmony_ci * @prealloc - pre-allocated es to avoid memory allocation failures 136062306a36Sopenharmony_ci * 136162306a36Sopenharmony_ci * If @reserved is not NULL and delayed allocation is enabled, counts 136262306a36Sopenharmony_ci * block/cluster reservations freed by removing range and if bigalloc 136362306a36Sopenharmony_ci * enabled cancels pending reservations as needed. Returns 0 on success, 136462306a36Sopenharmony_ci * error code on failure. 136562306a36Sopenharmony_ci */ 136662306a36Sopenharmony_cistatic int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 136762306a36Sopenharmony_ci ext4_lblk_t end, int *reserved, 136862306a36Sopenharmony_ci struct extent_status *prealloc) 136962306a36Sopenharmony_ci{ 137062306a36Sopenharmony_ci struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 137162306a36Sopenharmony_ci struct rb_node *node; 137262306a36Sopenharmony_ci struct extent_status *es; 137362306a36Sopenharmony_ci struct extent_status orig_es; 137462306a36Sopenharmony_ci ext4_lblk_t len1, len2; 137562306a36Sopenharmony_ci ext4_fsblk_t block; 137662306a36Sopenharmony_ci int err = 0; 137762306a36Sopenharmony_ci bool count_reserved = true; 137862306a36Sopenharmony_ci struct rsvd_count rc; 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC)) 138162306a36Sopenharmony_ci count_reserved = false; 138262306a36Sopenharmony_ci 138362306a36Sopenharmony_ci es = __es_tree_search(&tree->root, lblk); 138462306a36Sopenharmony_ci if (!es) 138562306a36Sopenharmony_ci goto out; 138662306a36Sopenharmony_ci if (es->es_lblk > end) 138762306a36Sopenharmony_ci goto out; 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci /* Simply invalidate cache_es. */ 139062306a36Sopenharmony_ci tree->cache_es = NULL; 139162306a36Sopenharmony_ci if (count_reserved) 139262306a36Sopenharmony_ci init_rsvd(inode, lblk, es, &rc); 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci orig_es.es_lblk = es->es_lblk; 139562306a36Sopenharmony_ci orig_es.es_len = es->es_len; 139662306a36Sopenharmony_ci orig_es.es_pblk = es->es_pblk; 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; 139962306a36Sopenharmony_ci len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0; 140062306a36Sopenharmony_ci if (len1 > 0) 140162306a36Sopenharmony_ci es->es_len = len1; 140262306a36Sopenharmony_ci if (len2 > 0) { 140362306a36Sopenharmony_ci if (len1 > 0) { 140462306a36Sopenharmony_ci struct extent_status newes; 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ci newes.es_lblk = end + 1; 140762306a36Sopenharmony_ci newes.es_len = len2; 140862306a36Sopenharmony_ci block = 0x7FDEADBEEFULL; 140962306a36Sopenharmony_ci if (ext4_es_is_written(&orig_es) || 141062306a36Sopenharmony_ci ext4_es_is_unwritten(&orig_es)) 141162306a36Sopenharmony_ci block = ext4_es_pblock(&orig_es) + 141262306a36Sopenharmony_ci orig_es.es_len - len2; 141362306a36Sopenharmony_ci ext4_es_store_pblock_status(&newes, block, 141462306a36Sopenharmony_ci ext4_es_status(&orig_es)); 141562306a36Sopenharmony_ci err = __es_insert_extent(inode, &newes, prealloc); 141662306a36Sopenharmony_ci if (err) { 141762306a36Sopenharmony_ci if (!ext4_es_must_keep(&newes)) 141862306a36Sopenharmony_ci return 0; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci es->es_lblk = orig_es.es_lblk; 142162306a36Sopenharmony_ci es->es_len = orig_es.es_len; 142262306a36Sopenharmony_ci goto out; 142362306a36Sopenharmony_ci } 142462306a36Sopenharmony_ci } else { 142562306a36Sopenharmony_ci es->es_lblk = end + 1; 142662306a36Sopenharmony_ci es->es_len = len2; 142762306a36Sopenharmony_ci if (ext4_es_is_written(es) || 142862306a36Sopenharmony_ci ext4_es_is_unwritten(es)) { 142962306a36Sopenharmony_ci block = orig_es.es_pblk + orig_es.es_len - len2; 143062306a36Sopenharmony_ci ext4_es_store_pblock(es, block); 143162306a36Sopenharmony_ci } 143262306a36Sopenharmony_ci } 143362306a36Sopenharmony_ci if (count_reserved) 143462306a36Sopenharmony_ci count_rsvd(inode, orig_es.es_lblk + len1, 143562306a36Sopenharmony_ci orig_es.es_len - len1 - len2, &orig_es, &rc); 143662306a36Sopenharmony_ci goto out_get_reserved; 143762306a36Sopenharmony_ci } 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci if (len1 > 0) { 144062306a36Sopenharmony_ci if (count_reserved) 144162306a36Sopenharmony_ci count_rsvd(inode, lblk, orig_es.es_len - len1, 144262306a36Sopenharmony_ci &orig_es, &rc); 144362306a36Sopenharmony_ci node = rb_next(&es->rb_node); 144462306a36Sopenharmony_ci if (node) 144562306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 144662306a36Sopenharmony_ci else 144762306a36Sopenharmony_ci es = NULL; 144862306a36Sopenharmony_ci } 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci while (es && ext4_es_end(es) <= end) { 145162306a36Sopenharmony_ci if (count_reserved) 145262306a36Sopenharmony_ci count_rsvd(inode, es->es_lblk, es->es_len, es, &rc); 145362306a36Sopenharmony_ci node = rb_next(&es->rb_node); 145462306a36Sopenharmony_ci rb_erase(&es->rb_node, &tree->root); 145562306a36Sopenharmony_ci ext4_es_free_extent(inode, es); 145662306a36Sopenharmony_ci if (!node) { 145762306a36Sopenharmony_ci es = NULL; 145862306a36Sopenharmony_ci break; 145962306a36Sopenharmony_ci } 146062306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 146162306a36Sopenharmony_ci } 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci if (es && es->es_lblk < end + 1) { 146462306a36Sopenharmony_ci ext4_lblk_t orig_len = es->es_len; 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci len1 = ext4_es_end(es) - end; 146762306a36Sopenharmony_ci if (count_reserved) 146862306a36Sopenharmony_ci count_rsvd(inode, es->es_lblk, orig_len - len1, 146962306a36Sopenharmony_ci es, &rc); 147062306a36Sopenharmony_ci es->es_lblk = end + 1; 147162306a36Sopenharmony_ci es->es_len = len1; 147262306a36Sopenharmony_ci if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { 147362306a36Sopenharmony_ci block = es->es_pblk + orig_len - len1; 147462306a36Sopenharmony_ci ext4_es_store_pblock(es, block); 147562306a36Sopenharmony_ci } 147662306a36Sopenharmony_ci } 147762306a36Sopenharmony_ci 147862306a36Sopenharmony_ciout_get_reserved: 147962306a36Sopenharmony_ci if (count_reserved) 148062306a36Sopenharmony_ci *reserved = get_rsvd(inode, end, es, &rc); 148162306a36Sopenharmony_ciout: 148262306a36Sopenharmony_ci return err; 148362306a36Sopenharmony_ci} 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci/* 148662306a36Sopenharmony_ci * ext4_es_remove_extent - removes block range from extent status tree 148762306a36Sopenharmony_ci * 148862306a36Sopenharmony_ci * @inode - file containing range 148962306a36Sopenharmony_ci * @lblk - first block in range 149062306a36Sopenharmony_ci * @len - number of blocks to remove 149162306a36Sopenharmony_ci * 149262306a36Sopenharmony_ci * Reduces block/cluster reservation count and for bigalloc cancels pending 149362306a36Sopenharmony_ci * reservations as needed. 149462306a36Sopenharmony_ci */ 149562306a36Sopenharmony_civoid ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 149662306a36Sopenharmony_ci ext4_lblk_t len) 149762306a36Sopenharmony_ci{ 149862306a36Sopenharmony_ci ext4_lblk_t end; 149962306a36Sopenharmony_ci int err = 0; 150062306a36Sopenharmony_ci int reserved = 0; 150162306a36Sopenharmony_ci struct extent_status *es = NULL; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 150462306a36Sopenharmony_ci return; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci trace_ext4_es_remove_extent(inode, lblk, len); 150762306a36Sopenharmony_ci es_debug("remove [%u/%u) from extent status tree of inode %lu\n", 150862306a36Sopenharmony_ci lblk, len, inode->i_ino); 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci if (!len) 151162306a36Sopenharmony_ci return; 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci end = lblk + len - 1; 151462306a36Sopenharmony_ci BUG_ON(end < lblk); 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ciretry: 151762306a36Sopenharmony_ci if (err && !es) 151862306a36Sopenharmony_ci es = __es_alloc_extent(true); 151962306a36Sopenharmony_ci /* 152062306a36Sopenharmony_ci * ext4_clear_inode() depends on us taking i_es_lock unconditionally 152162306a36Sopenharmony_ci * so that we are sure __es_shrink() is done with the inode before it 152262306a36Sopenharmony_ci * is reclaimed. 152362306a36Sopenharmony_ci */ 152462306a36Sopenharmony_ci write_lock(&EXT4_I(inode)->i_es_lock); 152562306a36Sopenharmony_ci err = __es_remove_extent(inode, lblk, end, &reserved, es); 152662306a36Sopenharmony_ci /* Free preallocated extent if it didn't get used. */ 152762306a36Sopenharmony_ci if (es) { 152862306a36Sopenharmony_ci if (!es->es_len) 152962306a36Sopenharmony_ci __es_free_extent(es); 153062306a36Sopenharmony_ci es = NULL; 153162306a36Sopenharmony_ci } 153262306a36Sopenharmony_ci write_unlock(&EXT4_I(inode)->i_es_lock); 153362306a36Sopenharmony_ci if (err) 153462306a36Sopenharmony_ci goto retry; 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci ext4_es_print_tree(inode); 153762306a36Sopenharmony_ci ext4_da_release_space(inode, reserved); 153862306a36Sopenharmony_ci return; 153962306a36Sopenharmony_ci} 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_cistatic int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, 154262306a36Sopenharmony_ci struct ext4_inode_info *locked_ei) 154362306a36Sopenharmony_ci{ 154462306a36Sopenharmony_ci struct ext4_inode_info *ei; 154562306a36Sopenharmony_ci struct ext4_es_stats *es_stats; 154662306a36Sopenharmony_ci ktime_t start_time; 154762306a36Sopenharmony_ci u64 scan_time; 154862306a36Sopenharmony_ci int nr_to_walk; 154962306a36Sopenharmony_ci int nr_shrunk = 0; 155062306a36Sopenharmony_ci int retried = 0, nr_skipped = 0; 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci es_stats = &sbi->s_es_stats; 155362306a36Sopenharmony_ci start_time = ktime_get(); 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ciretry: 155662306a36Sopenharmony_ci spin_lock(&sbi->s_es_lock); 155762306a36Sopenharmony_ci nr_to_walk = sbi->s_es_nr_inode; 155862306a36Sopenharmony_ci while (nr_to_walk-- > 0) { 155962306a36Sopenharmony_ci if (list_empty(&sbi->s_es_list)) { 156062306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 156162306a36Sopenharmony_ci goto out; 156262306a36Sopenharmony_ci } 156362306a36Sopenharmony_ci ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, 156462306a36Sopenharmony_ci i_es_list); 156562306a36Sopenharmony_ci /* Move the inode to the tail */ 156662306a36Sopenharmony_ci list_move_tail(&ei->i_es_list, &sbi->s_es_list); 156762306a36Sopenharmony_ci 156862306a36Sopenharmony_ci /* 156962306a36Sopenharmony_ci * Normally we try hard to avoid shrinking precached inodes, 157062306a36Sopenharmony_ci * but we will as a last resort. 157162306a36Sopenharmony_ci */ 157262306a36Sopenharmony_ci if (!retried && ext4_test_inode_state(&ei->vfs_inode, 157362306a36Sopenharmony_ci EXT4_STATE_EXT_PRECACHED)) { 157462306a36Sopenharmony_ci nr_skipped++; 157562306a36Sopenharmony_ci continue; 157662306a36Sopenharmony_ci } 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ci if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { 157962306a36Sopenharmony_ci nr_skipped++; 158062306a36Sopenharmony_ci continue; 158162306a36Sopenharmony_ci } 158262306a36Sopenharmony_ci /* 158362306a36Sopenharmony_ci * Now we hold i_es_lock which protects us from inode reclaim 158462306a36Sopenharmony_ci * freeing inode under us 158562306a36Sopenharmony_ci */ 158662306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci nr_shrunk += es_reclaim_extents(ei, &nr_to_scan); 158962306a36Sopenharmony_ci write_unlock(&ei->i_es_lock); 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci if (nr_to_scan <= 0) 159262306a36Sopenharmony_ci goto out; 159362306a36Sopenharmony_ci spin_lock(&sbi->s_es_lock); 159462306a36Sopenharmony_ci } 159562306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci /* 159862306a36Sopenharmony_ci * If we skipped any inodes, and we weren't able to make any 159962306a36Sopenharmony_ci * forward progress, try again to scan precached inodes. 160062306a36Sopenharmony_ci */ 160162306a36Sopenharmony_ci if ((nr_shrunk == 0) && nr_skipped && !retried) { 160262306a36Sopenharmony_ci retried++; 160362306a36Sopenharmony_ci goto retry; 160462306a36Sopenharmony_ci } 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci if (locked_ei && nr_shrunk == 0) 160762306a36Sopenharmony_ci nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan); 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ciout: 161062306a36Sopenharmony_ci scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 161162306a36Sopenharmony_ci if (likely(es_stats->es_stats_scan_time)) 161262306a36Sopenharmony_ci es_stats->es_stats_scan_time = (scan_time + 161362306a36Sopenharmony_ci es_stats->es_stats_scan_time*3) / 4; 161462306a36Sopenharmony_ci else 161562306a36Sopenharmony_ci es_stats->es_stats_scan_time = scan_time; 161662306a36Sopenharmony_ci if (scan_time > es_stats->es_stats_max_scan_time) 161762306a36Sopenharmony_ci es_stats->es_stats_max_scan_time = scan_time; 161862306a36Sopenharmony_ci if (likely(es_stats->es_stats_shrunk)) 161962306a36Sopenharmony_ci es_stats->es_stats_shrunk = (nr_shrunk + 162062306a36Sopenharmony_ci es_stats->es_stats_shrunk*3) / 4; 162162306a36Sopenharmony_ci else 162262306a36Sopenharmony_ci es_stats->es_stats_shrunk = nr_shrunk; 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, 162562306a36Sopenharmony_ci nr_skipped, retried); 162662306a36Sopenharmony_ci return nr_shrunk; 162762306a36Sopenharmony_ci} 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_cistatic unsigned long ext4_es_count(struct shrinker *shrink, 163062306a36Sopenharmony_ci struct shrink_control *sc) 163162306a36Sopenharmony_ci{ 163262306a36Sopenharmony_ci unsigned long nr; 163362306a36Sopenharmony_ci struct ext4_sb_info *sbi; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); 163662306a36Sopenharmony_ci nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); 163762306a36Sopenharmony_ci trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); 163862306a36Sopenharmony_ci return nr; 163962306a36Sopenharmony_ci} 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_cistatic unsigned long ext4_es_scan(struct shrinker *shrink, 164262306a36Sopenharmony_ci struct shrink_control *sc) 164362306a36Sopenharmony_ci{ 164462306a36Sopenharmony_ci struct ext4_sb_info *sbi = container_of(shrink, 164562306a36Sopenharmony_ci struct ext4_sb_info, s_es_shrinker); 164662306a36Sopenharmony_ci int nr_to_scan = sc->nr_to_scan; 164762306a36Sopenharmony_ci int ret, nr_shrunk; 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); 165062306a36Sopenharmony_ci trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ci nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); 165562306a36Sopenharmony_ci trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); 165662306a36Sopenharmony_ci return nr_shrunk; 165762306a36Sopenharmony_ci} 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ciint ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v) 166062306a36Sopenharmony_ci{ 166162306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private); 166262306a36Sopenharmony_ci struct ext4_es_stats *es_stats = &sbi->s_es_stats; 166362306a36Sopenharmony_ci struct ext4_inode_info *ei, *max = NULL; 166462306a36Sopenharmony_ci unsigned int inode_cnt = 0; 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci if (v != SEQ_START_TOKEN) 166762306a36Sopenharmony_ci return 0; 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci /* here we just find an inode that has the max nr. of objects */ 167062306a36Sopenharmony_ci spin_lock(&sbi->s_es_lock); 167162306a36Sopenharmony_ci list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { 167262306a36Sopenharmony_ci inode_cnt++; 167362306a36Sopenharmony_ci if (max && max->i_es_all_nr < ei->i_es_all_nr) 167462306a36Sopenharmony_ci max = ei; 167562306a36Sopenharmony_ci else if (!max) 167662306a36Sopenharmony_ci max = ei; 167762306a36Sopenharmony_ci } 167862306a36Sopenharmony_ci spin_unlock(&sbi->s_es_lock); 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", 168162306a36Sopenharmony_ci percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), 168262306a36Sopenharmony_ci percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); 168362306a36Sopenharmony_ci seq_printf(seq, " %lld/%lld cache hits/misses\n", 168462306a36Sopenharmony_ci percpu_counter_sum_positive(&es_stats->es_stats_cache_hits), 168562306a36Sopenharmony_ci percpu_counter_sum_positive(&es_stats->es_stats_cache_misses)); 168662306a36Sopenharmony_ci if (inode_cnt) 168762306a36Sopenharmony_ci seq_printf(seq, " %d inodes on list\n", inode_cnt); 168862306a36Sopenharmony_ci 168962306a36Sopenharmony_ci seq_printf(seq, "average:\n %llu us scan time\n", 169062306a36Sopenharmony_ci div_u64(es_stats->es_stats_scan_time, 1000)); 169162306a36Sopenharmony_ci seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); 169262306a36Sopenharmony_ci if (inode_cnt) 169362306a36Sopenharmony_ci seq_printf(seq, 169462306a36Sopenharmony_ci "maximum:\n %lu inode (%u objects, %u reclaimable)\n" 169562306a36Sopenharmony_ci " %llu us max scan time\n", 169662306a36Sopenharmony_ci max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, 169762306a36Sopenharmony_ci div_u64(es_stats->es_stats_max_scan_time, 1000)); 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci return 0; 170062306a36Sopenharmony_ci} 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ciint ext4_es_register_shrinker(struct ext4_sb_info *sbi) 170362306a36Sopenharmony_ci{ 170462306a36Sopenharmony_ci int err; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci /* Make sure we have enough bits for physical block number */ 170762306a36Sopenharmony_ci BUILD_BUG_ON(ES_SHIFT < 48); 170862306a36Sopenharmony_ci INIT_LIST_HEAD(&sbi->s_es_list); 170962306a36Sopenharmony_ci sbi->s_es_nr_inode = 0; 171062306a36Sopenharmony_ci spin_lock_init(&sbi->s_es_lock); 171162306a36Sopenharmony_ci sbi->s_es_stats.es_stats_shrunk = 0; 171262306a36Sopenharmony_ci err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0, 171362306a36Sopenharmony_ci GFP_KERNEL); 171462306a36Sopenharmony_ci if (err) 171562306a36Sopenharmony_ci return err; 171662306a36Sopenharmony_ci err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0, 171762306a36Sopenharmony_ci GFP_KERNEL); 171862306a36Sopenharmony_ci if (err) 171962306a36Sopenharmony_ci goto err1; 172062306a36Sopenharmony_ci sbi->s_es_stats.es_stats_scan_time = 0; 172162306a36Sopenharmony_ci sbi->s_es_stats.es_stats_max_scan_time = 0; 172262306a36Sopenharmony_ci err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); 172362306a36Sopenharmony_ci if (err) 172462306a36Sopenharmony_ci goto err2; 172562306a36Sopenharmony_ci err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); 172662306a36Sopenharmony_ci if (err) 172762306a36Sopenharmony_ci goto err3; 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci sbi->s_es_shrinker.scan_objects = ext4_es_scan; 173062306a36Sopenharmony_ci sbi->s_es_shrinker.count_objects = ext4_es_count; 173162306a36Sopenharmony_ci sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; 173262306a36Sopenharmony_ci err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s", 173362306a36Sopenharmony_ci sbi->s_sb->s_id); 173462306a36Sopenharmony_ci if (err) 173562306a36Sopenharmony_ci goto err4; 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_ci return 0; 173862306a36Sopenharmony_cierr4: 173962306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); 174062306a36Sopenharmony_cierr3: 174162306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); 174262306a36Sopenharmony_cierr2: 174362306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); 174462306a36Sopenharmony_cierr1: 174562306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits); 174662306a36Sopenharmony_ci return err; 174762306a36Sopenharmony_ci} 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_civoid ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) 175062306a36Sopenharmony_ci{ 175162306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits); 175262306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); 175362306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); 175462306a36Sopenharmony_ci percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); 175562306a36Sopenharmony_ci unregister_shrinker(&sbi->s_es_shrinker); 175662306a36Sopenharmony_ci} 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci/* 175962306a36Sopenharmony_ci * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at 176062306a36Sopenharmony_ci * most *nr_to_scan extents, update *nr_to_scan accordingly. 176162306a36Sopenharmony_ci * 176262306a36Sopenharmony_ci * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan. 176362306a36Sopenharmony_ci * Increment *nr_shrunk by the number of reclaimed extents. Also update 176462306a36Sopenharmony_ci * ei->i_es_shrink_lblk to where we should continue scanning. 176562306a36Sopenharmony_ci */ 176662306a36Sopenharmony_cistatic int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, 176762306a36Sopenharmony_ci int *nr_to_scan, int *nr_shrunk) 176862306a36Sopenharmony_ci{ 176962306a36Sopenharmony_ci struct inode *inode = &ei->vfs_inode; 177062306a36Sopenharmony_ci struct ext4_es_tree *tree = &ei->i_es_tree; 177162306a36Sopenharmony_ci struct extent_status *es; 177262306a36Sopenharmony_ci struct rb_node *node; 177362306a36Sopenharmony_ci 177462306a36Sopenharmony_ci es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); 177562306a36Sopenharmony_ci if (!es) 177662306a36Sopenharmony_ci goto out_wrap; 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_ci while (*nr_to_scan > 0) { 177962306a36Sopenharmony_ci if (es->es_lblk > end) { 178062306a36Sopenharmony_ci ei->i_es_shrink_lblk = end + 1; 178162306a36Sopenharmony_ci return 0; 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci (*nr_to_scan)--; 178562306a36Sopenharmony_ci node = rb_next(&es->rb_node); 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_ci if (ext4_es_must_keep(es)) 178862306a36Sopenharmony_ci goto next; 178962306a36Sopenharmony_ci if (ext4_es_is_referenced(es)) { 179062306a36Sopenharmony_ci ext4_es_clear_referenced(es); 179162306a36Sopenharmony_ci goto next; 179262306a36Sopenharmony_ci } 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci rb_erase(&es->rb_node, &tree->root); 179562306a36Sopenharmony_ci ext4_es_free_extent(inode, es); 179662306a36Sopenharmony_ci (*nr_shrunk)++; 179762306a36Sopenharmony_cinext: 179862306a36Sopenharmony_ci if (!node) 179962306a36Sopenharmony_ci goto out_wrap; 180062306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 180162306a36Sopenharmony_ci } 180262306a36Sopenharmony_ci ei->i_es_shrink_lblk = es->es_lblk; 180362306a36Sopenharmony_ci return 1; 180462306a36Sopenharmony_ciout_wrap: 180562306a36Sopenharmony_ci ei->i_es_shrink_lblk = 0; 180662306a36Sopenharmony_ci return 0; 180762306a36Sopenharmony_ci} 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_cistatic int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan) 181062306a36Sopenharmony_ci{ 181162306a36Sopenharmony_ci struct inode *inode = &ei->vfs_inode; 181262306a36Sopenharmony_ci int nr_shrunk = 0; 181362306a36Sopenharmony_ci ext4_lblk_t start = ei->i_es_shrink_lblk; 181462306a36Sopenharmony_ci static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 181562306a36Sopenharmony_ci DEFAULT_RATELIMIT_BURST); 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci if (ei->i_es_shk_nr == 0) 181862306a36Sopenharmony_ci return 0; 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && 182162306a36Sopenharmony_ci __ratelimit(&_rs)) 182262306a36Sopenharmony_ci ext4_warning(inode->i_sb, "forced shrink of precached extents"); 182362306a36Sopenharmony_ci 182462306a36Sopenharmony_ci if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) && 182562306a36Sopenharmony_ci start != 0) 182662306a36Sopenharmony_ci es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk); 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci ei->i_es_tree.cache_es = NULL; 182962306a36Sopenharmony_ci return nr_shrunk; 183062306a36Sopenharmony_ci} 183162306a36Sopenharmony_ci 183262306a36Sopenharmony_ci/* 183362306a36Sopenharmony_ci * Called to support EXT4_IOC_CLEAR_ES_CACHE. We can only remove 183462306a36Sopenharmony_ci * discretionary entries from the extent status cache. (Some entries 183562306a36Sopenharmony_ci * must be present for proper operations.) 183662306a36Sopenharmony_ci */ 183762306a36Sopenharmony_civoid ext4_clear_inode_es(struct inode *inode) 183862306a36Sopenharmony_ci{ 183962306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 184062306a36Sopenharmony_ci struct extent_status *es; 184162306a36Sopenharmony_ci struct ext4_es_tree *tree; 184262306a36Sopenharmony_ci struct rb_node *node; 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_ci write_lock(&ei->i_es_lock); 184562306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_es_tree; 184662306a36Sopenharmony_ci tree->cache_es = NULL; 184762306a36Sopenharmony_ci node = rb_first(&tree->root); 184862306a36Sopenharmony_ci while (node) { 184962306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 185062306a36Sopenharmony_ci node = rb_next(node); 185162306a36Sopenharmony_ci if (!ext4_es_must_keep(es)) { 185262306a36Sopenharmony_ci rb_erase(&es->rb_node, &tree->root); 185362306a36Sopenharmony_ci ext4_es_free_extent(inode, es); 185462306a36Sopenharmony_ci } 185562306a36Sopenharmony_ci } 185662306a36Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED); 185762306a36Sopenharmony_ci write_unlock(&ei->i_es_lock); 185862306a36Sopenharmony_ci} 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci#ifdef ES_DEBUG__ 186162306a36Sopenharmony_cistatic void ext4_print_pending_tree(struct inode *inode) 186262306a36Sopenharmony_ci{ 186362306a36Sopenharmony_ci struct ext4_pending_tree *tree; 186462306a36Sopenharmony_ci struct rb_node *node; 186562306a36Sopenharmony_ci struct pending_reservation *pr; 186662306a36Sopenharmony_ci 186762306a36Sopenharmony_ci printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino); 186862306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_pending_tree; 186962306a36Sopenharmony_ci node = rb_first(&tree->root); 187062306a36Sopenharmony_ci while (node) { 187162306a36Sopenharmony_ci pr = rb_entry(node, struct pending_reservation, rb_node); 187262306a36Sopenharmony_ci printk(KERN_DEBUG " %u", pr->lclu); 187362306a36Sopenharmony_ci node = rb_next(node); 187462306a36Sopenharmony_ci } 187562306a36Sopenharmony_ci printk(KERN_DEBUG "\n"); 187662306a36Sopenharmony_ci} 187762306a36Sopenharmony_ci#else 187862306a36Sopenharmony_ci#define ext4_print_pending_tree(inode) 187962306a36Sopenharmony_ci#endif 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ciint __init ext4_init_pending(void) 188262306a36Sopenharmony_ci{ 188362306a36Sopenharmony_ci ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT); 188462306a36Sopenharmony_ci if (ext4_pending_cachep == NULL) 188562306a36Sopenharmony_ci return -ENOMEM; 188662306a36Sopenharmony_ci return 0; 188762306a36Sopenharmony_ci} 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_civoid ext4_exit_pending(void) 189062306a36Sopenharmony_ci{ 189162306a36Sopenharmony_ci kmem_cache_destroy(ext4_pending_cachep); 189262306a36Sopenharmony_ci} 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_civoid ext4_init_pending_tree(struct ext4_pending_tree *tree) 189562306a36Sopenharmony_ci{ 189662306a36Sopenharmony_ci tree->root = RB_ROOT; 189762306a36Sopenharmony_ci} 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci/* 190062306a36Sopenharmony_ci * __get_pending - retrieve a pointer to a pending reservation 190162306a36Sopenharmony_ci * 190262306a36Sopenharmony_ci * @inode - file containing the pending cluster reservation 190362306a36Sopenharmony_ci * @lclu - logical cluster of interest 190462306a36Sopenharmony_ci * 190562306a36Sopenharmony_ci * Returns a pointer to a pending reservation if it's a member of 190662306a36Sopenharmony_ci * the set, and NULL if not. Must be called holding i_es_lock. 190762306a36Sopenharmony_ci */ 190862306a36Sopenharmony_cistatic struct pending_reservation *__get_pending(struct inode *inode, 190962306a36Sopenharmony_ci ext4_lblk_t lclu) 191062306a36Sopenharmony_ci{ 191162306a36Sopenharmony_ci struct ext4_pending_tree *tree; 191262306a36Sopenharmony_ci struct rb_node *node; 191362306a36Sopenharmony_ci struct pending_reservation *pr = NULL; 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_pending_tree; 191662306a36Sopenharmony_ci node = (&tree->root)->rb_node; 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci while (node) { 191962306a36Sopenharmony_ci pr = rb_entry(node, struct pending_reservation, rb_node); 192062306a36Sopenharmony_ci if (lclu < pr->lclu) 192162306a36Sopenharmony_ci node = node->rb_left; 192262306a36Sopenharmony_ci else if (lclu > pr->lclu) 192362306a36Sopenharmony_ci node = node->rb_right; 192462306a36Sopenharmony_ci else if (lclu == pr->lclu) 192562306a36Sopenharmony_ci return pr; 192662306a36Sopenharmony_ci } 192762306a36Sopenharmony_ci return NULL; 192862306a36Sopenharmony_ci} 192962306a36Sopenharmony_ci 193062306a36Sopenharmony_ci/* 193162306a36Sopenharmony_ci * __insert_pending - adds a pending cluster reservation to the set of 193262306a36Sopenharmony_ci * pending reservations 193362306a36Sopenharmony_ci * 193462306a36Sopenharmony_ci * @inode - file containing the cluster 193562306a36Sopenharmony_ci * @lblk - logical block in the cluster to be added 193662306a36Sopenharmony_ci * @prealloc - preallocated pending entry 193762306a36Sopenharmony_ci * 193862306a36Sopenharmony_ci * Returns 0 on successful insertion and -ENOMEM on failure. If the 193962306a36Sopenharmony_ci * pending reservation is already in the set, returns successfully. 194062306a36Sopenharmony_ci */ 194162306a36Sopenharmony_cistatic int __insert_pending(struct inode *inode, ext4_lblk_t lblk, 194262306a36Sopenharmony_ci struct pending_reservation **prealloc) 194362306a36Sopenharmony_ci{ 194462306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 194562306a36Sopenharmony_ci struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; 194662306a36Sopenharmony_ci struct rb_node **p = &tree->root.rb_node; 194762306a36Sopenharmony_ci struct rb_node *parent = NULL; 194862306a36Sopenharmony_ci struct pending_reservation *pr; 194962306a36Sopenharmony_ci ext4_lblk_t lclu; 195062306a36Sopenharmony_ci int ret = 0; 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci lclu = EXT4_B2C(sbi, lblk); 195362306a36Sopenharmony_ci /* search to find parent for insertion */ 195462306a36Sopenharmony_ci while (*p) { 195562306a36Sopenharmony_ci parent = *p; 195662306a36Sopenharmony_ci pr = rb_entry(parent, struct pending_reservation, rb_node); 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_ci if (lclu < pr->lclu) { 195962306a36Sopenharmony_ci p = &(*p)->rb_left; 196062306a36Sopenharmony_ci } else if (lclu > pr->lclu) { 196162306a36Sopenharmony_ci p = &(*p)->rb_right; 196262306a36Sopenharmony_ci } else { 196362306a36Sopenharmony_ci /* pending reservation already inserted */ 196462306a36Sopenharmony_ci goto out; 196562306a36Sopenharmony_ci } 196662306a36Sopenharmony_ci } 196762306a36Sopenharmony_ci 196862306a36Sopenharmony_ci if (likely(*prealloc == NULL)) { 196962306a36Sopenharmony_ci pr = __alloc_pending(false); 197062306a36Sopenharmony_ci if (!pr) { 197162306a36Sopenharmony_ci ret = -ENOMEM; 197262306a36Sopenharmony_ci goto out; 197362306a36Sopenharmony_ci } 197462306a36Sopenharmony_ci } else { 197562306a36Sopenharmony_ci pr = *prealloc; 197662306a36Sopenharmony_ci *prealloc = NULL; 197762306a36Sopenharmony_ci } 197862306a36Sopenharmony_ci pr->lclu = lclu; 197962306a36Sopenharmony_ci 198062306a36Sopenharmony_ci rb_link_node(&pr->rb_node, parent, p); 198162306a36Sopenharmony_ci rb_insert_color(&pr->rb_node, &tree->root); 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_ciout: 198462306a36Sopenharmony_ci return ret; 198562306a36Sopenharmony_ci} 198662306a36Sopenharmony_ci 198762306a36Sopenharmony_ci/* 198862306a36Sopenharmony_ci * __remove_pending - removes a pending cluster reservation from the set 198962306a36Sopenharmony_ci * of pending reservations 199062306a36Sopenharmony_ci * 199162306a36Sopenharmony_ci * @inode - file containing the cluster 199262306a36Sopenharmony_ci * @lblk - logical block in the pending cluster reservation to be removed 199362306a36Sopenharmony_ci * 199462306a36Sopenharmony_ci * Returns successfully if pending reservation is not a member of the set. 199562306a36Sopenharmony_ci */ 199662306a36Sopenharmony_cistatic void __remove_pending(struct inode *inode, ext4_lblk_t lblk) 199762306a36Sopenharmony_ci{ 199862306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 199962306a36Sopenharmony_ci struct pending_reservation *pr; 200062306a36Sopenharmony_ci struct ext4_pending_tree *tree; 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci pr = __get_pending(inode, EXT4_B2C(sbi, lblk)); 200362306a36Sopenharmony_ci if (pr != NULL) { 200462306a36Sopenharmony_ci tree = &EXT4_I(inode)->i_pending_tree; 200562306a36Sopenharmony_ci rb_erase(&pr->rb_node, &tree->root); 200662306a36Sopenharmony_ci __free_pending(pr); 200762306a36Sopenharmony_ci } 200862306a36Sopenharmony_ci} 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci/* 201162306a36Sopenharmony_ci * ext4_remove_pending - removes a pending cluster reservation from the set 201262306a36Sopenharmony_ci * of pending reservations 201362306a36Sopenharmony_ci * 201462306a36Sopenharmony_ci * @inode - file containing the cluster 201562306a36Sopenharmony_ci * @lblk - logical block in the pending cluster reservation to be removed 201662306a36Sopenharmony_ci * 201762306a36Sopenharmony_ci * Locking for external use of __remove_pending. 201862306a36Sopenharmony_ci */ 201962306a36Sopenharmony_civoid ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk) 202062306a36Sopenharmony_ci{ 202162306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 202262306a36Sopenharmony_ci 202362306a36Sopenharmony_ci write_lock(&ei->i_es_lock); 202462306a36Sopenharmony_ci __remove_pending(inode, lblk); 202562306a36Sopenharmony_ci write_unlock(&ei->i_es_lock); 202662306a36Sopenharmony_ci} 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci/* 202962306a36Sopenharmony_ci * ext4_is_pending - determine whether a cluster has a pending reservation 203062306a36Sopenharmony_ci * on it 203162306a36Sopenharmony_ci * 203262306a36Sopenharmony_ci * @inode - file containing the cluster 203362306a36Sopenharmony_ci * @lblk - logical block in the cluster 203462306a36Sopenharmony_ci * 203562306a36Sopenharmony_ci * Returns true if there's a pending reservation for the cluster in the 203662306a36Sopenharmony_ci * set of pending reservations, and false if not. 203762306a36Sopenharmony_ci */ 203862306a36Sopenharmony_cibool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk) 203962306a36Sopenharmony_ci{ 204062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 204162306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 204262306a36Sopenharmony_ci bool ret; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci read_lock(&ei->i_es_lock); 204562306a36Sopenharmony_ci ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL); 204662306a36Sopenharmony_ci read_unlock(&ei->i_es_lock); 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ci return ret; 204962306a36Sopenharmony_ci} 205062306a36Sopenharmony_ci 205162306a36Sopenharmony_ci/* 205262306a36Sopenharmony_ci * ext4_es_insert_delayed_block - adds a delayed block to the extents status 205362306a36Sopenharmony_ci * tree, adding a pending reservation where 205462306a36Sopenharmony_ci * needed 205562306a36Sopenharmony_ci * 205662306a36Sopenharmony_ci * @inode - file containing the newly added block 205762306a36Sopenharmony_ci * @lblk - logical block to be added 205862306a36Sopenharmony_ci * @allocated - indicates whether a physical cluster has been allocated for 205962306a36Sopenharmony_ci * the logical cluster that contains the block 206062306a36Sopenharmony_ci */ 206162306a36Sopenharmony_civoid ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, 206262306a36Sopenharmony_ci bool allocated) 206362306a36Sopenharmony_ci{ 206462306a36Sopenharmony_ci struct extent_status newes; 206562306a36Sopenharmony_ci int err1 = 0, err2 = 0, err3 = 0; 206662306a36Sopenharmony_ci struct extent_status *es1 = NULL; 206762306a36Sopenharmony_ci struct extent_status *es2 = NULL; 206862306a36Sopenharmony_ci struct pending_reservation *pr = NULL; 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 207162306a36Sopenharmony_ci return; 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci es_debug("add [%u/1) delayed to extent status tree of inode %lu\n", 207462306a36Sopenharmony_ci lblk, inode->i_ino); 207562306a36Sopenharmony_ci 207662306a36Sopenharmony_ci newes.es_lblk = lblk; 207762306a36Sopenharmony_ci newes.es_len = 1; 207862306a36Sopenharmony_ci ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED); 207962306a36Sopenharmony_ci trace_ext4_es_insert_delayed_block(inode, &newes, allocated); 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci ext4_es_insert_extent_check(inode, &newes); 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ciretry: 208462306a36Sopenharmony_ci if (err1 && !es1) 208562306a36Sopenharmony_ci es1 = __es_alloc_extent(true); 208662306a36Sopenharmony_ci if ((err1 || err2) && !es2) 208762306a36Sopenharmony_ci es2 = __es_alloc_extent(true); 208862306a36Sopenharmony_ci if ((err1 || err2 || err3) && allocated && !pr) 208962306a36Sopenharmony_ci pr = __alloc_pending(true); 209062306a36Sopenharmony_ci write_lock(&EXT4_I(inode)->i_es_lock); 209162306a36Sopenharmony_ci 209262306a36Sopenharmony_ci err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); 209362306a36Sopenharmony_ci if (err1 != 0) 209462306a36Sopenharmony_ci goto error; 209562306a36Sopenharmony_ci /* Free preallocated extent if it didn't get used. */ 209662306a36Sopenharmony_ci if (es1) { 209762306a36Sopenharmony_ci if (!es1->es_len) 209862306a36Sopenharmony_ci __es_free_extent(es1); 209962306a36Sopenharmony_ci es1 = NULL; 210062306a36Sopenharmony_ci } 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci err2 = __es_insert_extent(inode, &newes, es2); 210362306a36Sopenharmony_ci if (err2 != 0) 210462306a36Sopenharmony_ci goto error; 210562306a36Sopenharmony_ci /* Free preallocated extent if it didn't get used. */ 210662306a36Sopenharmony_ci if (es2) { 210762306a36Sopenharmony_ci if (!es2->es_len) 210862306a36Sopenharmony_ci __es_free_extent(es2); 210962306a36Sopenharmony_ci es2 = NULL; 211062306a36Sopenharmony_ci } 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_ci if (allocated) { 211362306a36Sopenharmony_ci err3 = __insert_pending(inode, lblk, &pr); 211462306a36Sopenharmony_ci if (err3 != 0) 211562306a36Sopenharmony_ci goto error; 211662306a36Sopenharmony_ci if (pr) { 211762306a36Sopenharmony_ci __free_pending(pr); 211862306a36Sopenharmony_ci pr = NULL; 211962306a36Sopenharmony_ci } 212062306a36Sopenharmony_ci } 212162306a36Sopenharmony_cierror: 212262306a36Sopenharmony_ci write_unlock(&EXT4_I(inode)->i_es_lock); 212362306a36Sopenharmony_ci if (err1 || err2 || err3) 212462306a36Sopenharmony_ci goto retry; 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci ext4_es_print_tree(inode); 212762306a36Sopenharmony_ci ext4_print_pending_tree(inode); 212862306a36Sopenharmony_ci return; 212962306a36Sopenharmony_ci} 213062306a36Sopenharmony_ci 213162306a36Sopenharmony_ci/* 213262306a36Sopenharmony_ci * __es_delayed_clu - count number of clusters containing blocks that 213362306a36Sopenharmony_ci * are delayed only 213462306a36Sopenharmony_ci * 213562306a36Sopenharmony_ci * @inode - file containing block range 213662306a36Sopenharmony_ci * @start - logical block defining start of range 213762306a36Sopenharmony_ci * @end - logical block defining end of range 213862306a36Sopenharmony_ci * 213962306a36Sopenharmony_ci * Returns the number of clusters containing only delayed (not delayed 214062306a36Sopenharmony_ci * and unwritten) blocks in the range specified by @start and @end. Any 214162306a36Sopenharmony_ci * cluster or part of a cluster within the range and containing a delayed 214262306a36Sopenharmony_ci * and not unwritten block within the range is counted as a whole cluster. 214362306a36Sopenharmony_ci */ 214462306a36Sopenharmony_cistatic unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start, 214562306a36Sopenharmony_ci ext4_lblk_t end) 214662306a36Sopenharmony_ci{ 214762306a36Sopenharmony_ci struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 214862306a36Sopenharmony_ci struct extent_status *es; 214962306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 215062306a36Sopenharmony_ci struct rb_node *node; 215162306a36Sopenharmony_ci ext4_lblk_t first_lclu, last_lclu; 215262306a36Sopenharmony_ci unsigned long long last_counted_lclu; 215362306a36Sopenharmony_ci unsigned int n = 0; 215462306a36Sopenharmony_ci 215562306a36Sopenharmony_ci /* guaranteed to be unequal to any ext4_lblk_t value */ 215662306a36Sopenharmony_ci last_counted_lclu = ~0ULL; 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci es = __es_tree_search(&tree->root, start); 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci while (es && (es->es_lblk <= end)) { 216162306a36Sopenharmony_ci if (ext4_es_is_delonly(es)) { 216262306a36Sopenharmony_ci if (es->es_lblk <= start) 216362306a36Sopenharmony_ci first_lclu = EXT4_B2C(sbi, start); 216462306a36Sopenharmony_ci else 216562306a36Sopenharmony_ci first_lclu = EXT4_B2C(sbi, es->es_lblk); 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci if (ext4_es_end(es) >= end) 216862306a36Sopenharmony_ci last_lclu = EXT4_B2C(sbi, end); 216962306a36Sopenharmony_ci else 217062306a36Sopenharmony_ci last_lclu = EXT4_B2C(sbi, ext4_es_end(es)); 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci if (first_lclu == last_counted_lclu) 217362306a36Sopenharmony_ci n += last_lclu - first_lclu; 217462306a36Sopenharmony_ci else 217562306a36Sopenharmony_ci n += last_lclu - first_lclu + 1; 217662306a36Sopenharmony_ci last_counted_lclu = last_lclu; 217762306a36Sopenharmony_ci } 217862306a36Sopenharmony_ci node = rb_next(&es->rb_node); 217962306a36Sopenharmony_ci if (!node) 218062306a36Sopenharmony_ci break; 218162306a36Sopenharmony_ci es = rb_entry(node, struct extent_status, rb_node); 218262306a36Sopenharmony_ci } 218362306a36Sopenharmony_ci 218462306a36Sopenharmony_ci return n; 218562306a36Sopenharmony_ci} 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_ci/* 218862306a36Sopenharmony_ci * ext4_es_delayed_clu - count number of clusters containing blocks that 218962306a36Sopenharmony_ci * are both delayed and unwritten 219062306a36Sopenharmony_ci * 219162306a36Sopenharmony_ci * @inode - file containing block range 219262306a36Sopenharmony_ci * @lblk - logical block defining start of range 219362306a36Sopenharmony_ci * @len - number of blocks in range 219462306a36Sopenharmony_ci * 219562306a36Sopenharmony_ci * Locking for external use of __es_delayed_clu(). 219662306a36Sopenharmony_ci */ 219762306a36Sopenharmony_ciunsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, 219862306a36Sopenharmony_ci ext4_lblk_t len) 219962306a36Sopenharmony_ci{ 220062306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 220162306a36Sopenharmony_ci ext4_lblk_t end; 220262306a36Sopenharmony_ci unsigned int n; 220362306a36Sopenharmony_ci 220462306a36Sopenharmony_ci if (len == 0) 220562306a36Sopenharmony_ci return 0; 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci end = lblk + len - 1; 220862306a36Sopenharmony_ci WARN_ON(end < lblk); 220962306a36Sopenharmony_ci 221062306a36Sopenharmony_ci read_lock(&ei->i_es_lock); 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci n = __es_delayed_clu(inode, lblk, end); 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci read_unlock(&ei->i_es_lock); 221562306a36Sopenharmony_ci 221662306a36Sopenharmony_ci return n; 221762306a36Sopenharmony_ci} 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_ci/* 222062306a36Sopenharmony_ci * __revise_pending - makes, cancels, or leaves unchanged pending cluster 222162306a36Sopenharmony_ci * reservations for a specified block range depending 222262306a36Sopenharmony_ci * upon the presence or absence of delayed blocks 222362306a36Sopenharmony_ci * outside the range within clusters at the ends of the 222462306a36Sopenharmony_ci * range 222562306a36Sopenharmony_ci * 222662306a36Sopenharmony_ci * @inode - file containing the range 222762306a36Sopenharmony_ci * @lblk - logical block defining the start of range 222862306a36Sopenharmony_ci * @len - length of range in blocks 222962306a36Sopenharmony_ci * @prealloc - preallocated pending entry 223062306a36Sopenharmony_ci * 223162306a36Sopenharmony_ci * Used after a newly allocated extent is added to the extents status tree. 223262306a36Sopenharmony_ci * Requires that the extents in the range have either written or unwritten 223362306a36Sopenharmony_ci * status. Must be called while holding i_es_lock. 223462306a36Sopenharmony_ci */ 223562306a36Sopenharmony_cistatic int __revise_pending(struct inode *inode, ext4_lblk_t lblk, 223662306a36Sopenharmony_ci ext4_lblk_t len, 223762306a36Sopenharmony_ci struct pending_reservation **prealloc) 223862306a36Sopenharmony_ci{ 223962306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 224062306a36Sopenharmony_ci ext4_lblk_t end = lblk + len - 1; 224162306a36Sopenharmony_ci ext4_lblk_t first, last; 224262306a36Sopenharmony_ci bool f_del = false, l_del = false; 224362306a36Sopenharmony_ci int ret = 0; 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci if (len == 0) 224662306a36Sopenharmony_ci return 0; 224762306a36Sopenharmony_ci 224862306a36Sopenharmony_ci /* 224962306a36Sopenharmony_ci * Two cases - block range within single cluster and block range 225062306a36Sopenharmony_ci * spanning two or more clusters. Note that a cluster belonging 225162306a36Sopenharmony_ci * to a range starting and/or ending on a cluster boundary is treated 225262306a36Sopenharmony_ci * as if it does not contain a delayed extent. The new range may 225362306a36Sopenharmony_ci * have allocated space for previously delayed blocks out to the 225462306a36Sopenharmony_ci * cluster boundary, requiring that any pre-existing pending 225562306a36Sopenharmony_ci * reservation be canceled. Because this code only looks at blocks 225662306a36Sopenharmony_ci * outside the range, it should revise pending reservations 225762306a36Sopenharmony_ci * correctly even if the extent represented by the range can't be 225862306a36Sopenharmony_ci * inserted in the extents status tree due to ENOSPC. 225962306a36Sopenharmony_ci */ 226062306a36Sopenharmony_ci 226162306a36Sopenharmony_ci if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) { 226262306a36Sopenharmony_ci first = EXT4_LBLK_CMASK(sbi, lblk); 226362306a36Sopenharmony_ci if (first != lblk) 226462306a36Sopenharmony_ci f_del = __es_scan_range(inode, &ext4_es_is_delonly, 226562306a36Sopenharmony_ci first, lblk - 1); 226662306a36Sopenharmony_ci if (f_del) { 226762306a36Sopenharmony_ci ret = __insert_pending(inode, first, prealloc); 226862306a36Sopenharmony_ci if (ret < 0) 226962306a36Sopenharmony_ci goto out; 227062306a36Sopenharmony_ci } else { 227162306a36Sopenharmony_ci last = EXT4_LBLK_CMASK(sbi, end) + 227262306a36Sopenharmony_ci sbi->s_cluster_ratio - 1; 227362306a36Sopenharmony_ci if (last != end) 227462306a36Sopenharmony_ci l_del = __es_scan_range(inode, 227562306a36Sopenharmony_ci &ext4_es_is_delonly, 227662306a36Sopenharmony_ci end + 1, last); 227762306a36Sopenharmony_ci if (l_del) { 227862306a36Sopenharmony_ci ret = __insert_pending(inode, last, prealloc); 227962306a36Sopenharmony_ci if (ret < 0) 228062306a36Sopenharmony_ci goto out; 228162306a36Sopenharmony_ci } else 228262306a36Sopenharmony_ci __remove_pending(inode, last); 228362306a36Sopenharmony_ci } 228462306a36Sopenharmony_ci } else { 228562306a36Sopenharmony_ci first = EXT4_LBLK_CMASK(sbi, lblk); 228662306a36Sopenharmony_ci if (first != lblk) 228762306a36Sopenharmony_ci f_del = __es_scan_range(inode, &ext4_es_is_delonly, 228862306a36Sopenharmony_ci first, lblk - 1); 228962306a36Sopenharmony_ci if (f_del) { 229062306a36Sopenharmony_ci ret = __insert_pending(inode, first, prealloc); 229162306a36Sopenharmony_ci if (ret < 0) 229262306a36Sopenharmony_ci goto out; 229362306a36Sopenharmony_ci } else 229462306a36Sopenharmony_ci __remove_pending(inode, first); 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_ci last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; 229762306a36Sopenharmony_ci if (last != end) 229862306a36Sopenharmony_ci l_del = __es_scan_range(inode, &ext4_es_is_delonly, 229962306a36Sopenharmony_ci end + 1, last); 230062306a36Sopenharmony_ci if (l_del) { 230162306a36Sopenharmony_ci ret = __insert_pending(inode, last, prealloc); 230262306a36Sopenharmony_ci if (ret < 0) 230362306a36Sopenharmony_ci goto out; 230462306a36Sopenharmony_ci } else 230562306a36Sopenharmony_ci __remove_pending(inode, last); 230662306a36Sopenharmony_ci } 230762306a36Sopenharmony_ciout: 230862306a36Sopenharmony_ci return ret; 230962306a36Sopenharmony_ci} 2310