18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * This file is part of UBIFS. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2006-2008 Nokia Corporation. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Authors: Artem Bityutskiy (Битюцкий Артём) 88c2ecf20Sopenharmony_ci * Adrian Hunter 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci/* 128c2ecf20Sopenharmony_ci * This file implements UBIFS shrinker which evicts clean znodes from the TNC 138c2ecf20Sopenharmony_ci * tree when Linux VM needs more RAM. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * We do not implement any LRU lists to find oldest znodes to free because it 168c2ecf20Sopenharmony_ci * would add additional overhead to the file system fast paths. So the shrinker 178c2ecf20Sopenharmony_ci * just walks the TNC tree when searching for znodes to free. 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * If the root of a TNC sub-tree is clean and old enough, then the children are 208c2ecf20Sopenharmony_ci * also clean and old enough. So the shrinker walks the TNC in level order and 218c2ecf20Sopenharmony_ci * dumps entire sub-trees. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * The age of znodes is just the time-stamp when they were last looked at. 248c2ecf20Sopenharmony_ci * The current shrinker first tries to evict old znodes, then young ones. 258c2ecf20Sopenharmony_ci * 268c2ecf20Sopenharmony_ci * Since the shrinker is global, it has to protect against races with FS 278c2ecf20Sopenharmony_ci * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. 288c2ecf20Sopenharmony_ci */ 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#include "ubifs.h" 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* List of all UBIFS file-system instances */ 338c2ecf20Sopenharmony_ciLIST_HEAD(ubifs_infos); 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci/* 368c2ecf20Sopenharmony_ci * We number each shrinker run and record the number on the ubifs_info structure 378c2ecf20Sopenharmony_ci * so that we can easily work out which ubifs_info structures have already been 388c2ecf20Sopenharmony_ci * done by the current run. 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_cistatic unsigned int shrinker_run_no; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci/* Protects 'ubifs_infos' list */ 438c2ecf20Sopenharmony_ciDEFINE_SPINLOCK(ubifs_infos_lock); 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/* Global clean znode counter (for all mounted UBIFS instances) */ 468c2ecf20Sopenharmony_ciatomic_long_t ubifs_clean_zn_cnt; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/** 498c2ecf20Sopenharmony_ci * shrink_tnc - shrink TNC tree. 508c2ecf20Sopenharmony_ci * @c: UBIFS file-system description object 518c2ecf20Sopenharmony_ci * @nr: number of znodes to free 528c2ecf20Sopenharmony_ci * @age: the age of znodes to free 538c2ecf20Sopenharmony_ci * @contention: if any contention, this is set to %1 548c2ecf20Sopenharmony_ci * 558c2ecf20Sopenharmony_ci * This function traverses TNC tree and frees clean znodes. It does not free 568c2ecf20Sopenharmony_ci * clean znodes which younger then @age. Returns number of freed znodes. 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_cistatic int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci int total_freed = 0; 618c2ecf20Sopenharmony_ci struct ubifs_znode *znode, *zprev; 628c2ecf20Sopenharmony_ci time64_t time = ktime_get_seconds(); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci ubifs_assert(c, mutex_is_locked(&c->umount_mutex)); 658c2ecf20Sopenharmony_ci ubifs_assert(c, mutex_is_locked(&c->tnc_mutex)); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) 688c2ecf20Sopenharmony_ci return 0; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci /* 718c2ecf20Sopenharmony_ci * Traverse the TNC tree in levelorder manner, so that it is possible 728c2ecf20Sopenharmony_ci * to destroy large sub-trees. Indeed, if a znode is old, then all its 738c2ecf20Sopenharmony_ci * children are older or of the same age. 748c2ecf20Sopenharmony_ci * 758c2ecf20Sopenharmony_ci * Note, we are holding 'c->tnc_mutex', so we do not have to lock the 768c2ecf20Sopenharmony_ci * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is 778c2ecf20Sopenharmony_ci * changed only when the 'c->tnc_mutex' is held. 788c2ecf20Sopenharmony_ci */ 798c2ecf20Sopenharmony_ci zprev = NULL; 808c2ecf20Sopenharmony_ci znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); 818c2ecf20Sopenharmony_ci while (znode && total_freed < nr && 828c2ecf20Sopenharmony_ci atomic_long_read(&c->clean_zn_cnt) > 0) { 838c2ecf20Sopenharmony_ci int freed; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci /* 868c2ecf20Sopenharmony_ci * If the znode is clean, but it is in the 'c->cnext' list, this 878c2ecf20Sopenharmony_ci * means that this znode has just been written to flash as a 888c2ecf20Sopenharmony_ci * part of commit and was marked clean. They will be removed 898c2ecf20Sopenharmony_ci * from the list at end commit. We cannot change the list, 908c2ecf20Sopenharmony_ci * because it is not protected by any mutex (design decision to 918c2ecf20Sopenharmony_ci * make commit really independent and parallel to main I/O). So 928c2ecf20Sopenharmony_ci * we just skip these znodes. 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * Note, the 'clean_zn_cnt' counters are not updated until 958c2ecf20Sopenharmony_ci * after the commit, so the UBIFS shrinker does not report 968c2ecf20Sopenharmony_ci * the znodes which are in the 'c->cnext' list as freeable. 978c2ecf20Sopenharmony_ci * 988c2ecf20Sopenharmony_ci * Also note, if the root of a sub-tree is not in 'c->cnext', 998c2ecf20Sopenharmony_ci * then the whole sub-tree is not in 'c->cnext' as well, so it 1008c2ecf20Sopenharmony_ci * is safe to dump whole sub-tree. 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci if (znode->cnext) { 1048c2ecf20Sopenharmony_ci /* 1058c2ecf20Sopenharmony_ci * Very soon these znodes will be removed from the list 1068c2ecf20Sopenharmony_ci * and become freeable. 1078c2ecf20Sopenharmony_ci */ 1088c2ecf20Sopenharmony_ci *contention = 1; 1098c2ecf20Sopenharmony_ci } else if (!ubifs_zn_dirty(znode) && 1108c2ecf20Sopenharmony_ci abs(time - znode->time) >= age) { 1118c2ecf20Sopenharmony_ci if (znode->parent) 1128c2ecf20Sopenharmony_ci znode->parent->zbranch[znode->iip].znode = NULL; 1138c2ecf20Sopenharmony_ci else 1148c2ecf20Sopenharmony_ci c->zroot.znode = NULL; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci freed = ubifs_destroy_tnc_subtree(c, znode); 1178c2ecf20Sopenharmony_ci atomic_long_sub(freed, &ubifs_clean_zn_cnt); 1188c2ecf20Sopenharmony_ci atomic_long_sub(freed, &c->clean_zn_cnt); 1198c2ecf20Sopenharmony_ci total_freed += freed; 1208c2ecf20Sopenharmony_ci znode = zprev; 1218c2ecf20Sopenharmony_ci } 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci if (unlikely(!c->zroot.znode)) 1248c2ecf20Sopenharmony_ci break; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci zprev = znode; 1278c2ecf20Sopenharmony_ci znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); 1288c2ecf20Sopenharmony_ci cond_resched(); 1298c2ecf20Sopenharmony_ci } 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci return total_freed; 1328c2ecf20Sopenharmony_ci} 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci/** 1358c2ecf20Sopenharmony_ci * shrink_tnc_trees - shrink UBIFS TNC trees. 1368c2ecf20Sopenharmony_ci * @nr: number of znodes to free 1378c2ecf20Sopenharmony_ci * @age: the age of znodes to free 1388c2ecf20Sopenharmony_ci * @contention: if any contention, this is set to %1 1398c2ecf20Sopenharmony_ci * 1408c2ecf20Sopenharmony_ci * This function walks the list of mounted UBIFS file-systems and frees clean 1418c2ecf20Sopenharmony_ci * znodes which are older than @age, until at least @nr znodes are freed. 1428c2ecf20Sopenharmony_ci * Returns the number of freed znodes. 1438c2ecf20Sopenharmony_ci */ 1448c2ecf20Sopenharmony_cistatic int shrink_tnc_trees(int nr, int age, int *contention) 1458c2ecf20Sopenharmony_ci{ 1468c2ecf20Sopenharmony_ci struct ubifs_info *c; 1478c2ecf20Sopenharmony_ci struct list_head *p; 1488c2ecf20Sopenharmony_ci unsigned int run_no; 1498c2ecf20Sopenharmony_ci int freed = 0; 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci spin_lock(&ubifs_infos_lock); 1528c2ecf20Sopenharmony_ci do { 1538c2ecf20Sopenharmony_ci run_no = ++shrinker_run_no; 1548c2ecf20Sopenharmony_ci } while (run_no == 0); 1558c2ecf20Sopenharmony_ci /* Iterate over all mounted UBIFS file-systems and try to shrink them */ 1568c2ecf20Sopenharmony_ci p = ubifs_infos.next; 1578c2ecf20Sopenharmony_ci while (p != &ubifs_infos) { 1588c2ecf20Sopenharmony_ci c = list_entry(p, struct ubifs_info, infos_list); 1598c2ecf20Sopenharmony_ci /* 1608c2ecf20Sopenharmony_ci * We move the ones we do to the end of the list, so we stop 1618c2ecf20Sopenharmony_ci * when we see one we have already done. 1628c2ecf20Sopenharmony_ci */ 1638c2ecf20Sopenharmony_ci if (c->shrinker_run_no == run_no) 1648c2ecf20Sopenharmony_ci break; 1658c2ecf20Sopenharmony_ci if (!mutex_trylock(&c->umount_mutex)) { 1668c2ecf20Sopenharmony_ci /* Some un-mount is in progress, try next FS */ 1678c2ecf20Sopenharmony_ci *contention = 1; 1688c2ecf20Sopenharmony_ci p = p->next; 1698c2ecf20Sopenharmony_ci continue; 1708c2ecf20Sopenharmony_ci } 1718c2ecf20Sopenharmony_ci /* 1728c2ecf20Sopenharmony_ci * We're holding 'c->umount_mutex', so the file-system won't go 1738c2ecf20Sopenharmony_ci * away. 1748c2ecf20Sopenharmony_ci */ 1758c2ecf20Sopenharmony_ci if (!mutex_trylock(&c->tnc_mutex)) { 1768c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 1778c2ecf20Sopenharmony_ci *contention = 1; 1788c2ecf20Sopenharmony_ci p = p->next; 1798c2ecf20Sopenharmony_ci continue; 1808c2ecf20Sopenharmony_ci } 1818c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 1828c2ecf20Sopenharmony_ci /* 1838c2ecf20Sopenharmony_ci * OK, now we have TNC locked, the file-system cannot go away - 1848c2ecf20Sopenharmony_ci * it is safe to reap the cache. 1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_ci c->shrinker_run_no = run_no; 1878c2ecf20Sopenharmony_ci freed += shrink_tnc(c, nr, age, contention); 1888c2ecf20Sopenharmony_ci mutex_unlock(&c->tnc_mutex); 1898c2ecf20Sopenharmony_ci spin_lock(&ubifs_infos_lock); 1908c2ecf20Sopenharmony_ci /* Get the next list element before we move this one */ 1918c2ecf20Sopenharmony_ci p = p->next; 1928c2ecf20Sopenharmony_ci /* 1938c2ecf20Sopenharmony_ci * Move this one to the end of the list to provide some 1948c2ecf20Sopenharmony_ci * fairness. 1958c2ecf20Sopenharmony_ci */ 1968c2ecf20Sopenharmony_ci list_move_tail(&c->infos_list, &ubifs_infos); 1978c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 1988c2ecf20Sopenharmony_ci if (freed >= nr) 1998c2ecf20Sopenharmony_ci break; 2008c2ecf20Sopenharmony_ci } 2018c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 2028c2ecf20Sopenharmony_ci return freed; 2038c2ecf20Sopenharmony_ci} 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci/** 2068c2ecf20Sopenharmony_ci * kick_a_thread - kick a background thread to start commit. 2078c2ecf20Sopenharmony_ci * 2088c2ecf20Sopenharmony_ci * This function kicks a background thread to start background commit. Returns 2098c2ecf20Sopenharmony_ci * %-1 if a thread was kicked or there is another reason to assume the memory 2108c2ecf20Sopenharmony_ci * will soon be freed or become freeable. If there are no dirty znodes, returns 2118c2ecf20Sopenharmony_ci * %0. 2128c2ecf20Sopenharmony_ci */ 2138c2ecf20Sopenharmony_cistatic int kick_a_thread(void) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci int i; 2168c2ecf20Sopenharmony_ci struct ubifs_info *c; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci /* 2198c2ecf20Sopenharmony_ci * Iterate over all mounted UBIFS file-systems and find out if there is 2208c2ecf20Sopenharmony_ci * already an ongoing commit operation there. If no, then iterate for 2218c2ecf20Sopenharmony_ci * the second time and initiate background commit. 2228c2ecf20Sopenharmony_ci */ 2238c2ecf20Sopenharmony_ci spin_lock(&ubifs_infos_lock); 2248c2ecf20Sopenharmony_ci for (i = 0; i < 2; i++) { 2258c2ecf20Sopenharmony_ci list_for_each_entry(c, &ubifs_infos, infos_list) { 2268c2ecf20Sopenharmony_ci long dirty_zn_cnt; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (!mutex_trylock(&c->umount_mutex)) { 2298c2ecf20Sopenharmony_ci /* 2308c2ecf20Sopenharmony_ci * Some un-mount is in progress, it will 2318c2ecf20Sopenharmony_ci * certainly free memory, so just return. 2328c2ecf20Sopenharmony_ci */ 2338c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 2348c2ecf20Sopenharmony_ci return -1; 2358c2ecf20Sopenharmony_ci } 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || 2408c2ecf20Sopenharmony_ci c->ro_mount || c->ro_error) { 2418c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 2428c2ecf20Sopenharmony_ci continue; 2438c2ecf20Sopenharmony_ci } 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci if (c->cmt_state != COMMIT_RESTING) { 2468c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 2478c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 2488c2ecf20Sopenharmony_ci return -1; 2498c2ecf20Sopenharmony_ci } 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci if (i == 1) { 2528c2ecf20Sopenharmony_ci list_move_tail(&c->infos_list, &ubifs_infos); 2538c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci ubifs_request_bg_commit(c); 2568c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 2578c2ecf20Sopenharmony_ci return -1; 2588c2ecf20Sopenharmony_ci } 2598c2ecf20Sopenharmony_ci mutex_unlock(&c->umount_mutex); 2608c2ecf20Sopenharmony_ci } 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci spin_unlock(&ubifs_infos_lock); 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci return 0; 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ciunsigned long ubifs_shrink_count(struct shrinker *shrink, 2688c2ecf20Sopenharmony_ci struct shrink_control *sc) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci /* 2738c2ecf20Sopenharmony_ci * Due to the way UBIFS updates the clean znode counter it may 2748c2ecf20Sopenharmony_ci * temporarily be negative. 2758c2ecf20Sopenharmony_ci */ 2768c2ecf20Sopenharmony_ci return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ciunsigned long ubifs_shrink_scan(struct shrinker *shrink, 2808c2ecf20Sopenharmony_ci struct shrink_control *sc) 2818c2ecf20Sopenharmony_ci{ 2828c2ecf20Sopenharmony_ci unsigned long nr = sc->nr_to_scan; 2838c2ecf20Sopenharmony_ci int contention = 0; 2848c2ecf20Sopenharmony_ci unsigned long freed; 2858c2ecf20Sopenharmony_ci long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci if (!clean_zn_cnt) { 2888c2ecf20Sopenharmony_ci /* 2898c2ecf20Sopenharmony_ci * No clean znodes, nothing to reap. All we can do in this case 2908c2ecf20Sopenharmony_ci * is to kick background threads to start commit, which will 2918c2ecf20Sopenharmony_ci * probably make clean znodes which, in turn, will be freeable. 2928c2ecf20Sopenharmony_ci * And we return -1 which means will make VM call us again 2938c2ecf20Sopenharmony_ci * later. 2948c2ecf20Sopenharmony_ci */ 2958c2ecf20Sopenharmony_ci dbg_tnc("no clean znodes, kick a thread"); 2968c2ecf20Sopenharmony_ci return kick_a_thread(); 2978c2ecf20Sopenharmony_ci } 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); 3008c2ecf20Sopenharmony_ci if (freed >= nr) 3018c2ecf20Sopenharmony_ci goto out; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci dbg_tnc("not enough old znodes, try to free young ones"); 3048c2ecf20Sopenharmony_ci freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); 3058c2ecf20Sopenharmony_ci if (freed >= nr) 3068c2ecf20Sopenharmony_ci goto out; 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci dbg_tnc("not enough young znodes, free all"); 3098c2ecf20Sopenharmony_ci freed += shrink_tnc_trees(nr - freed, 0, &contention); 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci if (!freed && contention) { 3128c2ecf20Sopenharmony_ci dbg_tnc("freed nothing, but contention"); 3138c2ecf20Sopenharmony_ci return SHRINK_STOP; 3148c2ecf20Sopenharmony_ci } 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ciout: 3178c2ecf20Sopenharmony_ci dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); 3188c2ecf20Sopenharmony_ci return freed; 3198c2ecf20Sopenharmony_ci} 320