xref: /kernel/linux/linux-5.10/fs/ubifs/shrinker.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * This file is part of UBIFS.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2006-2008 Nokia Corporation.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Authors: Artem Bityutskiy (Битюцкий Артём)
88c2ecf20Sopenharmony_ci *          Adrian Hunter
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci/*
128c2ecf20Sopenharmony_ci * This file implements UBIFS shrinker which evicts clean znodes from the TNC
138c2ecf20Sopenharmony_ci * tree when Linux VM needs more RAM.
148c2ecf20Sopenharmony_ci *
158c2ecf20Sopenharmony_ci * We do not implement any LRU lists to find oldest znodes to free because it
168c2ecf20Sopenharmony_ci * would add additional overhead to the file system fast paths. So the shrinker
178c2ecf20Sopenharmony_ci * just walks the TNC tree when searching for znodes to free.
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * If the root of a TNC sub-tree is clean and old enough, then the children are
208c2ecf20Sopenharmony_ci * also clean and old enough. So the shrinker walks the TNC in level order and
218c2ecf20Sopenharmony_ci * dumps entire sub-trees.
228c2ecf20Sopenharmony_ci *
238c2ecf20Sopenharmony_ci * The age of znodes is just the time-stamp when they were last looked at.
248c2ecf20Sopenharmony_ci * The current shrinker first tries to evict old znodes, then young ones.
258c2ecf20Sopenharmony_ci *
268c2ecf20Sopenharmony_ci * Since the shrinker is global, it has to protect against races with FS
278c2ecf20Sopenharmony_ci * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
288c2ecf20Sopenharmony_ci */
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#include "ubifs.h"
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci/* List of all UBIFS file-system instances */
338c2ecf20Sopenharmony_ciLIST_HEAD(ubifs_infos);
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci/*
368c2ecf20Sopenharmony_ci * We number each shrinker run and record the number on the ubifs_info structure
378c2ecf20Sopenharmony_ci * so that we can easily work out which ubifs_info structures have already been
388c2ecf20Sopenharmony_ci * done by the current run.
398c2ecf20Sopenharmony_ci */
408c2ecf20Sopenharmony_cistatic unsigned int shrinker_run_no;
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci/* Protects 'ubifs_infos' list */
438c2ecf20Sopenharmony_ciDEFINE_SPINLOCK(ubifs_infos_lock);
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/* Global clean znode counter (for all mounted UBIFS instances) */
468c2ecf20Sopenharmony_ciatomic_long_t ubifs_clean_zn_cnt;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci/**
498c2ecf20Sopenharmony_ci * shrink_tnc - shrink TNC tree.
508c2ecf20Sopenharmony_ci * @c: UBIFS file-system description object
518c2ecf20Sopenharmony_ci * @nr: number of znodes to free
528c2ecf20Sopenharmony_ci * @age: the age of znodes to free
538c2ecf20Sopenharmony_ci * @contention: if any contention, this is set to %1
548c2ecf20Sopenharmony_ci *
558c2ecf20Sopenharmony_ci * This function traverses TNC tree and frees clean znodes. It does not free
568c2ecf20Sopenharmony_ci * clean znodes which younger then @age. Returns number of freed znodes.
578c2ecf20Sopenharmony_ci */
588c2ecf20Sopenharmony_cistatic int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	int total_freed = 0;
618c2ecf20Sopenharmony_ci	struct ubifs_znode *znode, *zprev;
628c2ecf20Sopenharmony_ci	time64_t time = ktime_get_seconds();
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
658c2ecf20Sopenharmony_ci	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
688c2ecf20Sopenharmony_ci		return 0;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	/*
718c2ecf20Sopenharmony_ci	 * Traverse the TNC tree in levelorder manner, so that it is possible
728c2ecf20Sopenharmony_ci	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
738c2ecf20Sopenharmony_ci	 * children are older or of the same age.
748c2ecf20Sopenharmony_ci	 *
758c2ecf20Sopenharmony_ci	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
768c2ecf20Sopenharmony_ci	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
778c2ecf20Sopenharmony_ci	 * changed only when the 'c->tnc_mutex' is held.
788c2ecf20Sopenharmony_ci	 */
798c2ecf20Sopenharmony_ci	zprev = NULL;
808c2ecf20Sopenharmony_ci	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
818c2ecf20Sopenharmony_ci	while (znode && total_freed < nr &&
828c2ecf20Sopenharmony_ci	       atomic_long_read(&c->clean_zn_cnt) > 0) {
838c2ecf20Sopenharmony_ci		int freed;
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci		/*
868c2ecf20Sopenharmony_ci		 * If the znode is clean, but it is in the 'c->cnext' list, this
878c2ecf20Sopenharmony_ci		 * means that this znode has just been written to flash as a
888c2ecf20Sopenharmony_ci		 * part of commit and was marked clean. They will be removed
898c2ecf20Sopenharmony_ci		 * from the list at end commit. We cannot change the list,
908c2ecf20Sopenharmony_ci		 * because it is not protected by any mutex (design decision to
918c2ecf20Sopenharmony_ci		 * make commit really independent and parallel to main I/O). So
928c2ecf20Sopenharmony_ci		 * we just skip these znodes.
938c2ecf20Sopenharmony_ci		 *
948c2ecf20Sopenharmony_ci		 * Note, the 'clean_zn_cnt' counters are not updated until
958c2ecf20Sopenharmony_ci		 * after the commit, so the UBIFS shrinker does not report
968c2ecf20Sopenharmony_ci		 * the znodes which are in the 'c->cnext' list as freeable.
978c2ecf20Sopenharmony_ci		 *
988c2ecf20Sopenharmony_ci		 * Also note, if the root of a sub-tree is not in 'c->cnext',
998c2ecf20Sopenharmony_ci		 * then the whole sub-tree is not in 'c->cnext' as well, so it
1008c2ecf20Sopenharmony_ci		 * is safe to dump whole sub-tree.
1018c2ecf20Sopenharmony_ci		 */
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci		if (znode->cnext) {
1048c2ecf20Sopenharmony_ci			/*
1058c2ecf20Sopenharmony_ci			 * Very soon these znodes will be removed from the list
1068c2ecf20Sopenharmony_ci			 * and become freeable.
1078c2ecf20Sopenharmony_ci			 */
1088c2ecf20Sopenharmony_ci			*contention = 1;
1098c2ecf20Sopenharmony_ci		} else if (!ubifs_zn_dirty(znode) &&
1108c2ecf20Sopenharmony_ci			   abs(time - znode->time) >= age) {
1118c2ecf20Sopenharmony_ci			if (znode->parent)
1128c2ecf20Sopenharmony_ci				znode->parent->zbranch[znode->iip].znode = NULL;
1138c2ecf20Sopenharmony_ci			else
1148c2ecf20Sopenharmony_ci				c->zroot.znode = NULL;
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci			freed = ubifs_destroy_tnc_subtree(c, znode);
1178c2ecf20Sopenharmony_ci			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
1188c2ecf20Sopenharmony_ci			atomic_long_sub(freed, &c->clean_zn_cnt);
1198c2ecf20Sopenharmony_ci			total_freed += freed;
1208c2ecf20Sopenharmony_ci			znode = zprev;
1218c2ecf20Sopenharmony_ci		}
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci		if (unlikely(!c->zroot.znode))
1248c2ecf20Sopenharmony_ci			break;
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		zprev = znode;
1278c2ecf20Sopenharmony_ci		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
1288c2ecf20Sopenharmony_ci		cond_resched();
1298c2ecf20Sopenharmony_ci	}
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	return total_freed;
1328c2ecf20Sopenharmony_ci}
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci/**
1358c2ecf20Sopenharmony_ci * shrink_tnc_trees - shrink UBIFS TNC trees.
1368c2ecf20Sopenharmony_ci * @nr: number of znodes to free
1378c2ecf20Sopenharmony_ci * @age: the age of znodes to free
1388c2ecf20Sopenharmony_ci * @contention: if any contention, this is set to %1
1398c2ecf20Sopenharmony_ci *
1408c2ecf20Sopenharmony_ci * This function walks the list of mounted UBIFS file-systems and frees clean
1418c2ecf20Sopenharmony_ci * znodes which are older than @age, until at least @nr znodes are freed.
1428c2ecf20Sopenharmony_ci * Returns the number of freed znodes.
1438c2ecf20Sopenharmony_ci */
1448c2ecf20Sopenharmony_cistatic int shrink_tnc_trees(int nr, int age, int *contention)
1458c2ecf20Sopenharmony_ci{
1468c2ecf20Sopenharmony_ci	struct ubifs_info *c;
1478c2ecf20Sopenharmony_ci	struct list_head *p;
1488c2ecf20Sopenharmony_ci	unsigned int run_no;
1498c2ecf20Sopenharmony_ci	int freed = 0;
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	spin_lock(&ubifs_infos_lock);
1528c2ecf20Sopenharmony_ci	do {
1538c2ecf20Sopenharmony_ci		run_no = ++shrinker_run_no;
1548c2ecf20Sopenharmony_ci	} while (run_no == 0);
1558c2ecf20Sopenharmony_ci	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
1568c2ecf20Sopenharmony_ci	p = ubifs_infos.next;
1578c2ecf20Sopenharmony_ci	while (p != &ubifs_infos) {
1588c2ecf20Sopenharmony_ci		c = list_entry(p, struct ubifs_info, infos_list);
1598c2ecf20Sopenharmony_ci		/*
1608c2ecf20Sopenharmony_ci		 * We move the ones we do to the end of the list, so we stop
1618c2ecf20Sopenharmony_ci		 * when we see one we have already done.
1628c2ecf20Sopenharmony_ci		 */
1638c2ecf20Sopenharmony_ci		if (c->shrinker_run_no == run_no)
1648c2ecf20Sopenharmony_ci			break;
1658c2ecf20Sopenharmony_ci		if (!mutex_trylock(&c->umount_mutex)) {
1668c2ecf20Sopenharmony_ci			/* Some un-mount is in progress, try next FS */
1678c2ecf20Sopenharmony_ci			*contention = 1;
1688c2ecf20Sopenharmony_ci			p = p->next;
1698c2ecf20Sopenharmony_ci			continue;
1708c2ecf20Sopenharmony_ci		}
1718c2ecf20Sopenharmony_ci		/*
1728c2ecf20Sopenharmony_ci		 * We're holding 'c->umount_mutex', so the file-system won't go
1738c2ecf20Sopenharmony_ci		 * away.
1748c2ecf20Sopenharmony_ci		 */
1758c2ecf20Sopenharmony_ci		if (!mutex_trylock(&c->tnc_mutex)) {
1768c2ecf20Sopenharmony_ci			mutex_unlock(&c->umount_mutex);
1778c2ecf20Sopenharmony_ci			*contention = 1;
1788c2ecf20Sopenharmony_ci			p = p->next;
1798c2ecf20Sopenharmony_ci			continue;
1808c2ecf20Sopenharmony_ci		}
1818c2ecf20Sopenharmony_ci		spin_unlock(&ubifs_infos_lock);
1828c2ecf20Sopenharmony_ci		/*
1838c2ecf20Sopenharmony_ci		 * OK, now we have TNC locked, the file-system cannot go away -
1848c2ecf20Sopenharmony_ci		 * it is safe to reap the cache.
1858c2ecf20Sopenharmony_ci		 */
1868c2ecf20Sopenharmony_ci		c->shrinker_run_no = run_no;
1878c2ecf20Sopenharmony_ci		freed += shrink_tnc(c, nr, age, contention);
1888c2ecf20Sopenharmony_ci		mutex_unlock(&c->tnc_mutex);
1898c2ecf20Sopenharmony_ci		spin_lock(&ubifs_infos_lock);
1908c2ecf20Sopenharmony_ci		/* Get the next list element before we move this one */
1918c2ecf20Sopenharmony_ci		p = p->next;
1928c2ecf20Sopenharmony_ci		/*
1938c2ecf20Sopenharmony_ci		 * Move this one to the end of the list to provide some
1948c2ecf20Sopenharmony_ci		 * fairness.
1958c2ecf20Sopenharmony_ci		 */
1968c2ecf20Sopenharmony_ci		list_move_tail(&c->infos_list, &ubifs_infos);
1978c2ecf20Sopenharmony_ci		mutex_unlock(&c->umount_mutex);
1988c2ecf20Sopenharmony_ci		if (freed >= nr)
1998c2ecf20Sopenharmony_ci			break;
2008c2ecf20Sopenharmony_ci	}
2018c2ecf20Sopenharmony_ci	spin_unlock(&ubifs_infos_lock);
2028c2ecf20Sopenharmony_ci	return freed;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci/**
2068c2ecf20Sopenharmony_ci * kick_a_thread - kick a background thread to start commit.
2078c2ecf20Sopenharmony_ci *
2088c2ecf20Sopenharmony_ci * This function kicks a background thread to start background commit. Returns
2098c2ecf20Sopenharmony_ci * %-1 if a thread was kicked or there is another reason to assume the memory
2108c2ecf20Sopenharmony_ci * will soon be freed or become freeable. If there are no dirty znodes, returns
2118c2ecf20Sopenharmony_ci * %0.
2128c2ecf20Sopenharmony_ci */
2138c2ecf20Sopenharmony_cistatic int kick_a_thread(void)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	int i;
2168c2ecf20Sopenharmony_ci	struct ubifs_info *c;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	/*
2198c2ecf20Sopenharmony_ci	 * Iterate over all mounted UBIFS file-systems and find out if there is
2208c2ecf20Sopenharmony_ci	 * already an ongoing commit operation there. If no, then iterate for
2218c2ecf20Sopenharmony_ci	 * the second time and initiate background commit.
2228c2ecf20Sopenharmony_ci	 */
2238c2ecf20Sopenharmony_ci	spin_lock(&ubifs_infos_lock);
2248c2ecf20Sopenharmony_ci	for (i = 0; i < 2; i++) {
2258c2ecf20Sopenharmony_ci		list_for_each_entry(c, &ubifs_infos, infos_list) {
2268c2ecf20Sopenharmony_ci			long dirty_zn_cnt;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci			if (!mutex_trylock(&c->umount_mutex)) {
2298c2ecf20Sopenharmony_ci				/*
2308c2ecf20Sopenharmony_ci				 * Some un-mount is in progress, it will
2318c2ecf20Sopenharmony_ci				 * certainly free memory, so just return.
2328c2ecf20Sopenharmony_ci				 */
2338c2ecf20Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
2348c2ecf20Sopenharmony_ci				return -1;
2358c2ecf20Sopenharmony_ci			}
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
2408c2ecf20Sopenharmony_ci			    c->ro_mount || c->ro_error) {
2418c2ecf20Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
2428c2ecf20Sopenharmony_ci				continue;
2438c2ecf20Sopenharmony_ci			}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci			if (c->cmt_state != COMMIT_RESTING) {
2468c2ecf20Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
2478c2ecf20Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
2488c2ecf20Sopenharmony_ci				return -1;
2498c2ecf20Sopenharmony_ci			}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci			if (i == 1) {
2528c2ecf20Sopenharmony_ci				list_move_tail(&c->infos_list, &ubifs_infos);
2538c2ecf20Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci				ubifs_request_bg_commit(c);
2568c2ecf20Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
2578c2ecf20Sopenharmony_ci				return -1;
2588c2ecf20Sopenharmony_ci			}
2598c2ecf20Sopenharmony_ci			mutex_unlock(&c->umount_mutex);
2608c2ecf20Sopenharmony_ci		}
2618c2ecf20Sopenharmony_ci	}
2628c2ecf20Sopenharmony_ci	spin_unlock(&ubifs_infos_lock);
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	return 0;
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ciunsigned long ubifs_shrink_count(struct shrinker *shrink,
2688c2ecf20Sopenharmony_ci				 struct shrink_control *sc)
2698c2ecf20Sopenharmony_ci{
2708c2ecf20Sopenharmony_ci	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	/*
2738c2ecf20Sopenharmony_ci	 * Due to the way UBIFS updates the clean znode counter it may
2748c2ecf20Sopenharmony_ci	 * temporarily be negative.
2758c2ecf20Sopenharmony_ci	 */
2768c2ecf20Sopenharmony_ci	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ciunsigned long ubifs_shrink_scan(struct shrinker *shrink,
2808c2ecf20Sopenharmony_ci				struct shrink_control *sc)
2818c2ecf20Sopenharmony_ci{
2828c2ecf20Sopenharmony_ci	unsigned long nr = sc->nr_to_scan;
2838c2ecf20Sopenharmony_ci	int contention = 0;
2848c2ecf20Sopenharmony_ci	unsigned long freed;
2858c2ecf20Sopenharmony_ci	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	if (!clean_zn_cnt) {
2888c2ecf20Sopenharmony_ci		/*
2898c2ecf20Sopenharmony_ci		 * No clean znodes, nothing to reap. All we can do in this case
2908c2ecf20Sopenharmony_ci		 * is to kick background threads to start commit, which will
2918c2ecf20Sopenharmony_ci		 * probably make clean znodes which, in turn, will be freeable.
2928c2ecf20Sopenharmony_ci		 * And we return -1 which means will make VM call us again
2938c2ecf20Sopenharmony_ci		 * later.
2948c2ecf20Sopenharmony_ci		 */
2958c2ecf20Sopenharmony_ci		dbg_tnc("no clean znodes, kick a thread");
2968c2ecf20Sopenharmony_ci		return kick_a_thread();
2978c2ecf20Sopenharmony_ci	}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
3008c2ecf20Sopenharmony_ci	if (freed >= nr)
3018c2ecf20Sopenharmony_ci		goto out;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	dbg_tnc("not enough old znodes, try to free young ones");
3048c2ecf20Sopenharmony_ci	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
3058c2ecf20Sopenharmony_ci	if (freed >= nr)
3068c2ecf20Sopenharmony_ci		goto out;
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	dbg_tnc("not enough young znodes, free all");
3098c2ecf20Sopenharmony_ci	freed += shrink_tnc_trees(nr - freed, 0, &contention);
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	if (!freed && contention) {
3128c2ecf20Sopenharmony_ci		dbg_tnc("freed nothing, but contention");
3138c2ecf20Sopenharmony_ci		return SHRINK_STOP;
3148c2ecf20Sopenharmony_ci	}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ciout:
3178c2ecf20Sopenharmony_ci	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
3188c2ecf20Sopenharmony_ci	return freed;
3198c2ecf20Sopenharmony_ci}
320