162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * This file is part of UBIFS.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2006-2008 Nokia Corporation.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Authors: Artem Bityutskiy (Битюцкий Артём)
862306a36Sopenharmony_ci *          Adrian Hunter
962306a36Sopenharmony_ci */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci/*
1262306a36Sopenharmony_ci * This file implements UBIFS shrinker which evicts clean znodes from the TNC
1362306a36Sopenharmony_ci * tree when Linux VM needs more RAM.
1462306a36Sopenharmony_ci *
1562306a36Sopenharmony_ci * We do not implement any LRU lists to find oldest znodes to free because it
1662306a36Sopenharmony_ci * would add additional overhead to the file system fast paths. So the shrinker
1762306a36Sopenharmony_ci * just walks the TNC tree when searching for znodes to free.
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci * If the root of a TNC sub-tree is clean and old enough, then the children are
2062306a36Sopenharmony_ci * also clean and old enough. So the shrinker walks the TNC in level order and
2162306a36Sopenharmony_ci * dumps entire sub-trees.
2262306a36Sopenharmony_ci *
2362306a36Sopenharmony_ci * The age of znodes is just the time-stamp when they were last looked at.
2462306a36Sopenharmony_ci * The current shrinker first tries to evict old znodes, then young ones.
2562306a36Sopenharmony_ci *
2662306a36Sopenharmony_ci * Since the shrinker is global, it has to protect against races with FS
2762306a36Sopenharmony_ci * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
2862306a36Sopenharmony_ci */
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include "ubifs.h"
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci/* List of all UBIFS file-system instances */
3362306a36Sopenharmony_ciLIST_HEAD(ubifs_infos);
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci/*
3662306a36Sopenharmony_ci * We number each shrinker run and record the number on the ubifs_info structure
3762306a36Sopenharmony_ci * so that we can easily work out which ubifs_info structures have already been
3862306a36Sopenharmony_ci * done by the current run.
3962306a36Sopenharmony_ci */
4062306a36Sopenharmony_cistatic unsigned int shrinker_run_no;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci/* Protects 'ubifs_infos' list */
4362306a36Sopenharmony_ciDEFINE_SPINLOCK(ubifs_infos_lock);
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci/* Global clean znode counter (for all mounted UBIFS instances) */
4662306a36Sopenharmony_ciatomic_long_t ubifs_clean_zn_cnt;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci/**
4962306a36Sopenharmony_ci * shrink_tnc - shrink TNC tree.
5062306a36Sopenharmony_ci * @c: UBIFS file-system description object
5162306a36Sopenharmony_ci * @nr: number of znodes to free
5262306a36Sopenharmony_ci * @age: the age of znodes to free
5362306a36Sopenharmony_ci * @contention: if any contention, this is set to %1
5462306a36Sopenharmony_ci *
5562306a36Sopenharmony_ci * This function traverses TNC tree and frees clean znodes. It does not free
5662306a36Sopenharmony_ci * clean znodes which younger then @age. Returns number of freed znodes.
5762306a36Sopenharmony_ci */
5862306a36Sopenharmony_cistatic int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
5962306a36Sopenharmony_ci{
6062306a36Sopenharmony_ci	int total_freed = 0;
6162306a36Sopenharmony_ci	struct ubifs_znode *znode, *zprev;
6262306a36Sopenharmony_ci	time64_t time = ktime_get_seconds();
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
6562306a36Sopenharmony_ci	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
6862306a36Sopenharmony_ci		return 0;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	/*
7162306a36Sopenharmony_ci	 * Traverse the TNC tree in levelorder manner, so that it is possible
7262306a36Sopenharmony_ci	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
7362306a36Sopenharmony_ci	 * children are older or of the same age.
7462306a36Sopenharmony_ci	 *
7562306a36Sopenharmony_ci	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
7662306a36Sopenharmony_ci	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
7762306a36Sopenharmony_ci	 * changed only when the 'c->tnc_mutex' is held.
7862306a36Sopenharmony_ci	 */
7962306a36Sopenharmony_ci	zprev = NULL;
8062306a36Sopenharmony_ci	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
8162306a36Sopenharmony_ci	while (znode && total_freed < nr &&
8262306a36Sopenharmony_ci	       atomic_long_read(&c->clean_zn_cnt) > 0) {
8362306a36Sopenharmony_ci		int freed;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci		/*
8662306a36Sopenharmony_ci		 * If the znode is clean, but it is in the 'c->cnext' list, this
8762306a36Sopenharmony_ci		 * means that this znode has just been written to flash as a
8862306a36Sopenharmony_ci		 * part of commit and was marked clean. They will be removed
8962306a36Sopenharmony_ci		 * from the list at end commit. We cannot change the list,
9062306a36Sopenharmony_ci		 * because it is not protected by any mutex (design decision to
9162306a36Sopenharmony_ci		 * make commit really independent and parallel to main I/O). So
9262306a36Sopenharmony_ci		 * we just skip these znodes.
9362306a36Sopenharmony_ci		 *
9462306a36Sopenharmony_ci		 * Note, the 'clean_zn_cnt' counters are not updated until
9562306a36Sopenharmony_ci		 * after the commit, so the UBIFS shrinker does not report
9662306a36Sopenharmony_ci		 * the znodes which are in the 'c->cnext' list as freeable.
9762306a36Sopenharmony_ci		 *
9862306a36Sopenharmony_ci		 * Also note, if the root of a sub-tree is not in 'c->cnext',
9962306a36Sopenharmony_ci		 * then the whole sub-tree is not in 'c->cnext' as well, so it
10062306a36Sopenharmony_ci		 * is safe to dump whole sub-tree.
10162306a36Sopenharmony_ci		 */
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci		if (znode->cnext) {
10462306a36Sopenharmony_ci			/*
10562306a36Sopenharmony_ci			 * Very soon these znodes will be removed from the list
10662306a36Sopenharmony_ci			 * and become freeable.
10762306a36Sopenharmony_ci			 */
10862306a36Sopenharmony_ci			*contention = 1;
10962306a36Sopenharmony_ci		} else if (!ubifs_zn_dirty(znode) &&
11062306a36Sopenharmony_ci			   abs(time - znode->time) >= age) {
11162306a36Sopenharmony_ci			if (znode->parent)
11262306a36Sopenharmony_ci				znode->parent->zbranch[znode->iip].znode = NULL;
11362306a36Sopenharmony_ci			else
11462306a36Sopenharmony_ci				c->zroot.znode = NULL;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci			freed = ubifs_destroy_tnc_subtree(c, znode);
11762306a36Sopenharmony_ci			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
11862306a36Sopenharmony_ci			atomic_long_sub(freed, &c->clean_zn_cnt);
11962306a36Sopenharmony_ci			total_freed += freed;
12062306a36Sopenharmony_ci			znode = zprev;
12162306a36Sopenharmony_ci		}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci		if (unlikely(!c->zroot.znode))
12462306a36Sopenharmony_ci			break;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci		zprev = znode;
12762306a36Sopenharmony_ci		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
12862306a36Sopenharmony_ci		cond_resched();
12962306a36Sopenharmony_ci	}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	return total_freed;
13262306a36Sopenharmony_ci}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci/**
13562306a36Sopenharmony_ci * shrink_tnc_trees - shrink UBIFS TNC trees.
13662306a36Sopenharmony_ci * @nr: number of znodes to free
13762306a36Sopenharmony_ci * @age: the age of znodes to free
13862306a36Sopenharmony_ci * @contention: if any contention, this is set to %1
13962306a36Sopenharmony_ci *
14062306a36Sopenharmony_ci * This function walks the list of mounted UBIFS file-systems and frees clean
14162306a36Sopenharmony_ci * znodes which are older than @age, until at least @nr znodes are freed.
14262306a36Sopenharmony_ci * Returns the number of freed znodes.
14362306a36Sopenharmony_ci */
14462306a36Sopenharmony_cistatic int shrink_tnc_trees(int nr, int age, int *contention)
14562306a36Sopenharmony_ci{
14662306a36Sopenharmony_ci	struct ubifs_info *c;
14762306a36Sopenharmony_ci	struct list_head *p;
14862306a36Sopenharmony_ci	unsigned int run_no;
14962306a36Sopenharmony_ci	int freed = 0;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	spin_lock(&ubifs_infos_lock);
15262306a36Sopenharmony_ci	do {
15362306a36Sopenharmony_ci		run_no = ++shrinker_run_no;
15462306a36Sopenharmony_ci	} while (run_no == 0);
15562306a36Sopenharmony_ci	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
15662306a36Sopenharmony_ci	p = ubifs_infos.next;
15762306a36Sopenharmony_ci	while (p != &ubifs_infos) {
15862306a36Sopenharmony_ci		c = list_entry(p, struct ubifs_info, infos_list);
15962306a36Sopenharmony_ci		/*
16062306a36Sopenharmony_ci		 * We move the ones we do to the end of the list, so we stop
16162306a36Sopenharmony_ci		 * when we see one we have already done.
16262306a36Sopenharmony_ci		 */
16362306a36Sopenharmony_ci		if (c->shrinker_run_no == run_no)
16462306a36Sopenharmony_ci			break;
16562306a36Sopenharmony_ci		if (!mutex_trylock(&c->umount_mutex)) {
16662306a36Sopenharmony_ci			/* Some un-mount is in progress, try next FS */
16762306a36Sopenharmony_ci			*contention = 1;
16862306a36Sopenharmony_ci			p = p->next;
16962306a36Sopenharmony_ci			continue;
17062306a36Sopenharmony_ci		}
17162306a36Sopenharmony_ci		/*
17262306a36Sopenharmony_ci		 * We're holding 'c->umount_mutex', so the file-system won't go
17362306a36Sopenharmony_ci		 * away.
17462306a36Sopenharmony_ci		 */
17562306a36Sopenharmony_ci		if (!mutex_trylock(&c->tnc_mutex)) {
17662306a36Sopenharmony_ci			mutex_unlock(&c->umount_mutex);
17762306a36Sopenharmony_ci			*contention = 1;
17862306a36Sopenharmony_ci			p = p->next;
17962306a36Sopenharmony_ci			continue;
18062306a36Sopenharmony_ci		}
18162306a36Sopenharmony_ci		spin_unlock(&ubifs_infos_lock);
18262306a36Sopenharmony_ci		/*
18362306a36Sopenharmony_ci		 * OK, now we have TNC locked, the file-system cannot go away -
18462306a36Sopenharmony_ci		 * it is safe to reap the cache.
18562306a36Sopenharmony_ci		 */
18662306a36Sopenharmony_ci		c->shrinker_run_no = run_no;
18762306a36Sopenharmony_ci		freed += shrink_tnc(c, nr, age, contention);
18862306a36Sopenharmony_ci		mutex_unlock(&c->tnc_mutex);
18962306a36Sopenharmony_ci		spin_lock(&ubifs_infos_lock);
19062306a36Sopenharmony_ci		/* Get the next list element before we move this one */
19162306a36Sopenharmony_ci		p = p->next;
19262306a36Sopenharmony_ci		/*
19362306a36Sopenharmony_ci		 * Move this one to the end of the list to provide some
19462306a36Sopenharmony_ci		 * fairness.
19562306a36Sopenharmony_ci		 */
19662306a36Sopenharmony_ci		list_move_tail(&c->infos_list, &ubifs_infos);
19762306a36Sopenharmony_ci		mutex_unlock(&c->umount_mutex);
19862306a36Sopenharmony_ci		if (freed >= nr)
19962306a36Sopenharmony_ci			break;
20062306a36Sopenharmony_ci	}
20162306a36Sopenharmony_ci	spin_unlock(&ubifs_infos_lock);
20262306a36Sopenharmony_ci	return freed;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/**
20662306a36Sopenharmony_ci * kick_a_thread - kick a background thread to start commit.
20762306a36Sopenharmony_ci *
20862306a36Sopenharmony_ci * This function kicks a background thread to start background commit. Returns
20962306a36Sopenharmony_ci * %-1 if a thread was kicked or there is another reason to assume the memory
21062306a36Sopenharmony_ci * will soon be freed or become freeable. If there are no dirty znodes, returns
21162306a36Sopenharmony_ci * %0.
21262306a36Sopenharmony_ci */
21362306a36Sopenharmony_cistatic int kick_a_thread(void)
21462306a36Sopenharmony_ci{
21562306a36Sopenharmony_ci	int i;
21662306a36Sopenharmony_ci	struct ubifs_info *c;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	/*
21962306a36Sopenharmony_ci	 * Iterate over all mounted UBIFS file-systems and find out if there is
22062306a36Sopenharmony_ci	 * already an ongoing commit operation there. If no, then iterate for
22162306a36Sopenharmony_ci	 * the second time and initiate background commit.
22262306a36Sopenharmony_ci	 */
22362306a36Sopenharmony_ci	spin_lock(&ubifs_infos_lock);
22462306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
22562306a36Sopenharmony_ci		list_for_each_entry(c, &ubifs_infos, infos_list) {
22662306a36Sopenharmony_ci			long dirty_zn_cnt;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci			if (!mutex_trylock(&c->umount_mutex)) {
22962306a36Sopenharmony_ci				/*
23062306a36Sopenharmony_ci				 * Some un-mount is in progress, it will
23162306a36Sopenharmony_ci				 * certainly free memory, so just return.
23262306a36Sopenharmony_ci				 */
23362306a36Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
23462306a36Sopenharmony_ci				return -1;
23562306a36Sopenharmony_ci			}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
24062306a36Sopenharmony_ci			    c->ro_mount || c->ro_error) {
24162306a36Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
24262306a36Sopenharmony_ci				continue;
24362306a36Sopenharmony_ci			}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci			if (c->cmt_state != COMMIT_RESTING) {
24662306a36Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
24762306a36Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
24862306a36Sopenharmony_ci				return -1;
24962306a36Sopenharmony_ci			}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci			if (i == 1) {
25262306a36Sopenharmony_ci				list_move_tail(&c->infos_list, &ubifs_infos);
25362306a36Sopenharmony_ci				spin_unlock(&ubifs_infos_lock);
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci				ubifs_request_bg_commit(c);
25662306a36Sopenharmony_ci				mutex_unlock(&c->umount_mutex);
25762306a36Sopenharmony_ci				return -1;
25862306a36Sopenharmony_ci			}
25962306a36Sopenharmony_ci			mutex_unlock(&c->umount_mutex);
26062306a36Sopenharmony_ci		}
26162306a36Sopenharmony_ci	}
26262306a36Sopenharmony_ci	spin_unlock(&ubifs_infos_lock);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	return 0;
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ciunsigned long ubifs_shrink_count(struct shrinker *shrink,
26862306a36Sopenharmony_ci				 struct shrink_control *sc)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	/*
27362306a36Sopenharmony_ci	 * Due to the way UBIFS updates the clean znode counter it may
27462306a36Sopenharmony_ci	 * temporarily be negative.
27562306a36Sopenharmony_ci	 */
27662306a36Sopenharmony_ci	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
27762306a36Sopenharmony_ci}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ciunsigned long ubifs_shrink_scan(struct shrinker *shrink,
28062306a36Sopenharmony_ci				struct shrink_control *sc)
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci	unsigned long nr = sc->nr_to_scan;
28362306a36Sopenharmony_ci	int contention = 0;
28462306a36Sopenharmony_ci	unsigned long freed;
28562306a36Sopenharmony_ci	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	if (!clean_zn_cnt) {
28862306a36Sopenharmony_ci		/*
28962306a36Sopenharmony_ci		 * No clean znodes, nothing to reap. All we can do in this case
29062306a36Sopenharmony_ci		 * is to kick background threads to start commit, which will
29162306a36Sopenharmony_ci		 * probably make clean znodes which, in turn, will be freeable.
29262306a36Sopenharmony_ci		 * And we return -1 which means will make VM call us again
29362306a36Sopenharmony_ci		 * later.
29462306a36Sopenharmony_ci		 */
29562306a36Sopenharmony_ci		dbg_tnc("no clean znodes, kick a thread");
29662306a36Sopenharmony_ci		return kick_a_thread();
29762306a36Sopenharmony_ci	}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
30062306a36Sopenharmony_ci	if (freed >= nr)
30162306a36Sopenharmony_ci		goto out;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	dbg_tnc("not enough old znodes, try to free young ones");
30462306a36Sopenharmony_ci	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
30562306a36Sopenharmony_ci	if (freed >= nr)
30662306a36Sopenharmony_ci		goto out;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	dbg_tnc("not enough young znodes, free all");
30962306a36Sopenharmony_ci	freed += shrink_tnc_trees(nr - freed, 0, &contention);
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	if (!freed && contention) {
31262306a36Sopenharmony_ci		dbg_tnc("freed nothing, but contention");
31362306a36Sopenharmony_ci		return SHRINK_STOP;
31462306a36Sopenharmony_ci	}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ciout:
31762306a36Sopenharmony_ci	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
31862306a36Sopenharmony_ci	return freed;
31962306a36Sopenharmony_ci}
320