// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <asm/page.h>
762306a36Sopenharmony_ci#include <linux/mm.h>
862306a36Sopenharmony_ci#include <linux/mm_types.h>
962306a36Sopenharmony_ci#include <linux/radix-tree.h>
1062306a36Sopenharmony_ci#include <linux/rmap.h>
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci#include <linux/oom.h> /* find_lock_task_mm */
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/mm_purgeable.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_cistruct uxpte_t {
1762306a36Sopenharmony_ci	atomic64_t val;
1862306a36Sopenharmony_ci};
1962306a36Sopenharmony_ci
/* Size of one uxpte: 1 << 3 = 8 bytes. */
#define UXPTE_SIZE_SHIFT	3
#define UXPTE_SIZE		(1 << UXPTE_SIZE_SHIFT)

/* How many uxptes fit into a single page. */
#define UXPTE_PER_PAGE_SHIFT	(PAGE_SHIFT - UXPTE_SIZE_SHIFT)
#define UXPTE_PER_PAGE		(1 << UXPTE_PER_PAGE_SHIFT)

/*
 * Value layout of a uxpte word: the lowest UXPTE_PRESENT_BIT bit(s) hold the
 * present flag, everything above is the reference count.
 * UXPTE_UNDER_RECLAIM is the word with a reference count of -1 and the
 * present flag clear.
 */
#define UXPTE_PRESENT_BIT	1
#define UXPTE_PRESENT_MASK	((1 << UXPTE_PRESENT_BIT) - 1)
#define UXPTE_REFCNT_ONE	(1 << UXPTE_PRESENT_BIT)
#define UXPTE_UNDER_RECLAIM	(-UXPTE_REFCNT_ONE)

/* vpn():       virtual page number of @vaddr */
#define vpn(vaddr)		((vaddr) >> PAGE_SHIFT)
/* uxpte_pn():  index of the uxpte page that covers @vaddr */
#define uxpte_pn(vaddr)		(vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT)
/* uxpte_off(): slot of @vaddr's uxpte inside its uxpte page */
#define uxpte_off(vaddr)	(vpn(vaddr) & (UXPTE_PER_PAGE - 1))
/* uxpn2addr(): lowest virtual address covered by uxpte page @uxpn */
#define uxpn2addr(uxpn)		((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT))
/* Field extractors for a uxpte word that has already been read. */
#define uxpte_refcnt(uxpte)	((uxpte) >> UXPTE_PRESENT_BIT)
#define uxpte_present(uxpte)	((uxpte) & UXPTE_PRESENT_MASK)
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic inline long uxpte_read(struct uxpte_t *uxpte)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	return atomic64_read(&uxpte->val);
4162306a36Sopenharmony_ci}
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic inline void uxpte_set(struct uxpte_t *uxpte, long val)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	atomic64_set(&uxpte->val, val);
4662306a36Sopenharmony_ci}
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_cistatic inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new)
4962306a36Sopenharmony_ci{
5062306a36Sopenharmony_ci	return atomic64_cmpxchg(&uxpte->val, old, new) == old;
5162306a36Sopenharmony_ci}
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_civoid mm_init_uxpgd(struct mm_struct *mm)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	mm->uxpgd = NULL;
5662306a36Sopenharmony_ci	spin_lock_init(&mm->uxpgd_lock);
5762306a36Sopenharmony_ci}
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_civoid mm_clear_uxpgd(struct mm_struct *mm)
6062306a36Sopenharmony_ci{
6162306a36Sopenharmony_ci	struct page *page = NULL;
6262306a36Sopenharmony_ci	void **slot = NULL;
6362306a36Sopenharmony_ci	struct radix_tree_iter iter;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	spin_lock(&mm->uxpgd_lock);
6662306a36Sopenharmony_ci	if (!mm->uxpgd)
6762306a36Sopenharmony_ci		goto out;
6862306a36Sopenharmony_ci	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
6962306a36Sopenharmony_ci		page = radix_tree_delete(mm->uxpgd, iter.index);
7062306a36Sopenharmony_ci		put_page(page);
7162306a36Sopenharmony_ci	}
7262306a36Sopenharmony_ciout:
7362306a36Sopenharmony_ci	kfree(mm->uxpgd);
7462306a36Sopenharmony_ci	mm->uxpgd = NULL;
7562306a36Sopenharmony_ci	spin_unlock(&mm->uxpgd_lock);
7662306a36Sopenharmony_ci}
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci/* should hold uxpgd_lock before invoke */
7962306a36Sopenharmony_cistatic struct page *lookup_uxpte_page(struct vm_area_struct *vma,
8062306a36Sopenharmony_ci	unsigned long addr, bool alloc)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	struct radix_tree_root *uxpgd = NULL;
8362306a36Sopenharmony_ci	struct page *page = NULL;
8462306a36Sopenharmony_ci    struct folio *new_folio = NULL;
8562306a36Sopenharmony_ci	struct page *new_page = NULL;
8662306a36Sopenharmony_ci	struct mm_struct *mm = vma->vm_mm;
8762306a36Sopenharmony_ci	unsigned long uxpn = uxpte_pn(addr);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	if (mm->uxpgd)
9062306a36Sopenharmony_ci		goto lookup;
9162306a36Sopenharmony_ci	if (!alloc)
9262306a36Sopenharmony_ci		goto out;
9362306a36Sopenharmony_ci	spin_unlock(&mm->uxpgd_lock);
9462306a36Sopenharmony_ci	uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL);
9562306a36Sopenharmony_ci	if (!uxpgd) {
9662306a36Sopenharmony_ci		pr_err("uxpgd alloc failed.\n");
9762306a36Sopenharmony_ci		spin_lock(&mm->uxpgd_lock);
9862306a36Sopenharmony_ci		goto out;
9962306a36Sopenharmony_ci	}
10062306a36Sopenharmony_ci	INIT_RADIX_TREE(uxpgd, GFP_KERNEL);
10162306a36Sopenharmony_ci	spin_lock(&mm->uxpgd_lock);
10262306a36Sopenharmony_ci	if (mm->uxpgd)
10362306a36Sopenharmony_ci		kfree(uxpgd);
10462306a36Sopenharmony_ci	else
10562306a36Sopenharmony_ci		mm->uxpgd = uxpgd;
10662306a36Sopenharmony_cilookup:
10762306a36Sopenharmony_ci	page = radix_tree_lookup(mm->uxpgd, uxpn);
10862306a36Sopenharmony_ci	if (page)
10962306a36Sopenharmony_ci		goto out;
11062306a36Sopenharmony_ci	if (!alloc)
11162306a36Sopenharmony_ci		goto out;
11262306a36Sopenharmony_ci	spin_unlock(&mm->uxpgd_lock);
11362306a36Sopenharmony_ci	new_folio = vma_alloc_zeroed_movable_folio(vma, addr);
11462306a36Sopenharmony_ci	if (!new_folio) {
11562306a36Sopenharmony_ci		pr_err("uxpte page alloc fail.\n");
11662306a36Sopenharmony_ci		spin_lock(&mm->uxpgd_lock);
11762306a36Sopenharmony_ci		goto out;
11862306a36Sopenharmony_ci	}
11962306a36Sopenharmony_ci    new_page = &new_folio->page;
12062306a36Sopenharmony_ci	if (radix_tree_preload(GFP_KERNEL)) {
12162306a36Sopenharmony_ci		put_page(new_page);
12262306a36Sopenharmony_ci		pr_err("radix preload fail.\n");
12362306a36Sopenharmony_ci		spin_lock(&mm->uxpgd_lock);
12462306a36Sopenharmony_ci		goto out;
12562306a36Sopenharmony_ci	}
12662306a36Sopenharmony_ci	spin_lock(&mm->uxpgd_lock);
12762306a36Sopenharmony_ci	page = radix_tree_lookup(mm->uxpgd, uxpn);
12862306a36Sopenharmony_ci	if (page) {
12962306a36Sopenharmony_ci		put_page(new_page);
13062306a36Sopenharmony_ci	} else {
13162306a36Sopenharmony_ci		page = new_page;
13262306a36Sopenharmony_ci		radix_tree_insert(mm->uxpgd, uxpn, page);
13362306a36Sopenharmony_ci	}
13462306a36Sopenharmony_ci	radix_tree_preload_end();
13562306a36Sopenharmony_ciout:
13662306a36Sopenharmony_ci	return page;
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci/* should hold uxpgd_lock before invoke */
14062306a36Sopenharmony_cistatic struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma,
14162306a36Sopenharmony_ci		unsigned long addr, bool alloc)
14262306a36Sopenharmony_ci{
14362306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
14462306a36Sopenharmony_ci	struct page *page = NULL;
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	page = lookup_uxpte_page(vma, addr, alloc);
14762306a36Sopenharmony_ci	if (!page)
14862306a36Sopenharmony_ci		return NULL;
14962306a36Sopenharmony_ci	uxpte = page_to_virt(page);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	return uxpte + uxpte_off(addr);
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cibool lock_uxpte(struct vm_area_struct *vma, unsigned long addr)
15562306a36Sopenharmony_ci{
15662306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
15762306a36Sopenharmony_ci	long val = 0;
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	spin_lock(&vma->vm_mm->uxpgd_lock);
16062306a36Sopenharmony_ci	uxpte = lookup_uxpte(vma, addr, true);
16162306a36Sopenharmony_ci	if (!uxpte)
16262306a36Sopenharmony_ci		goto unlock;
16362306a36Sopenharmony_ciretry:
16462306a36Sopenharmony_ci	val = uxpte_read(uxpte);
16562306a36Sopenharmony_ci	if (val >> 1)
16662306a36Sopenharmony_ci		goto unlock;
16762306a36Sopenharmony_ci	if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM))
16862306a36Sopenharmony_ci		goto retry;
16962306a36Sopenharmony_ci	val = UXPTE_UNDER_RECLAIM;
17062306a36Sopenharmony_ciunlock:
17162306a36Sopenharmony_ci	spin_unlock(&vma->vm_mm->uxpgd_lock);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	return val == UXPTE_UNDER_RECLAIM;
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_civoid unlock_uxpte(struct vm_area_struct *vma, unsigned long addr)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	spin_lock(&vma->vm_mm->uxpgd_lock);
18162306a36Sopenharmony_ci	uxpte = lookup_uxpte(vma, addr, false);
18262306a36Sopenharmony_ci	if (!uxpte)
18362306a36Sopenharmony_ci		goto unlock;
18462306a36Sopenharmony_ci	uxpte_set(uxpte, 0);
18562306a36Sopenharmony_ciunlock:
18662306a36Sopenharmony_ci	spin_unlock(&vma->vm_mm->uxpgd_lock);
18762306a36Sopenharmony_ci}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_cibool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr)
19062306a36Sopenharmony_ci{
19162306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
19262306a36Sopenharmony_ci	long val = 0;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	spin_lock(&vma->vm_mm->uxpgd_lock);
19562306a36Sopenharmony_ci	uxpte = lookup_uxpte(vma, addr, true);
19662306a36Sopenharmony_ci	if (!uxpte)
19762306a36Sopenharmony_ci		goto unlock;
19862306a36Sopenharmony_ciretry:
19962306a36Sopenharmony_ci	val = uxpte_read(uxpte);
20062306a36Sopenharmony_ci	if (val & 1)
20162306a36Sopenharmony_ci		goto unlock;
20262306a36Sopenharmony_ci	if (!uxpte_cas(uxpte, val, val + 1))
20362306a36Sopenharmony_ci		goto retry;
20462306a36Sopenharmony_ci	val++;
20562306a36Sopenharmony_ciunlock:
20662306a36Sopenharmony_ci	spin_unlock(&vma->vm_mm->uxpgd_lock);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	return val & 1;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_civoid uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
21462306a36Sopenharmony_ci	long val = 0;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	spin_lock(&vma->vm_mm->uxpgd_lock);
21762306a36Sopenharmony_ci	uxpte = lookup_uxpte(vma, addr, false);
21862306a36Sopenharmony_ci	if (!uxpte)
21962306a36Sopenharmony_ci		goto unlock;
22062306a36Sopenharmony_ciretry:
22162306a36Sopenharmony_ci	val = uxpte_read(uxpte);
22262306a36Sopenharmony_ci	if (!(val & 1))
22362306a36Sopenharmony_ci		goto unlock;
22462306a36Sopenharmony_ci	if (!uxpte_cas(uxpte, val, val - 1))
22562306a36Sopenharmony_ci		goto retry;
22662306a36Sopenharmony_ciunlock:
22762306a36Sopenharmony_ci	spin_unlock(&vma->vm_mm->uxpgd_lock);
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_civm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
23362306a36Sopenharmony_ci	unsigned long vma_uxpn = vma->vm_pgoff;
23462306a36Sopenharmony_ci	unsigned long off_uxpn = vpn(vmf->address - vma->vm_start);
23562306a36Sopenharmony_ci	unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn);
23662306a36Sopenharmony_ci	struct page *page = NULL;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	if (unlikely(anon_vma_prepare(vma)))
23962306a36Sopenharmony_ci		return VM_FAULT_OOM;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	spin_lock(&vma->vm_mm->uxpgd_lock);
24262306a36Sopenharmony_ci	page = lookup_uxpte_page(vma, addr, true);
24362306a36Sopenharmony_ci	spin_unlock(&vma->vm_mm->uxpgd_lock);
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	if (!page)
24662306a36Sopenharmony_ci		return VM_FAULT_OOM;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	*entry = mk_pte(page, vma->vm_page_prot);
24962306a36Sopenharmony_ci	*entry = pte_sw_mkyoung(*entry);
25062306a36Sopenharmony_ci	if (vma->vm_flags & VM_WRITE)
25162306a36Sopenharmony_ci		*entry = pte_mkwrite(pte_mkdirty(*entry), vma);
25262306a36Sopenharmony_ci	return 0;
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_cistatic void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
25662306a36Sopenharmony_ci	unsigned long *pined_purg_pages)
25762306a36Sopenharmony_ci{
25862306a36Sopenharmony_ci	struct page *page = NULL;
25962306a36Sopenharmony_ci	void **slot = NULL;
26062306a36Sopenharmony_ci	struct radix_tree_iter iter;
26162306a36Sopenharmony_ci	struct uxpte_t *uxpte = NULL;
26262306a36Sopenharmony_ci	long pte_entry = 0;
26362306a36Sopenharmony_ci	int index = 0;
26462306a36Sopenharmony_ci	unsigned long nr_total = 0, nr_pined = 0;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	spin_lock(&mm->uxpgd_lock);
26762306a36Sopenharmony_ci	if (!mm->uxpgd)
26862306a36Sopenharmony_ci		goto out;
26962306a36Sopenharmony_ci	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
27062306a36Sopenharmony_ci		page = radix_tree_deref_slot(slot);
27162306a36Sopenharmony_ci		if (unlikely(!page))
27262306a36Sopenharmony_ci			continue;
27362306a36Sopenharmony_ci		uxpte = page_to_virt(page);
27462306a36Sopenharmony_ci		for (index = 0; index < UXPTE_PER_PAGE; index++) {
27562306a36Sopenharmony_ci			pte_entry = uxpte_read(&(uxpte[index]));
27662306a36Sopenharmony_ci			if (uxpte_present(pte_entry) == 0) /* not present */
27762306a36Sopenharmony_ci				continue;
27862306a36Sopenharmony_ci			nr_total++;
27962306a36Sopenharmony_ci			if (uxpte_refcnt(pte_entry) > 0) /* pined by user */
28062306a36Sopenharmony_ci				nr_pined++;
28162306a36Sopenharmony_ci		}
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ciout:
28462306a36Sopenharmony_ci	spin_unlock(&mm->uxpgd_lock);
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	if (total_purg_pages)
28762306a36Sopenharmony_ci		*total_purg_pages = nr_total;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	if (pined_purg_pages)
29062306a36Sopenharmony_ci		*pined_purg_pages = nr_pined;
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
/*
 * Checked front end for __mm_purg_pages_info(): does nothing when @mm is
 * NULL or when the caller supplied neither output pointer.
 */
void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
	unsigned long *pined_purg_pages)
{
	if (unlikely(!mm) || (!total_purg_pages && !pined_purg_pages))
		return;

	__mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages);
}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_civoid purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	struct task_struct *p = NULL;
30862306a36Sopenharmony_ci	struct task_struct *tsk = NULL;
30962306a36Sopenharmony_ci	unsigned long mm_nr_purge = 0, mm_nr_pined = 0;
31062306a36Sopenharmony_ci	unsigned long nr_total = 0, nr_pined = 0;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	if (!total_purg_pages && !pined_purg_pages)
31362306a36Sopenharmony_ci		return;
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (total_purg_pages)
31662306a36Sopenharmony_ci		*total_purg_pages = 0;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	if (pined_purg_pages)
31962306a36Sopenharmony_ci		*pined_purg_pages = 0;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	rcu_read_lock();
32262306a36Sopenharmony_ci	for_each_process(p) {
32362306a36Sopenharmony_ci		tsk = find_lock_task_mm(p);
32462306a36Sopenharmony_ci		if (!tsk) {
32562306a36Sopenharmony_ci			/*
32662306a36Sopenharmony_ci			 * It is a kthread or all of p's threads have already
32762306a36Sopenharmony_ci			 * detached their mm's.
32862306a36Sopenharmony_ci			 */
32962306a36Sopenharmony_ci			continue;
33062306a36Sopenharmony_ci		}
33162306a36Sopenharmony_ci		__mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined);
33262306a36Sopenharmony_ci		nr_total += mm_nr_purge;
33362306a36Sopenharmony_ci		nr_pined += mm_nr_pined;
33462306a36Sopenharmony_ci		task_unlock(tsk);
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci		if (mm_nr_purge > 0) {
33762306a36Sopenharmony_ci			pr_info("purgemm: tsk: %s %lu pined in %lu pages\n", tsk->comm ?: "NULL",
33862306a36Sopenharmony_ci				mm_nr_pined, mm_nr_purge);
33962306a36Sopenharmony_ci		}
34062306a36Sopenharmony_ci	}
34162306a36Sopenharmony_ci	rcu_read_unlock();
34262306a36Sopenharmony_ci	if (total_purg_pages)
34362306a36Sopenharmony_ci		*total_purg_pages = nr_total;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	if (pined_purg_pages)
34662306a36Sopenharmony_ci		*pined_purg_pages = nr_pined;
34762306a36Sopenharmony_ci	pr_info("purgemm: Sum: %lu pined in %lu pages\n", nr_pined, nr_total);
34862306a36Sopenharmony_ci}
349