162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2024 Huawei Device Co., Ltd. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <asm/page.h> 762306a36Sopenharmony_ci#include <linux/mm.h> 862306a36Sopenharmony_ci#include <linux/mm_types.h> 962306a36Sopenharmony_ci#include <linux/radix-tree.h> 1062306a36Sopenharmony_ci#include <linux/rmap.h> 1162306a36Sopenharmony_ci#include <linux/slab.h> 1262306a36Sopenharmony_ci#include <linux/oom.h> /* find_lock_task_mm */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/mm_purgeable.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_cistruct uxpte_t { 1762306a36Sopenharmony_ci atomic64_t val; 1862306a36Sopenharmony_ci}; 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#define UXPTE_SIZE_SHIFT 3 2162306a36Sopenharmony_ci#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT) 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT) 2462306a36Sopenharmony_ci#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT) 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#define UXPTE_PRESENT_BIT 1 2762306a36Sopenharmony_ci#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1) 2862306a36Sopenharmony_ci#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT) 2962306a36Sopenharmony_ci#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE) 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT) 3262306a36Sopenharmony_ci#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT) 3362306a36Sopenharmony_ci#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1)) 3462306a36Sopenharmony_ci#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT)) 3562306a36Sopenharmony_ci#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT) 3662306a36Sopenharmony_ci#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK) 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic inline long uxpte_read(struct uxpte_t *uxpte) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci return atomic64_read(&uxpte->val); 4162306a36Sopenharmony_ci} 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_cistatic inline void uxpte_set(struct uxpte_t *uxpte, long val) 4462306a36Sopenharmony_ci{ 4562306a36Sopenharmony_ci atomic64_set(&uxpte->val, val); 4662306a36Sopenharmony_ci} 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cistatic inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci return atomic64_cmpxchg(&uxpte->val, old, new) == old; 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_civoid mm_init_uxpgd(struct mm_struct *mm) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci mm->uxpgd = NULL; 5662306a36Sopenharmony_ci spin_lock_init(&mm->uxpgd_lock); 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_civoid mm_clear_uxpgd(struct mm_struct *mm) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci struct page *page = NULL; 6262306a36Sopenharmony_ci void **slot = NULL; 6362306a36Sopenharmony_ci struct radix_tree_iter iter; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 6662306a36Sopenharmony_ci if (!mm->uxpgd) 6762306a36Sopenharmony_ci goto out; 6862306a36Sopenharmony_ci radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) { 6962306a36Sopenharmony_ci page = radix_tree_delete(mm->uxpgd, iter.index); 7062306a36Sopenharmony_ci put_page(page); 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ciout: 7362306a36Sopenharmony_ci kfree(mm->uxpgd); 7462306a36Sopenharmony_ci mm->uxpgd = NULL; 7562306a36Sopenharmony_ci spin_unlock(&mm->uxpgd_lock); 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci/* should hold uxpgd_lock before invoke */ 7962306a36Sopenharmony_cistatic struct page *lookup_uxpte_page(struct vm_area_struct *vma, 8062306a36Sopenharmony_ci unsigned long addr, bool alloc) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci struct radix_tree_root *uxpgd = NULL; 8362306a36Sopenharmony_ci struct page *page = NULL; 8462306a36Sopenharmony_ci struct folio *new_folio = NULL; 8562306a36Sopenharmony_ci struct page *new_page = NULL; 8662306a36Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 8762306a36Sopenharmony_ci unsigned long uxpn = uxpte_pn(addr); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci if (mm->uxpgd) 9062306a36Sopenharmony_ci goto lookup; 9162306a36Sopenharmony_ci if (!alloc) 9262306a36Sopenharmony_ci goto out; 9362306a36Sopenharmony_ci spin_unlock(&mm->uxpgd_lock); 9462306a36Sopenharmony_ci uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL); 9562306a36Sopenharmony_ci if (!uxpgd) { 9662306a36Sopenharmony_ci pr_err("uxpgd alloc failed.\n"); 9762306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 9862306a36Sopenharmony_ci goto out; 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci INIT_RADIX_TREE(uxpgd, GFP_KERNEL); 10162306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 10262306a36Sopenharmony_ci if (mm->uxpgd) 10362306a36Sopenharmony_ci kfree(uxpgd); 10462306a36Sopenharmony_ci else 10562306a36Sopenharmony_ci mm->uxpgd = uxpgd; 10662306a36Sopenharmony_cilookup: 10762306a36Sopenharmony_ci page = radix_tree_lookup(mm->uxpgd, uxpn); 10862306a36Sopenharmony_ci if (page) 10962306a36Sopenharmony_ci goto out; 11062306a36Sopenharmony_ci if (!alloc) 11162306a36Sopenharmony_ci goto out; 11262306a36Sopenharmony_ci spin_unlock(&mm->uxpgd_lock); 11362306a36Sopenharmony_ci new_folio = vma_alloc_zeroed_movable_folio(vma, addr); 11462306a36Sopenharmony_ci if (!new_folio) { 11562306a36Sopenharmony_ci pr_err("uxpte page alloc fail.\n"); 11662306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 11762306a36Sopenharmony_ci goto out; 11862306a36Sopenharmony_ci } 11962306a36Sopenharmony_ci new_page = &new_folio->page; 12062306a36Sopenharmony_ci if (radix_tree_preload(GFP_KERNEL)) { 12162306a36Sopenharmony_ci put_page(new_page); 12262306a36Sopenharmony_ci pr_err("radix preload fail.\n"); 12362306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 12462306a36Sopenharmony_ci goto out; 12562306a36Sopenharmony_ci } 12662306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 12762306a36Sopenharmony_ci page = radix_tree_lookup(mm->uxpgd, uxpn); 12862306a36Sopenharmony_ci if (page) { 12962306a36Sopenharmony_ci put_page(new_page); 13062306a36Sopenharmony_ci } else { 13162306a36Sopenharmony_ci page = new_page; 13262306a36Sopenharmony_ci radix_tree_insert(mm->uxpgd, uxpn, page); 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci radix_tree_preload_end(); 13562306a36Sopenharmony_ciout: 13662306a36Sopenharmony_ci return page; 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci/* should hold uxpgd_lock before invoke */ 14062306a36Sopenharmony_cistatic struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma, 14162306a36Sopenharmony_ci unsigned long addr, bool alloc) 14262306a36Sopenharmony_ci{ 14362306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 14462306a36Sopenharmony_ci struct page *page = NULL; 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci page = lookup_uxpte_page(vma, addr, alloc); 14762306a36Sopenharmony_ci if (!page) 14862306a36Sopenharmony_ci return NULL; 14962306a36Sopenharmony_ci uxpte = page_to_virt(page); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci return uxpte + uxpte_off(addr); 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cibool lock_uxpte(struct vm_area_struct *vma, unsigned long addr) 15562306a36Sopenharmony_ci{ 15662306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 15762306a36Sopenharmony_ci long val = 0; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci spin_lock(&vma->vm_mm->uxpgd_lock); 16062306a36Sopenharmony_ci uxpte = lookup_uxpte(vma, addr, true); 16162306a36Sopenharmony_ci if (!uxpte) 16262306a36Sopenharmony_ci goto unlock; 16362306a36Sopenharmony_ciretry: 16462306a36Sopenharmony_ci val = uxpte_read(uxpte); 16562306a36Sopenharmony_ci if (val >> 1) 16662306a36Sopenharmony_ci goto unlock; 16762306a36Sopenharmony_ci if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM)) 16862306a36Sopenharmony_ci goto retry; 16962306a36Sopenharmony_ci val = UXPTE_UNDER_RECLAIM; 17062306a36Sopenharmony_ciunlock: 17162306a36Sopenharmony_ci spin_unlock(&vma->vm_mm->uxpgd_lock); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci return val == UXPTE_UNDER_RECLAIM; 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_civoid unlock_uxpte(struct vm_area_struct *vma, unsigned long addr) 17762306a36Sopenharmony_ci{ 17862306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci spin_lock(&vma->vm_mm->uxpgd_lock); 18162306a36Sopenharmony_ci uxpte = lookup_uxpte(vma, addr, false); 18262306a36Sopenharmony_ci if (!uxpte) 18362306a36Sopenharmony_ci goto unlock; 18462306a36Sopenharmony_ci uxpte_set(uxpte, 0); 18562306a36Sopenharmony_ciunlock: 18662306a36Sopenharmony_ci spin_unlock(&vma->vm_mm->uxpgd_lock); 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_cibool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 19262306a36Sopenharmony_ci long val = 0; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci spin_lock(&vma->vm_mm->uxpgd_lock); 19562306a36Sopenharmony_ci uxpte = lookup_uxpte(vma, addr, true); 19662306a36Sopenharmony_ci if (!uxpte) 19762306a36Sopenharmony_ci goto unlock; 19862306a36Sopenharmony_ciretry: 19962306a36Sopenharmony_ci val = uxpte_read(uxpte); 20062306a36Sopenharmony_ci if (val & 1) 20162306a36Sopenharmony_ci goto unlock; 20262306a36Sopenharmony_ci if (!uxpte_cas(uxpte, val, val + 1)) 20362306a36Sopenharmony_ci goto retry; 20462306a36Sopenharmony_ci val++; 20562306a36Sopenharmony_ciunlock: 20662306a36Sopenharmony_ci spin_unlock(&vma->vm_mm->uxpgd_lock); 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci return val & 1; 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_civoid uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 21462306a36Sopenharmony_ci long val = 0; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci spin_lock(&vma->vm_mm->uxpgd_lock); 21762306a36Sopenharmony_ci uxpte = lookup_uxpte(vma, addr, false); 21862306a36Sopenharmony_ci if (!uxpte) 21962306a36Sopenharmony_ci goto unlock; 22062306a36Sopenharmony_ciretry: 22162306a36Sopenharmony_ci val = uxpte_read(uxpte); 22262306a36Sopenharmony_ci if (!(val & 1)) 22362306a36Sopenharmony_ci goto unlock; 22462306a36Sopenharmony_ci if (!uxpte_cas(uxpte, val, val - 1)) 22562306a36Sopenharmony_ci goto retry; 22662306a36Sopenharmony_ciunlock: 22762306a36Sopenharmony_ci spin_unlock(&vma->vm_mm->uxpgd_lock); 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_civm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 23362306a36Sopenharmony_ci unsigned long vma_uxpn = vma->vm_pgoff; 23462306a36Sopenharmony_ci unsigned long off_uxpn = vpn(vmf->address - vma->vm_start); 23562306a36Sopenharmony_ci unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn); 23662306a36Sopenharmony_ci struct page *page = NULL; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci if (unlikely(anon_vma_prepare(vma))) 23962306a36Sopenharmony_ci return VM_FAULT_OOM; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci spin_lock(&vma->vm_mm->uxpgd_lock); 24262306a36Sopenharmony_ci page = lookup_uxpte_page(vma, addr, true); 24362306a36Sopenharmony_ci spin_unlock(&vma->vm_mm->uxpgd_lock); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci if (!page) 24662306a36Sopenharmony_ci return VM_FAULT_OOM; 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci *entry = mk_pte(page, vma->vm_page_prot); 24962306a36Sopenharmony_ci *entry = pte_sw_mkyoung(*entry); 25062306a36Sopenharmony_ci if (vma->vm_flags & VM_WRITE) 25162306a36Sopenharmony_ci *entry = pte_mkwrite(pte_mkdirty(*entry), vma); 25262306a36Sopenharmony_ci return 0; 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages, 25662306a36Sopenharmony_ci unsigned long *pined_purg_pages) 25762306a36Sopenharmony_ci{ 25862306a36Sopenharmony_ci struct page *page = NULL; 25962306a36Sopenharmony_ci void **slot = NULL; 26062306a36Sopenharmony_ci struct radix_tree_iter iter; 26162306a36Sopenharmony_ci struct uxpte_t *uxpte = NULL; 26262306a36Sopenharmony_ci long pte_entry = 0; 26362306a36Sopenharmony_ci int index = 0; 26462306a36Sopenharmony_ci unsigned long nr_total = 0, nr_pined = 0; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci spin_lock(&mm->uxpgd_lock); 26762306a36Sopenharmony_ci if (!mm->uxpgd) 26862306a36Sopenharmony_ci goto out; 26962306a36Sopenharmony_ci radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) { 27062306a36Sopenharmony_ci page = radix_tree_deref_slot(slot); 27162306a36Sopenharmony_ci if (unlikely(!page)) 27262306a36Sopenharmony_ci continue; 27362306a36Sopenharmony_ci uxpte = page_to_virt(page); 27462306a36Sopenharmony_ci for (index = 0; index < UXPTE_PER_PAGE; index++) { 27562306a36Sopenharmony_ci pte_entry = uxpte_read(&(uxpte[index])); 27662306a36Sopenharmony_ci if (uxpte_present(pte_entry) == 0) /* not present */ 27762306a36Sopenharmony_ci continue; 27862306a36Sopenharmony_ci nr_total++; 27962306a36Sopenharmony_ci if (uxpte_refcnt(pte_entry) > 0) /* pined by user */ 28062306a36Sopenharmony_ci nr_pined++; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ciout: 28462306a36Sopenharmony_ci spin_unlock(&mm->uxpgd_lock); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci if (total_purg_pages) 28762306a36Sopenharmony_ci *total_purg_pages = nr_total; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (pined_purg_pages) 29062306a36Sopenharmony_ci *pined_purg_pages = nr_pined; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_civoid mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages, 29462306a36Sopenharmony_ci unsigned long *pined_purg_pages) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci if (unlikely(!mm)) 29762306a36Sopenharmony_ci return; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci if (!total_purg_pages && !pined_purg_pages) 30062306a36Sopenharmony_ci return; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci __mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages); 30362306a36Sopenharmony_ci} 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_civoid purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci struct task_struct *p = NULL; 30862306a36Sopenharmony_ci struct task_struct *tsk = NULL; 30962306a36Sopenharmony_ci unsigned long mm_nr_purge = 0, mm_nr_pined = 0; 31062306a36Sopenharmony_ci unsigned long nr_total = 0, nr_pined = 0; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci if (!total_purg_pages && !pined_purg_pages) 31362306a36Sopenharmony_ci return; 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (total_purg_pages) 31662306a36Sopenharmony_ci *total_purg_pages = 0; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci if (pined_purg_pages) 31962306a36Sopenharmony_ci *pined_purg_pages = 0; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci rcu_read_lock(); 32262306a36Sopenharmony_ci for_each_process(p) { 32362306a36Sopenharmony_ci tsk = find_lock_task_mm(p); 32462306a36Sopenharmony_ci if (!tsk) { 32562306a36Sopenharmony_ci /* 32662306a36Sopenharmony_ci * It is a kthread or all of p's threads have already 32762306a36Sopenharmony_ci * detached their mm's. 32862306a36Sopenharmony_ci */ 32962306a36Sopenharmony_ci continue; 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci __mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined); 33262306a36Sopenharmony_ci nr_total += mm_nr_purge; 33362306a36Sopenharmony_ci nr_pined += mm_nr_pined; 33462306a36Sopenharmony_ci task_unlock(tsk); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci if (mm_nr_purge > 0) { 33762306a36Sopenharmony_ci pr_info("purgemm: tsk: %s %lu pined in %lu pages\n", tsk->comm ?: "NULL", 33862306a36Sopenharmony_ci mm_nr_pined, mm_nr_purge); 33962306a36Sopenharmony_ci } 34062306a36Sopenharmony_ci } 34162306a36Sopenharmony_ci rcu_read_unlock(); 34262306a36Sopenharmony_ci if (total_purg_pages) 34362306a36Sopenharmony_ci *total_purg_pages = nr_total; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci if (pined_purg_pages) 34662306a36Sopenharmony_ci *pined_purg_pages = nr_pined; 34762306a36Sopenharmony_ci pr_info("purgemm: Sum: %lu pined in %lu pages\n", nr_pined, nr_total); 34862306a36Sopenharmony_ci} 349