162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * DAMON Primitives for Virtual Address Spaces 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author: SeongJae Park <sjpark@amazon.de> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#define pr_fmt(fmt) "damon-va: " fmt 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <asm-generic/mman-common.h> 1162306a36Sopenharmony_ci#include <linux/highmem.h> 1262306a36Sopenharmony_ci#include <linux/hugetlb.h> 1362306a36Sopenharmony_ci#include <linux/mmu_notifier.h> 1462306a36Sopenharmony_ci#include <linux/page_idle.h> 1562306a36Sopenharmony_ci#include <linux/pagewalk.h> 1662306a36Sopenharmony_ci#include <linux/sched/mm.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include "ops-common.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST 2162306a36Sopenharmony_ci#undef DAMON_MIN_REGION 2262306a36Sopenharmony_ci#define DAMON_MIN_REGION 1 2362306a36Sopenharmony_ci#endif 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci * 't->pid' should be the pointer to the relevant 'struct pid' having reference 2762306a36Sopenharmony_ci * count. Caller must put the returned task, unless it is NULL. 2862306a36Sopenharmony_ci */ 2962306a36Sopenharmony_cistatic inline struct task_struct *damon_get_task_struct(struct damon_target *t) 3062306a36Sopenharmony_ci{ 3162306a36Sopenharmony_ci return get_pid_task(t->pid, PIDTYPE_PID); 3262306a36Sopenharmony_ci} 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci/* 3562306a36Sopenharmony_ci * Get the mm_struct of the given target 3662306a36Sopenharmony_ci * 3762306a36Sopenharmony_ci * Caller _must_ put the mm_struct after use, unless it is NULL. 3862306a36Sopenharmony_ci * 3962306a36Sopenharmony_ci * Returns the mm_struct of the target on success, NULL on failure 4062306a36Sopenharmony_ci */ 4162306a36Sopenharmony_cistatic struct mm_struct *damon_get_mm(struct damon_target *t) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci struct task_struct *task; 4462306a36Sopenharmony_ci struct mm_struct *mm; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci task = damon_get_task_struct(t); 4762306a36Sopenharmony_ci if (!task) 4862306a36Sopenharmony_ci return NULL; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci mm = get_task_mm(task); 5162306a36Sopenharmony_ci put_task_struct(task); 5262306a36Sopenharmony_ci return mm; 5362306a36Sopenharmony_ci} 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci/* 5662306a36Sopenharmony_ci * Functions for the initial monitoring target regions construction 5762306a36Sopenharmony_ci */ 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci/* 6062306a36Sopenharmony_ci * Size-evenly split a region into 'nr_pieces' small regions 6162306a36Sopenharmony_ci * 6262306a36Sopenharmony_ci * Returns 0 on success, or negative error code otherwise. 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_cistatic int damon_va_evenly_split_region(struct damon_target *t, 6562306a36Sopenharmony_ci struct damon_region *r, unsigned int nr_pieces) 6662306a36Sopenharmony_ci{ 6762306a36Sopenharmony_ci unsigned long sz_orig, sz_piece, orig_end; 6862306a36Sopenharmony_ci struct damon_region *n = NULL, *next; 6962306a36Sopenharmony_ci unsigned long start; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci if (!r || !nr_pieces) 7262306a36Sopenharmony_ci return -EINVAL; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci orig_end = r->ar.end; 7562306a36Sopenharmony_ci sz_orig = damon_sz_region(r); 7662306a36Sopenharmony_ci sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci if (!sz_piece) 7962306a36Sopenharmony_ci return -EINVAL; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci r->ar.end = r->ar.start + sz_piece; 8262306a36Sopenharmony_ci next = damon_next_region(r); 8362306a36Sopenharmony_ci for (start = r->ar.end; start + sz_piece <= orig_end; 8462306a36Sopenharmony_ci start += sz_piece) { 8562306a36Sopenharmony_ci n = damon_new_region(start, start + sz_piece); 8662306a36Sopenharmony_ci if (!n) 8762306a36Sopenharmony_ci return -ENOMEM; 8862306a36Sopenharmony_ci damon_insert_region(n, r, next, t); 8962306a36Sopenharmony_ci r = n; 9062306a36Sopenharmony_ci } 9162306a36Sopenharmony_ci /* complement last region for possible rounding error */ 9262306a36Sopenharmony_ci if (n) 9362306a36Sopenharmony_ci n->ar.end = orig_end; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci return 0; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic unsigned long sz_range(struct damon_addr_range *r) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci return r->end - r->start; 10162306a36Sopenharmony_ci} 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci/* 10462306a36Sopenharmony_ci * Find three regions separated by two biggest unmapped regions 10562306a36Sopenharmony_ci * 10662306a36Sopenharmony_ci * vma the head vma of the target address space 10762306a36Sopenharmony_ci * regions an array of three address ranges that results will be saved 10862306a36Sopenharmony_ci * 10962306a36Sopenharmony_ci * This function receives an address space and finds three regions in it which 11062306a36Sopenharmony_ci * separated by the two biggest unmapped regions in the space. Please refer to 11162306a36Sopenharmony_ci * below comments of '__damon_va_init_regions()' function to know why this is 11262306a36Sopenharmony_ci * necessary. 11362306a36Sopenharmony_ci * 11462306a36Sopenharmony_ci * Returns 0 if success, or negative error code otherwise. 11562306a36Sopenharmony_ci */ 11662306a36Sopenharmony_cistatic int __damon_va_three_regions(struct mm_struct *mm, 11762306a36Sopenharmony_ci struct damon_addr_range regions[3]) 11862306a36Sopenharmony_ci{ 11962306a36Sopenharmony_ci struct damon_addr_range first_gap = {0}, second_gap = {0}; 12062306a36Sopenharmony_ci VMA_ITERATOR(vmi, mm, 0); 12162306a36Sopenharmony_ci struct vm_area_struct *vma, *prev = NULL; 12262306a36Sopenharmony_ci unsigned long start; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci /* 12562306a36Sopenharmony_ci * Find the two biggest gaps so that first_gap > second_gap > others. 12662306a36Sopenharmony_ci * If this is too slow, it can be optimised to examine the maple 12762306a36Sopenharmony_ci * tree gaps. 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci for_each_vma(vmi, vma) { 13062306a36Sopenharmony_ci unsigned long gap; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci if (!prev) { 13362306a36Sopenharmony_ci start = vma->vm_start; 13462306a36Sopenharmony_ci goto next; 13562306a36Sopenharmony_ci } 13662306a36Sopenharmony_ci gap = vma->vm_start - prev->vm_end; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (gap > sz_range(&first_gap)) { 13962306a36Sopenharmony_ci second_gap = first_gap; 14062306a36Sopenharmony_ci first_gap.start = prev->vm_end; 14162306a36Sopenharmony_ci first_gap.end = vma->vm_start; 14262306a36Sopenharmony_ci } else if (gap > sz_range(&second_gap)) { 14362306a36Sopenharmony_ci second_gap.start = prev->vm_end; 14462306a36Sopenharmony_ci second_gap.end = vma->vm_start; 14562306a36Sopenharmony_ci } 14662306a36Sopenharmony_cinext: 14762306a36Sopenharmony_ci prev = vma; 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci if (!sz_range(&second_gap) || !sz_range(&first_gap)) 15162306a36Sopenharmony_ci return -EINVAL; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci /* Sort the two biggest gaps by address */ 15462306a36Sopenharmony_ci if (first_gap.start > second_gap.start) 15562306a36Sopenharmony_ci swap(first_gap, second_gap); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* Store the result */ 15862306a36Sopenharmony_ci regions[0].start = ALIGN(start, DAMON_MIN_REGION); 15962306a36Sopenharmony_ci regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); 16062306a36Sopenharmony_ci regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); 16162306a36Sopenharmony_ci regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); 16262306a36Sopenharmony_ci regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); 16362306a36Sopenharmony_ci regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci return 0; 16662306a36Sopenharmony_ci} 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci/* 16962306a36Sopenharmony_ci * Get the three regions in the given target (task) 17062306a36Sopenharmony_ci * 17162306a36Sopenharmony_ci * Returns 0 on success, negative error code otherwise. 17262306a36Sopenharmony_ci */ 17362306a36Sopenharmony_cistatic int damon_va_three_regions(struct damon_target *t, 17462306a36Sopenharmony_ci struct damon_addr_range regions[3]) 17562306a36Sopenharmony_ci{ 17662306a36Sopenharmony_ci struct mm_struct *mm; 17762306a36Sopenharmony_ci int rc; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci mm = damon_get_mm(t); 18062306a36Sopenharmony_ci if (!mm) 18162306a36Sopenharmony_ci return -EINVAL; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci mmap_read_lock(mm); 18462306a36Sopenharmony_ci rc = __damon_va_three_regions(mm, regions); 18562306a36Sopenharmony_ci mmap_read_unlock(mm); 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci mmput(mm); 18862306a36Sopenharmony_ci return rc; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci/* 19262306a36Sopenharmony_ci * Initialize the monitoring target regions for the given target (task) 19362306a36Sopenharmony_ci * 19462306a36Sopenharmony_ci * t the given target 19562306a36Sopenharmony_ci * 19662306a36Sopenharmony_ci * Because only a number of small portions of the entire address space 19762306a36Sopenharmony_ci * is actually mapped to the memory and accessed, monitoring the unmapped 19862306a36Sopenharmony_ci * regions is wasteful. That said, because we can deal with small noises, 19962306a36Sopenharmony_ci * tracking every mapping is not strictly required but could even incur a high 20062306a36Sopenharmony_ci * overhead if the mapping frequently changes or the number of mappings is 20162306a36Sopenharmony_ci * high. The adaptive regions adjustment mechanism will further help to deal 20262306a36Sopenharmony_ci * with the noise by simply identifying the unmapped areas as a region that 20362306a36Sopenharmony_ci * has no access. Moreover, applying the real mappings that would have many 20462306a36Sopenharmony_ci * unmapped areas inside will make the adaptive mechanism quite complex. That 20562306a36Sopenharmony_ci * said, too huge unmapped areas inside the monitoring target should be removed 20662306a36Sopenharmony_ci * to not take the time for the adaptive mechanism. 20762306a36Sopenharmony_ci * 20862306a36Sopenharmony_ci * For the reason, we convert the complex mappings to three distinct regions 20962306a36Sopenharmony_ci * that cover every mapped area of the address space. Also the two gaps 21062306a36Sopenharmony_ci * between the three regions are the two biggest unmapped areas in the given 21162306a36Sopenharmony_ci * address space. In detail, this function first identifies the start and the 21262306a36Sopenharmony_ci * end of the mappings and the two biggest unmapped areas of the address space. 21362306a36Sopenharmony_ci * Then, it constructs the three regions as below: 21462306a36Sopenharmony_ci * 21562306a36Sopenharmony_ci * [mappings[0]->start, big_two_unmapped_areas[0]->start) 21662306a36Sopenharmony_ci * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) 21762306a36Sopenharmony_ci * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) 21862306a36Sopenharmony_ci * 21962306a36Sopenharmony_ci * As usual memory map of processes is as below, the gap between the heap and 22062306a36Sopenharmony_ci * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed 22162306a36Sopenharmony_ci * region and the stack will be two biggest unmapped regions. Because these 22262306a36Sopenharmony_ci * gaps are exceptionally huge areas in usual address space, excluding these 22362306a36Sopenharmony_ci * two biggest unmapped regions will be sufficient to make a trade-off. 22462306a36Sopenharmony_ci * 22562306a36Sopenharmony_ci * <heap> 22662306a36Sopenharmony_ci * <BIG UNMAPPED REGION 1> 22762306a36Sopenharmony_ci * <uppermost mmap()-ed region> 22862306a36Sopenharmony_ci * (other mmap()-ed regions and small unmapped regions) 22962306a36Sopenharmony_ci * <lowermost mmap()-ed region> 23062306a36Sopenharmony_ci * <BIG UNMAPPED REGION 2> 23162306a36Sopenharmony_ci * <stack> 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_cistatic void __damon_va_init_regions(struct damon_ctx *ctx, 23462306a36Sopenharmony_ci struct damon_target *t) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci struct damon_target *ti; 23762306a36Sopenharmony_ci struct damon_region *r; 23862306a36Sopenharmony_ci struct damon_addr_range regions[3]; 23962306a36Sopenharmony_ci unsigned long sz = 0, nr_pieces; 24062306a36Sopenharmony_ci int i, tidx = 0; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci if (damon_va_three_regions(t, regions)) { 24362306a36Sopenharmony_ci damon_for_each_target(ti, ctx) { 24462306a36Sopenharmony_ci if (ti == t) 24562306a36Sopenharmony_ci break; 24662306a36Sopenharmony_ci tidx++; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci pr_debug("Failed to get three regions of %dth target\n", tidx); 24962306a36Sopenharmony_ci return; 25062306a36Sopenharmony_ci } 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci for (i = 0; i < 3; i++) 25362306a36Sopenharmony_ci sz += regions[i].end - regions[i].start; 25462306a36Sopenharmony_ci if (ctx->attrs.min_nr_regions) 25562306a36Sopenharmony_ci sz /= ctx->attrs.min_nr_regions; 25662306a36Sopenharmony_ci if (sz < DAMON_MIN_REGION) 25762306a36Sopenharmony_ci sz = DAMON_MIN_REGION; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci /* Set the initial three regions of the target */ 26062306a36Sopenharmony_ci for (i = 0; i < 3; i++) { 26162306a36Sopenharmony_ci r = damon_new_region(regions[i].start, regions[i].end); 26262306a36Sopenharmony_ci if (!r) { 26362306a36Sopenharmony_ci pr_err("%d'th init region creation failed\n", i); 26462306a36Sopenharmony_ci return; 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci damon_add_region(r, t); 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci nr_pieces = (regions[i].end - regions[i].start) / sz; 26962306a36Sopenharmony_ci damon_va_evenly_split_region(t, r, nr_pieces); 27062306a36Sopenharmony_ci } 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci/* Initialize '->regions_list' of every target (task) */ 27462306a36Sopenharmony_cistatic void damon_va_init(struct damon_ctx *ctx) 27562306a36Sopenharmony_ci{ 27662306a36Sopenharmony_ci struct damon_target *t; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci damon_for_each_target(t, ctx) { 27962306a36Sopenharmony_ci /* the user may set the target regions as they want */ 28062306a36Sopenharmony_ci if (!damon_nr_regions(t)) 28162306a36Sopenharmony_ci __damon_va_init_regions(ctx, t); 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci/* 28662306a36Sopenharmony_ci * Update regions for current memory mappings 28762306a36Sopenharmony_ci */ 28862306a36Sopenharmony_cistatic void damon_va_update(struct damon_ctx *ctx) 28962306a36Sopenharmony_ci{ 29062306a36Sopenharmony_ci struct damon_addr_range three_regions[3]; 29162306a36Sopenharmony_ci struct damon_target *t; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci damon_for_each_target(t, ctx) { 29462306a36Sopenharmony_ci if (damon_va_three_regions(t, three_regions)) 29562306a36Sopenharmony_ci continue; 29662306a36Sopenharmony_ci damon_set_regions(t, three_regions, 3); 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_cistatic int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, 30162306a36Sopenharmony_ci unsigned long next, struct mm_walk *walk) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci pte_t *pte; 30462306a36Sopenharmony_ci pmd_t pmde; 30562306a36Sopenharmony_ci spinlock_t *ptl; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci if (pmd_trans_huge(pmdp_get(pmd))) { 30862306a36Sopenharmony_ci ptl = pmd_lock(walk->mm, pmd); 30962306a36Sopenharmony_ci pmde = pmdp_get(pmd); 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci if (!pmd_present(pmde)) { 31262306a36Sopenharmony_ci spin_unlock(ptl); 31362306a36Sopenharmony_ci return 0; 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (pmd_trans_huge(pmde)) { 31762306a36Sopenharmony_ci damon_pmdp_mkold(pmd, walk->vma, addr); 31862306a36Sopenharmony_ci spin_unlock(ptl); 31962306a36Sopenharmony_ci return 0; 32062306a36Sopenharmony_ci } 32162306a36Sopenharmony_ci spin_unlock(ptl); 32262306a36Sopenharmony_ci } 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 32562306a36Sopenharmony_ci if (!pte) { 32662306a36Sopenharmony_ci walk->action = ACTION_AGAIN; 32762306a36Sopenharmony_ci return 0; 32862306a36Sopenharmony_ci } 32962306a36Sopenharmony_ci if (!pte_present(ptep_get(pte))) 33062306a36Sopenharmony_ci goto out; 33162306a36Sopenharmony_ci damon_ptep_mkold(pte, walk->vma, addr); 33262306a36Sopenharmony_ciout: 33362306a36Sopenharmony_ci pte_unmap_unlock(pte, ptl); 33462306a36Sopenharmony_ci return 0; 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE 33862306a36Sopenharmony_cistatic void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, 33962306a36Sopenharmony_ci struct vm_area_struct *vma, unsigned long addr) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci bool referenced = false; 34262306a36Sopenharmony_ci pte_t entry = huge_ptep_get(pte); 34362306a36Sopenharmony_ci struct folio *folio = pfn_folio(pte_pfn(entry)); 34462306a36Sopenharmony_ci unsigned long psize = huge_page_size(hstate_vma(vma)); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci folio_get(folio); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci if (pte_young(entry)) { 34962306a36Sopenharmony_ci referenced = true; 35062306a36Sopenharmony_ci entry = pte_mkold(entry); 35162306a36Sopenharmony_ci set_huge_pte_at(mm, addr, pte, entry, psize); 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci#ifdef CONFIG_MMU_NOTIFIER 35562306a36Sopenharmony_ci if (mmu_notifier_clear_young(mm, addr, 35662306a36Sopenharmony_ci addr + huge_page_size(hstate_vma(vma)))) 35762306a36Sopenharmony_ci referenced = true; 35862306a36Sopenharmony_ci#endif /* CONFIG_MMU_NOTIFIER */ 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci if (referenced) 36162306a36Sopenharmony_ci folio_set_young(folio); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci folio_set_idle(folio); 36462306a36Sopenharmony_ci folio_put(folio); 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_cistatic int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, 36862306a36Sopenharmony_ci unsigned long addr, unsigned long end, 36962306a36Sopenharmony_ci struct mm_walk *walk) 37062306a36Sopenharmony_ci{ 37162306a36Sopenharmony_ci struct hstate *h = hstate_vma(walk->vma); 37262306a36Sopenharmony_ci spinlock_t *ptl; 37362306a36Sopenharmony_ci pte_t entry; 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci ptl = huge_pte_lock(h, walk->mm, pte); 37662306a36Sopenharmony_ci entry = huge_ptep_get(pte); 37762306a36Sopenharmony_ci if (!pte_present(entry)) 37862306a36Sopenharmony_ci goto out; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ciout: 38362306a36Sopenharmony_ci spin_unlock(ptl); 38462306a36Sopenharmony_ci return 0; 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci#else 38762306a36Sopenharmony_ci#define damon_mkold_hugetlb_entry NULL 38862306a36Sopenharmony_ci#endif /* CONFIG_HUGETLB_PAGE */ 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic const struct mm_walk_ops damon_mkold_ops = { 39162306a36Sopenharmony_ci .pmd_entry = damon_mkold_pmd_entry, 39262306a36Sopenharmony_ci .hugetlb_entry = damon_mkold_hugetlb_entry, 39362306a36Sopenharmony_ci .walk_lock = PGWALK_RDLOCK, 39462306a36Sopenharmony_ci}; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_cistatic void damon_va_mkold(struct mm_struct *mm, unsigned long addr) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci mmap_read_lock(mm); 39962306a36Sopenharmony_ci walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); 40062306a36Sopenharmony_ci mmap_read_unlock(mm); 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci/* 40462306a36Sopenharmony_ci * Functions for the access checking of the regions 40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_cistatic void __damon_va_prepare_access_check(struct mm_struct *mm, 40862306a36Sopenharmony_ci struct damon_region *r) 40962306a36Sopenharmony_ci{ 41062306a36Sopenharmony_ci r->sampling_addr = damon_rand(r->ar.start, r->ar.end); 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci damon_va_mkold(mm, r->sampling_addr); 41362306a36Sopenharmony_ci} 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_cistatic void damon_va_prepare_access_checks(struct damon_ctx *ctx) 41662306a36Sopenharmony_ci{ 41762306a36Sopenharmony_ci struct damon_target *t; 41862306a36Sopenharmony_ci struct mm_struct *mm; 41962306a36Sopenharmony_ci struct damon_region *r; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci damon_for_each_target(t, ctx) { 42262306a36Sopenharmony_ci mm = damon_get_mm(t); 42362306a36Sopenharmony_ci if (!mm) 42462306a36Sopenharmony_ci continue; 42562306a36Sopenharmony_ci damon_for_each_region(r, t) 42662306a36Sopenharmony_ci __damon_va_prepare_access_check(mm, r); 42762306a36Sopenharmony_ci mmput(mm); 42862306a36Sopenharmony_ci } 42962306a36Sopenharmony_ci} 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_cistruct damon_young_walk_private { 43262306a36Sopenharmony_ci /* size of the folio for the access checked virtual memory address */ 43362306a36Sopenharmony_ci unsigned long *folio_sz; 43462306a36Sopenharmony_ci bool young; 43562306a36Sopenharmony_ci}; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_cistatic int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, 43862306a36Sopenharmony_ci unsigned long next, struct mm_walk *walk) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci pte_t *pte; 44162306a36Sopenharmony_ci pte_t ptent; 44262306a36Sopenharmony_ci spinlock_t *ptl; 44362306a36Sopenharmony_ci struct folio *folio; 44462306a36Sopenharmony_ci struct damon_young_walk_private *priv = walk->private; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 44762306a36Sopenharmony_ci if (pmd_trans_huge(pmdp_get(pmd))) { 44862306a36Sopenharmony_ci pmd_t pmde; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci ptl = pmd_lock(walk->mm, pmd); 45162306a36Sopenharmony_ci pmde = pmdp_get(pmd); 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci if (!pmd_present(pmde)) { 45462306a36Sopenharmony_ci spin_unlock(ptl); 45562306a36Sopenharmony_ci return 0; 45662306a36Sopenharmony_ci } 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci if (!pmd_trans_huge(pmde)) { 45962306a36Sopenharmony_ci spin_unlock(ptl); 46062306a36Sopenharmony_ci goto regular_page; 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci folio = damon_get_folio(pmd_pfn(pmde)); 46362306a36Sopenharmony_ci if (!folio) 46462306a36Sopenharmony_ci goto huge_out; 46562306a36Sopenharmony_ci if (pmd_young(pmde) || !folio_test_idle(folio) || 46662306a36Sopenharmony_ci mmu_notifier_test_young(walk->mm, 46762306a36Sopenharmony_ci addr)) 46862306a36Sopenharmony_ci priv->young = true; 46962306a36Sopenharmony_ci *priv->folio_sz = HPAGE_PMD_SIZE; 47062306a36Sopenharmony_ci folio_put(folio); 47162306a36Sopenharmony_cihuge_out: 47262306a36Sopenharmony_ci spin_unlock(ptl); 47362306a36Sopenharmony_ci return 0; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ciregular_page: 47762306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 48062306a36Sopenharmony_ci if (!pte) { 48162306a36Sopenharmony_ci walk->action = ACTION_AGAIN; 48262306a36Sopenharmony_ci return 0; 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci ptent = ptep_get(pte); 48562306a36Sopenharmony_ci if (!pte_present(ptent)) 48662306a36Sopenharmony_ci goto out; 48762306a36Sopenharmony_ci folio = damon_get_folio(pte_pfn(ptent)); 48862306a36Sopenharmony_ci if (!folio) 48962306a36Sopenharmony_ci goto out; 49062306a36Sopenharmony_ci if (pte_young(ptent) || !folio_test_idle(folio) || 49162306a36Sopenharmony_ci mmu_notifier_test_young(walk->mm, addr)) 49262306a36Sopenharmony_ci priv->young = true; 49362306a36Sopenharmony_ci *priv->folio_sz = folio_size(folio); 49462306a36Sopenharmony_ci folio_put(folio); 49562306a36Sopenharmony_ciout: 49662306a36Sopenharmony_ci pte_unmap_unlock(pte, ptl); 49762306a36Sopenharmony_ci return 0; 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE 50162306a36Sopenharmony_cistatic int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, 50262306a36Sopenharmony_ci unsigned long addr, unsigned long end, 50362306a36Sopenharmony_ci struct mm_walk *walk) 50462306a36Sopenharmony_ci{ 50562306a36Sopenharmony_ci struct damon_young_walk_private *priv = walk->private; 50662306a36Sopenharmony_ci struct hstate *h = hstate_vma(walk->vma); 50762306a36Sopenharmony_ci struct folio *folio; 50862306a36Sopenharmony_ci spinlock_t *ptl; 50962306a36Sopenharmony_ci pte_t entry; 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci ptl = huge_pte_lock(h, walk->mm, pte); 51262306a36Sopenharmony_ci entry = huge_ptep_get(pte); 51362306a36Sopenharmony_ci if (!pte_present(entry)) 51462306a36Sopenharmony_ci goto out; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci folio = pfn_folio(pte_pfn(entry)); 51762306a36Sopenharmony_ci folio_get(folio); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (pte_young(entry) || !folio_test_idle(folio) || 52062306a36Sopenharmony_ci mmu_notifier_test_young(walk->mm, addr)) 52162306a36Sopenharmony_ci priv->young = true; 52262306a36Sopenharmony_ci *priv->folio_sz = huge_page_size(h); 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci folio_put(folio); 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ciout: 52762306a36Sopenharmony_ci spin_unlock(ptl); 52862306a36Sopenharmony_ci return 0; 52962306a36Sopenharmony_ci} 53062306a36Sopenharmony_ci#else 53162306a36Sopenharmony_ci#define damon_young_hugetlb_entry NULL 53262306a36Sopenharmony_ci#endif /* CONFIG_HUGETLB_PAGE */ 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cistatic const struct mm_walk_ops damon_young_ops = { 53562306a36Sopenharmony_ci .pmd_entry = damon_young_pmd_entry, 53662306a36Sopenharmony_ci .hugetlb_entry = damon_young_hugetlb_entry, 53762306a36Sopenharmony_ci .walk_lock = PGWALK_RDLOCK, 53862306a36Sopenharmony_ci}; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_cistatic bool damon_va_young(struct mm_struct *mm, unsigned long addr, 54162306a36Sopenharmony_ci unsigned long *folio_sz) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci struct damon_young_walk_private arg = { 54462306a36Sopenharmony_ci .folio_sz = folio_sz, 54562306a36Sopenharmony_ci .young = false, 54662306a36Sopenharmony_ci }; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci mmap_read_lock(mm); 54962306a36Sopenharmony_ci walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); 55062306a36Sopenharmony_ci mmap_read_unlock(mm); 55162306a36Sopenharmony_ci return arg.young; 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci/* 55562306a36Sopenharmony_ci * Check whether the region was accessed after the last preparation 55662306a36Sopenharmony_ci * 55762306a36Sopenharmony_ci * mm 'mm_struct' for the given virtual address space 55862306a36Sopenharmony_ci * r the region to be checked 55962306a36Sopenharmony_ci */ 56062306a36Sopenharmony_cistatic void __damon_va_check_access(struct mm_struct *mm, 56162306a36Sopenharmony_ci struct damon_region *r, bool same_target) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci static unsigned long last_addr; 56462306a36Sopenharmony_ci static unsigned long last_folio_sz = PAGE_SIZE; 56562306a36Sopenharmony_ci static bool last_accessed; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci /* If the region is in the last checked page, reuse the result */ 56862306a36Sopenharmony_ci if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == 56962306a36Sopenharmony_ci ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { 57062306a36Sopenharmony_ci if (last_accessed) 57162306a36Sopenharmony_ci r->nr_accesses++; 57262306a36Sopenharmony_ci return; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); 57662306a36Sopenharmony_ci if (last_accessed) 57762306a36Sopenharmony_ci r->nr_accesses++; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci last_addr = r->sampling_addr; 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_cistatic unsigned int damon_va_check_accesses(struct damon_ctx *ctx) 58362306a36Sopenharmony_ci{ 58462306a36Sopenharmony_ci struct damon_target *t; 58562306a36Sopenharmony_ci struct mm_struct *mm; 58662306a36Sopenharmony_ci struct damon_region *r; 58762306a36Sopenharmony_ci unsigned int max_nr_accesses = 0; 58862306a36Sopenharmony_ci bool same_target; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci damon_for_each_target(t, ctx) { 59162306a36Sopenharmony_ci mm = damon_get_mm(t); 59262306a36Sopenharmony_ci if (!mm) 59362306a36Sopenharmony_ci continue; 59462306a36Sopenharmony_ci same_target = false; 59562306a36Sopenharmony_ci damon_for_each_region(r, t) { 59662306a36Sopenharmony_ci __damon_va_check_access(mm, r, same_target); 59762306a36Sopenharmony_ci max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 59862306a36Sopenharmony_ci same_target = true; 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci mmput(mm); 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci return max_nr_accesses; 60462306a36Sopenharmony_ci} 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci/* 60762306a36Sopenharmony_ci * Functions for the target validity check and cleanup 60862306a36Sopenharmony_ci */ 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_cistatic bool damon_va_target_valid(struct damon_target *t) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci struct task_struct *task; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci task = damon_get_task_struct(t); 61562306a36Sopenharmony_ci if (task) { 61662306a36Sopenharmony_ci put_task_struct(task); 61762306a36Sopenharmony_ci return true; 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci 62062306a36Sopenharmony_ci return false; 62162306a36Sopenharmony_ci} 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci#ifndef CONFIG_ADVISE_SYSCALLS 62462306a36Sopenharmony_cistatic unsigned long damos_madvise(struct damon_target *target, 62562306a36Sopenharmony_ci struct damon_region *r, int behavior) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci return 0; 62862306a36Sopenharmony_ci} 62962306a36Sopenharmony_ci#else 63062306a36Sopenharmony_cistatic unsigned long damos_madvise(struct damon_target *target, 63162306a36Sopenharmony_ci struct damon_region *r, int behavior) 63262306a36Sopenharmony_ci{ 63362306a36Sopenharmony_ci struct mm_struct *mm; 63462306a36Sopenharmony_ci unsigned long start = PAGE_ALIGN(r->ar.start); 63562306a36Sopenharmony_ci unsigned long len = PAGE_ALIGN(damon_sz_region(r)); 63662306a36Sopenharmony_ci unsigned long applied; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci mm = damon_get_mm(target); 63962306a36Sopenharmony_ci if (!mm) 64062306a36Sopenharmony_ci return 0; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci applied = do_madvise(mm, start, len, behavior) ? 0 : len; 64362306a36Sopenharmony_ci mmput(mm); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci return applied; 64662306a36Sopenharmony_ci} 64762306a36Sopenharmony_ci#endif /* CONFIG_ADVISE_SYSCALLS */ 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_cistatic unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, 65062306a36Sopenharmony_ci struct damon_target *t, struct damon_region *r, 65162306a36Sopenharmony_ci struct damos *scheme) 65262306a36Sopenharmony_ci{ 65362306a36Sopenharmony_ci int madv_action; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci switch (scheme->action) { 65662306a36Sopenharmony_ci case DAMOS_WILLNEED: 65762306a36Sopenharmony_ci madv_action = MADV_WILLNEED; 65862306a36Sopenharmony_ci break; 65962306a36Sopenharmony_ci case DAMOS_COLD: 66062306a36Sopenharmony_ci madv_action = MADV_COLD; 66162306a36Sopenharmony_ci break; 66262306a36Sopenharmony_ci case DAMOS_PAGEOUT: 66362306a36Sopenharmony_ci madv_action = MADV_PAGEOUT; 66462306a36Sopenharmony_ci break; 66562306a36Sopenharmony_ci case DAMOS_HUGEPAGE: 66662306a36Sopenharmony_ci madv_action = MADV_HUGEPAGE; 66762306a36Sopenharmony_ci break; 66862306a36Sopenharmony_ci case DAMOS_NOHUGEPAGE: 66962306a36Sopenharmony_ci madv_action = MADV_NOHUGEPAGE; 67062306a36Sopenharmony_ci break; 67162306a36Sopenharmony_ci case DAMOS_STAT: 67262306a36Sopenharmony_ci return 0; 67362306a36Sopenharmony_ci default: 67462306a36Sopenharmony_ci /* 67562306a36Sopenharmony_ci * DAMOS actions that are not yet supported by 'vaddr'. 67662306a36Sopenharmony_ci */ 67762306a36Sopenharmony_ci return 0; 67862306a36Sopenharmony_ci } 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ci return damos_madvise(t, r, madv_action); 68162306a36Sopenharmony_ci} 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_cistatic int damon_va_scheme_score(struct damon_ctx *context, 68462306a36Sopenharmony_ci struct damon_target *t, struct damon_region *r, 68562306a36Sopenharmony_ci struct damos *scheme) 68662306a36Sopenharmony_ci{ 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci switch (scheme->action) { 68962306a36Sopenharmony_ci case DAMOS_PAGEOUT: 69062306a36Sopenharmony_ci return damon_cold_score(context, r, scheme); 69162306a36Sopenharmony_ci default: 69262306a36Sopenharmony_ci break; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci return DAMOS_MAX_SCORE; 69662306a36Sopenharmony_ci} 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_cistatic int __init damon_va_initcall(void) 69962306a36Sopenharmony_ci{ 70062306a36Sopenharmony_ci struct damon_operations ops = { 70162306a36Sopenharmony_ci .id = DAMON_OPS_VADDR, 70262306a36Sopenharmony_ci .init = damon_va_init, 70362306a36Sopenharmony_ci .update = damon_va_update, 70462306a36Sopenharmony_ci .prepare_access_checks = damon_va_prepare_access_checks, 70562306a36Sopenharmony_ci .check_accesses = damon_va_check_accesses, 70662306a36Sopenharmony_ci .reset_aggregated = NULL, 70762306a36Sopenharmony_ci .target_valid = damon_va_target_valid, 70862306a36Sopenharmony_ci .cleanup = NULL, 70962306a36Sopenharmony_ci .apply_scheme = damon_va_apply_scheme, 71062306a36Sopenharmony_ci .get_scheme_score = damon_va_scheme_score, 71162306a36Sopenharmony_ci }; 71262306a36Sopenharmony_ci /* ops for fixed virtual address ranges */ 71362306a36Sopenharmony_ci struct damon_operations ops_fvaddr = ops; 71462306a36Sopenharmony_ci int err; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci /* Don't set the monitoring target regions for the entire mapping */ 71762306a36Sopenharmony_ci ops_fvaddr.id = DAMON_OPS_FVADDR; 71862306a36Sopenharmony_ci ops_fvaddr.init = NULL; 71962306a36Sopenharmony_ci ops_fvaddr.update = NULL; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci err = damon_register_ops(&ops); 72262306a36Sopenharmony_ci if (err) 72362306a36Sopenharmony_ci return err; 72462306a36Sopenharmony_ci return damon_register_ops(&ops_fvaddr); 72562306a36Sopenharmony_ci}; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_cisubsys_initcall(damon_va_initcall); 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci#include "vaddr-test.h" 730