162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/pagewalk.h> 362306a36Sopenharmony_ci#include <linux/highmem.h> 462306a36Sopenharmony_ci#include <linux/sched.h> 562306a36Sopenharmony_ci#include <linux/hugetlb.h> 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci/* 862306a36Sopenharmony_ci * We want to know the real level where a entry is located ignoring any 962306a36Sopenharmony_ci * folding of levels which may be happening. For example if p4d is folded then 1062306a36Sopenharmony_ci * a missing entry found at level 1 (p4d) is actually at level 0 (pgd). 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_cistatic int real_depth(int depth) 1362306a36Sopenharmony_ci{ 1462306a36Sopenharmony_ci if (depth == 3 && PTRS_PER_PMD == 1) 1562306a36Sopenharmony_ci depth = 2; 1662306a36Sopenharmony_ci if (depth == 2 && PTRS_PER_PUD == 1) 1762306a36Sopenharmony_ci depth = 1; 1862306a36Sopenharmony_ci if (depth == 1 && PTRS_PER_P4D == 1) 1962306a36Sopenharmony_ci depth = 0; 2062306a36Sopenharmony_ci return depth; 2162306a36Sopenharmony_ci} 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cistatic int walk_pte_range_inner(pte_t *pte, unsigned long addr, 2462306a36Sopenharmony_ci unsigned long end, struct mm_walk *walk) 2562306a36Sopenharmony_ci{ 2662306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 2762306a36Sopenharmony_ci int err = 0; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci for (;;) { 3062306a36Sopenharmony_ci err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); 3162306a36Sopenharmony_ci if (err) 3262306a36Sopenharmony_ci break; 3362306a36Sopenharmony_ci if (addr >= end - PAGE_SIZE) 3462306a36Sopenharmony_ci break; 3562306a36Sopenharmony_ci addr += PAGE_SIZE; 3662306a36Sopenharmony_ci pte++; 3762306a36Sopenharmony_ci } 3862306a36Sopenharmony_ci return err; 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 4262306a36Sopenharmony_ci struct mm_walk *walk) 4362306a36Sopenharmony_ci{ 4462306a36Sopenharmony_ci pte_t *pte; 4562306a36Sopenharmony_ci int err = 0; 4662306a36Sopenharmony_ci spinlock_t *ptl; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci if (walk->no_vma) { 4962306a36Sopenharmony_ci /* 5062306a36Sopenharmony_ci * pte_offset_map() might apply user-specific validation. 5162306a36Sopenharmony_ci * Indeed, on x86_64 the pmd entries set up by init_espfix_ap() 5262306a36Sopenharmony_ci * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear), 5362306a36Sopenharmony_ci * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them. 5462306a36Sopenharmony_ci */ 5562306a36Sopenharmony_ci if (walk->mm == &init_mm || addr >= TASK_SIZE) 5662306a36Sopenharmony_ci pte = pte_offset_kernel(pmd, addr); 5762306a36Sopenharmony_ci else 5862306a36Sopenharmony_ci pte = pte_offset_map(pmd, addr); 5962306a36Sopenharmony_ci if (pte) { 6062306a36Sopenharmony_ci err = walk_pte_range_inner(pte, addr, end, walk); 6162306a36Sopenharmony_ci if (walk->mm != &init_mm && addr < TASK_SIZE) 6262306a36Sopenharmony_ci pte_unmap(pte); 6362306a36Sopenharmony_ci } 6462306a36Sopenharmony_ci } else { 6562306a36Sopenharmony_ci pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 6662306a36Sopenharmony_ci if (pte) { 6762306a36Sopenharmony_ci err = walk_pte_range_inner(pte, addr, end, walk); 6862306a36Sopenharmony_ci pte_unmap_unlock(pte, ptl); 6962306a36Sopenharmony_ci } 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci if (!pte) 7262306a36Sopenharmony_ci walk->action = ACTION_AGAIN; 7362306a36Sopenharmony_ci return err; 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci#ifdef CONFIG_ARCH_HAS_HUGEPD 7762306a36Sopenharmony_cistatic int walk_hugepd_range(hugepd_t *phpd, unsigned long addr, 7862306a36Sopenharmony_ci unsigned long end, struct mm_walk *walk, int pdshift) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci int err = 0; 8162306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 8262306a36Sopenharmony_ci int shift = hugepd_shift(*phpd); 8362306a36Sopenharmony_ci int page_size = 1 << shift; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci if (!ops->pte_entry) 8662306a36Sopenharmony_ci return 0; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci if (addr & (page_size - 1)) 8962306a36Sopenharmony_ci return 0; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci for (;;) { 9262306a36Sopenharmony_ci pte_t *pte; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci spin_lock(&walk->mm->page_table_lock); 9562306a36Sopenharmony_ci pte = hugepte_offset(*phpd, addr, pdshift); 9662306a36Sopenharmony_ci err = ops->pte_entry(pte, addr, addr + page_size, walk); 9762306a36Sopenharmony_ci spin_unlock(&walk->mm->page_table_lock); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci if (err) 10062306a36Sopenharmony_ci break; 10162306a36Sopenharmony_ci if (addr >= end - page_size) 10262306a36Sopenharmony_ci break; 10362306a36Sopenharmony_ci addr += page_size; 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci return err; 10662306a36Sopenharmony_ci} 10762306a36Sopenharmony_ci#else 10862306a36Sopenharmony_cistatic int walk_hugepd_range(hugepd_t *phpd, unsigned long addr, 10962306a36Sopenharmony_ci unsigned long end, struct mm_walk *walk, int pdshift) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci return 0; 11262306a36Sopenharmony_ci} 11362306a36Sopenharmony_ci#endif 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistatic int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, 11662306a36Sopenharmony_ci struct mm_walk *walk) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci pmd_t *pmd; 11962306a36Sopenharmony_ci unsigned long next; 12062306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 12162306a36Sopenharmony_ci int err = 0; 12262306a36Sopenharmony_ci int depth = real_depth(3); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci pmd = pmd_offset(pud, addr); 12562306a36Sopenharmony_ci do { 12662306a36Sopenharmony_ciagain: 12762306a36Sopenharmony_ci next = pmd_addr_end(addr, end); 12862306a36Sopenharmony_ci if (pmd_none(*pmd)) { 12962306a36Sopenharmony_ci if (ops->pte_hole) 13062306a36Sopenharmony_ci err = ops->pte_hole(addr, next, depth, walk); 13162306a36Sopenharmony_ci if (err) 13262306a36Sopenharmony_ci break; 13362306a36Sopenharmony_ci continue; 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci walk->action = ACTION_SUBTREE; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* 13962306a36Sopenharmony_ci * This implies that each ->pmd_entry() handler 14062306a36Sopenharmony_ci * needs to know about pmd_trans_huge() pmds 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_ci if (ops->pmd_entry) 14362306a36Sopenharmony_ci err = ops->pmd_entry(pmd, addr, next, walk); 14462306a36Sopenharmony_ci if (err) 14562306a36Sopenharmony_ci break; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (walk->action == ACTION_AGAIN) 14862306a36Sopenharmony_ci goto again; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci /* 15162306a36Sopenharmony_ci * Check this here so we only break down trans_huge 15262306a36Sopenharmony_ci * pages when we _need_ to 15362306a36Sopenharmony_ci */ 15462306a36Sopenharmony_ci if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) || 15562306a36Sopenharmony_ci walk->action == ACTION_CONTINUE || 15662306a36Sopenharmony_ci !(ops->pte_entry)) 15762306a36Sopenharmony_ci continue; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci if (walk->vma) 16062306a36Sopenharmony_ci split_huge_pmd(walk->vma, pmd, addr); 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci if (is_hugepd(__hugepd(pmd_val(*pmd)))) 16362306a36Sopenharmony_ci err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT); 16462306a36Sopenharmony_ci else 16562306a36Sopenharmony_ci err = walk_pte_range(pmd, addr, next, walk); 16662306a36Sopenharmony_ci if (err) 16762306a36Sopenharmony_ci break; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci if (walk->action == ACTION_AGAIN) 17062306a36Sopenharmony_ci goto again; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci } while (pmd++, addr = next, addr != end); 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci return err; 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_cistatic int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, 17862306a36Sopenharmony_ci struct mm_walk *walk) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci pud_t *pud; 18162306a36Sopenharmony_ci unsigned long next; 18262306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 18362306a36Sopenharmony_ci int err = 0; 18462306a36Sopenharmony_ci int depth = real_depth(2); 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci pud = pud_offset(p4d, addr); 18762306a36Sopenharmony_ci do { 18862306a36Sopenharmony_ci again: 18962306a36Sopenharmony_ci next = pud_addr_end(addr, end); 19062306a36Sopenharmony_ci if (pud_none(*pud)) { 19162306a36Sopenharmony_ci if (ops->pte_hole) 19262306a36Sopenharmony_ci err = ops->pte_hole(addr, next, depth, walk); 19362306a36Sopenharmony_ci if (err) 19462306a36Sopenharmony_ci break; 19562306a36Sopenharmony_ci continue; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci walk->action = ACTION_SUBTREE; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci if (ops->pud_entry) 20162306a36Sopenharmony_ci err = ops->pud_entry(pud, addr, next, walk); 20262306a36Sopenharmony_ci if (err) 20362306a36Sopenharmony_ci break; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci if (walk->action == ACTION_AGAIN) 20662306a36Sopenharmony_ci goto again; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) || 20962306a36Sopenharmony_ci walk->action == ACTION_CONTINUE || 21062306a36Sopenharmony_ci !(ops->pmd_entry || ops->pte_entry)) 21162306a36Sopenharmony_ci continue; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (walk->vma) 21462306a36Sopenharmony_ci split_huge_pud(walk->vma, pud, addr); 21562306a36Sopenharmony_ci if (pud_none(*pud)) 21662306a36Sopenharmony_ci goto again; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci if (is_hugepd(__hugepd(pud_val(*pud)))) 21962306a36Sopenharmony_ci err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT); 22062306a36Sopenharmony_ci else 22162306a36Sopenharmony_ci err = walk_pmd_range(pud, addr, next, walk); 22262306a36Sopenharmony_ci if (err) 22362306a36Sopenharmony_ci break; 22462306a36Sopenharmony_ci } while (pud++, addr = next, addr != end); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci return err; 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, 23062306a36Sopenharmony_ci struct mm_walk *walk) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci p4d_t *p4d; 23362306a36Sopenharmony_ci unsigned long next; 23462306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 23562306a36Sopenharmony_ci int err = 0; 23662306a36Sopenharmony_ci int depth = real_depth(1); 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci p4d = p4d_offset(pgd, addr); 23962306a36Sopenharmony_ci do { 24062306a36Sopenharmony_ci next = p4d_addr_end(addr, end); 24162306a36Sopenharmony_ci if (p4d_none_or_clear_bad(p4d)) { 24262306a36Sopenharmony_ci if (ops->pte_hole) 24362306a36Sopenharmony_ci err = ops->pte_hole(addr, next, depth, walk); 24462306a36Sopenharmony_ci if (err) 24562306a36Sopenharmony_ci break; 24662306a36Sopenharmony_ci continue; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci if (ops->p4d_entry) { 24962306a36Sopenharmony_ci err = ops->p4d_entry(p4d, addr, next, walk); 25062306a36Sopenharmony_ci if (err) 25162306a36Sopenharmony_ci break; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci if (is_hugepd(__hugepd(p4d_val(*p4d)))) 25462306a36Sopenharmony_ci err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT); 25562306a36Sopenharmony_ci else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry) 25662306a36Sopenharmony_ci err = walk_pud_range(p4d, addr, next, walk); 25762306a36Sopenharmony_ci if (err) 25862306a36Sopenharmony_ci break; 25962306a36Sopenharmony_ci } while (p4d++, addr = next, addr != end); 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci return err; 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic int walk_pgd_range(unsigned long addr, unsigned long end, 26562306a36Sopenharmony_ci struct mm_walk *walk) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci pgd_t *pgd; 26862306a36Sopenharmony_ci unsigned long next; 26962306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 27062306a36Sopenharmony_ci int err = 0; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci if (walk->pgd) 27362306a36Sopenharmony_ci pgd = walk->pgd + pgd_index(addr); 27462306a36Sopenharmony_ci else 27562306a36Sopenharmony_ci pgd = pgd_offset(walk->mm, addr); 27662306a36Sopenharmony_ci do { 27762306a36Sopenharmony_ci next = pgd_addr_end(addr, end); 27862306a36Sopenharmony_ci if (pgd_none_or_clear_bad(pgd)) { 27962306a36Sopenharmony_ci if (ops->pte_hole) 28062306a36Sopenharmony_ci err = ops->pte_hole(addr, next, 0, walk); 28162306a36Sopenharmony_ci if (err) 28262306a36Sopenharmony_ci break; 28362306a36Sopenharmony_ci continue; 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci if (ops->pgd_entry) { 28662306a36Sopenharmony_ci err = ops->pgd_entry(pgd, addr, next, walk); 28762306a36Sopenharmony_ci if (err) 28862306a36Sopenharmony_ci break; 28962306a36Sopenharmony_ci } 29062306a36Sopenharmony_ci if (is_hugepd(__hugepd(pgd_val(*pgd)))) 29162306a36Sopenharmony_ci err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT); 29262306a36Sopenharmony_ci else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry) 29362306a36Sopenharmony_ci err = walk_p4d_range(pgd, addr, next, walk); 29462306a36Sopenharmony_ci if (err) 29562306a36Sopenharmony_ci break; 29662306a36Sopenharmony_ci } while (pgd++, addr = next, addr != end); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci return err; 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE 30262306a36Sopenharmony_cistatic unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr, 30362306a36Sopenharmony_ci unsigned long end) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h); 30662306a36Sopenharmony_ci return boundary < end ? boundary : end; 30762306a36Sopenharmony_ci} 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_cistatic int walk_hugetlb_range(unsigned long addr, unsigned long end, 31062306a36Sopenharmony_ci struct mm_walk *walk) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 31362306a36Sopenharmony_ci struct hstate *h = hstate_vma(vma); 31462306a36Sopenharmony_ci unsigned long next; 31562306a36Sopenharmony_ci unsigned long hmask = huge_page_mask(h); 31662306a36Sopenharmony_ci unsigned long sz = huge_page_size(h); 31762306a36Sopenharmony_ci pte_t *pte; 31862306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 31962306a36Sopenharmony_ci int err = 0; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci hugetlb_vma_lock_read(vma); 32262306a36Sopenharmony_ci do { 32362306a36Sopenharmony_ci next = hugetlb_entry_end(h, addr, end); 32462306a36Sopenharmony_ci pte = hugetlb_walk(vma, addr & hmask, sz); 32562306a36Sopenharmony_ci if (pte) 32662306a36Sopenharmony_ci err = ops->hugetlb_entry(pte, hmask, addr, next, walk); 32762306a36Sopenharmony_ci else if (ops->pte_hole) 32862306a36Sopenharmony_ci err = ops->pte_hole(addr, next, -1, walk); 32962306a36Sopenharmony_ci if (err) 33062306a36Sopenharmony_ci break; 33162306a36Sopenharmony_ci } while (addr = next, addr != end); 33262306a36Sopenharmony_ci hugetlb_vma_unlock_read(vma); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci return err; 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci#else /* CONFIG_HUGETLB_PAGE */ 33862306a36Sopenharmony_cistatic int walk_hugetlb_range(unsigned long addr, unsigned long end, 33962306a36Sopenharmony_ci struct mm_walk *walk) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci return 0; 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci#endif /* CONFIG_HUGETLB_PAGE */ 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci/* 34762306a36Sopenharmony_ci * Decide whether we really walk over the current vma on [@start, @end) 34862306a36Sopenharmony_ci * or skip it via the returned value. Return 0 if we do walk over the 34962306a36Sopenharmony_ci * current vma, and return 1 if we skip the vma. Negative values means 35062306a36Sopenharmony_ci * error, where we abort the current walk. 35162306a36Sopenharmony_ci */ 35262306a36Sopenharmony_cistatic int walk_page_test(unsigned long start, unsigned long end, 35362306a36Sopenharmony_ci struct mm_walk *walk) 35462306a36Sopenharmony_ci{ 35562306a36Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 35662306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci if (ops->test_walk) 35962306a36Sopenharmony_ci return ops->test_walk(start, end, walk); 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci /* 36262306a36Sopenharmony_ci * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP 36362306a36Sopenharmony_ci * range, so we don't walk over it as we do for normal vmas. However, 36462306a36Sopenharmony_ci * Some callers are interested in handling hole range and they don't 36562306a36Sopenharmony_ci * want to just ignore any single address range. Such users certainly 36662306a36Sopenharmony_ci * define their ->pte_hole() callbacks, so let's delegate them to handle 36762306a36Sopenharmony_ci * vma(VM_PFNMAP). 36862306a36Sopenharmony_ci */ 36962306a36Sopenharmony_ci if (vma->vm_flags & VM_PFNMAP) { 37062306a36Sopenharmony_ci int err = 1; 37162306a36Sopenharmony_ci if (ops->pte_hole) 37262306a36Sopenharmony_ci err = ops->pte_hole(start, end, -1, walk); 37362306a36Sopenharmony_ci return err ? err : 1; 37462306a36Sopenharmony_ci } 37562306a36Sopenharmony_ci return 0; 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_cistatic int __walk_page_range(unsigned long start, unsigned long end, 37962306a36Sopenharmony_ci struct mm_walk *walk) 38062306a36Sopenharmony_ci{ 38162306a36Sopenharmony_ci int err = 0; 38262306a36Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 38362306a36Sopenharmony_ci const struct mm_walk_ops *ops = walk->ops; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci if (ops->pre_vma) { 38662306a36Sopenharmony_ci err = ops->pre_vma(start, end, walk); 38762306a36Sopenharmony_ci if (err) 38862306a36Sopenharmony_ci return err; 38962306a36Sopenharmony_ci } 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci if (is_vm_hugetlb_page(vma)) { 39262306a36Sopenharmony_ci if (ops->hugetlb_entry) 39362306a36Sopenharmony_ci err = walk_hugetlb_range(start, end, walk); 39462306a36Sopenharmony_ci } else 39562306a36Sopenharmony_ci err = walk_pgd_range(start, end, walk); 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci if (ops->post_vma) 39862306a36Sopenharmony_ci ops->post_vma(walk); 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci return err; 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_cistatic inline void process_mm_walk_lock(struct mm_struct *mm, 40462306a36Sopenharmony_ci enum page_walk_lock walk_lock) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci if (walk_lock == PGWALK_RDLOCK) 40762306a36Sopenharmony_ci mmap_assert_locked(mm); 40862306a36Sopenharmony_ci else 40962306a36Sopenharmony_ci mmap_assert_write_locked(mm); 41062306a36Sopenharmony_ci} 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_cistatic inline void process_vma_walk_lock(struct vm_area_struct *vma, 41362306a36Sopenharmony_ci enum page_walk_lock walk_lock) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci#ifdef CONFIG_PER_VMA_LOCK 41662306a36Sopenharmony_ci switch (walk_lock) { 41762306a36Sopenharmony_ci case PGWALK_WRLOCK: 41862306a36Sopenharmony_ci vma_start_write(vma); 41962306a36Sopenharmony_ci break; 42062306a36Sopenharmony_ci case PGWALK_WRLOCK_VERIFY: 42162306a36Sopenharmony_ci vma_assert_write_locked(vma); 42262306a36Sopenharmony_ci break; 42362306a36Sopenharmony_ci case PGWALK_RDLOCK: 42462306a36Sopenharmony_ci /* PGWALK_RDLOCK is handled by process_mm_walk_lock */ 42562306a36Sopenharmony_ci break; 42662306a36Sopenharmony_ci } 42762306a36Sopenharmony_ci#endif 42862306a36Sopenharmony_ci} 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/** 43162306a36Sopenharmony_ci * walk_page_range - walk page table with caller specific callbacks 43262306a36Sopenharmony_ci * @mm: mm_struct representing the target process of page table walk 43362306a36Sopenharmony_ci * @start: start address of the virtual address range 43462306a36Sopenharmony_ci * @end: end address of the virtual address range 43562306a36Sopenharmony_ci * @ops: operation to call during the walk 43662306a36Sopenharmony_ci * @private: private data for callbacks' usage 43762306a36Sopenharmony_ci * 43862306a36Sopenharmony_ci * Recursively walk the page table tree of the process represented by @mm 43962306a36Sopenharmony_ci * within the virtual address range [@start, @end). During walking, we can do 44062306a36Sopenharmony_ci * some caller-specific works for each entry, by setting up pmd_entry(), 44162306a36Sopenharmony_ci * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these 44262306a36Sopenharmony_ci * callbacks, the associated entries/pages are just ignored. 44362306a36Sopenharmony_ci * The return values of these callbacks are commonly defined like below: 44462306a36Sopenharmony_ci * 44562306a36Sopenharmony_ci * - 0 : succeeded to handle the current entry, and if you don't reach the 44662306a36Sopenharmony_ci * end address yet, continue to walk. 44762306a36Sopenharmony_ci * - >0 : succeeded to handle the current entry, and return to the caller 44862306a36Sopenharmony_ci * with caller specific value. 44962306a36Sopenharmony_ci * - <0 : failed to handle the current entry, and return to the caller 45062306a36Sopenharmony_ci * with error code. 45162306a36Sopenharmony_ci * 45262306a36Sopenharmony_ci * Before starting to walk page table, some callers want to check whether 45362306a36Sopenharmony_ci * they really want to walk over the current vma, typically by checking 45462306a36Sopenharmony_ci * its vm_flags. walk_page_test() and @ops->test_walk() are used for this 45562306a36Sopenharmony_ci * purpose. 45662306a36Sopenharmony_ci * 45762306a36Sopenharmony_ci * If operations need to be staged before and committed after a vma is walked, 45862306a36Sopenharmony_ci * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(), 45962306a36Sopenharmony_ci * since it is intended to handle commit-type operations, can't return any 46062306a36Sopenharmony_ci * errors. 46162306a36Sopenharmony_ci * 46262306a36Sopenharmony_ci * struct mm_walk keeps current values of some common data like vma and pmd, 46362306a36Sopenharmony_ci * which are useful for the access from callbacks. If you want to pass some 46462306a36Sopenharmony_ci * caller-specific data to callbacks, @private should be helpful. 46562306a36Sopenharmony_ci * 46662306a36Sopenharmony_ci * Locking: 46762306a36Sopenharmony_ci * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock, 46862306a36Sopenharmony_ci * because these function traverse vma list and/or access to vma's data. 46962306a36Sopenharmony_ci */ 47062306a36Sopenharmony_ciint walk_page_range(struct mm_struct *mm, unsigned long start, 47162306a36Sopenharmony_ci unsigned long end, const struct mm_walk_ops *ops, 47262306a36Sopenharmony_ci void *private) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci int err = 0; 47562306a36Sopenharmony_ci unsigned long next; 47662306a36Sopenharmony_ci struct vm_area_struct *vma; 47762306a36Sopenharmony_ci struct mm_walk walk = { 47862306a36Sopenharmony_ci .ops = ops, 47962306a36Sopenharmony_ci .mm = mm, 48062306a36Sopenharmony_ci .private = private, 48162306a36Sopenharmony_ci }; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if (start >= end) 48462306a36Sopenharmony_ci return -EINVAL; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci if (!walk.mm) 48762306a36Sopenharmony_ci return -EINVAL; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci process_mm_walk_lock(walk.mm, ops->walk_lock); 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci vma = find_vma(walk.mm, start); 49262306a36Sopenharmony_ci do { 49362306a36Sopenharmony_ci if (!vma) { /* after the last vma */ 49462306a36Sopenharmony_ci walk.vma = NULL; 49562306a36Sopenharmony_ci next = end; 49662306a36Sopenharmony_ci if (ops->pte_hole) 49762306a36Sopenharmony_ci err = ops->pte_hole(start, next, -1, &walk); 49862306a36Sopenharmony_ci } else if (start < vma->vm_start) { /* outside vma */ 49962306a36Sopenharmony_ci walk.vma = NULL; 50062306a36Sopenharmony_ci next = min(end, vma->vm_start); 50162306a36Sopenharmony_ci if (ops->pte_hole) 50262306a36Sopenharmony_ci err = ops->pte_hole(start, next, -1, &walk); 50362306a36Sopenharmony_ci } else { /* inside vma */ 50462306a36Sopenharmony_ci process_vma_walk_lock(vma, ops->walk_lock); 50562306a36Sopenharmony_ci walk.vma = vma; 50662306a36Sopenharmony_ci next = min(end, vma->vm_end); 50762306a36Sopenharmony_ci vma = find_vma(mm, vma->vm_end); 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci err = walk_page_test(start, next, &walk); 51062306a36Sopenharmony_ci if (err > 0) { 51162306a36Sopenharmony_ci /* 51262306a36Sopenharmony_ci * positive return values are purely for 51362306a36Sopenharmony_ci * controlling the pagewalk, so should never 51462306a36Sopenharmony_ci * be passed to the callers. 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_ci err = 0; 51762306a36Sopenharmony_ci continue; 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci if (err < 0) 52062306a36Sopenharmony_ci break; 52162306a36Sopenharmony_ci err = __walk_page_range(start, next, &walk); 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci if (err) 52462306a36Sopenharmony_ci break; 52562306a36Sopenharmony_ci } while (start = next, start < end); 52662306a36Sopenharmony_ci return err; 52762306a36Sopenharmony_ci} 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci/** 53062306a36Sopenharmony_ci * walk_page_range_novma - walk a range of pagetables not backed by a vma 53162306a36Sopenharmony_ci * @mm: mm_struct representing the target process of page table walk 53262306a36Sopenharmony_ci * @start: start address of the virtual address range 53362306a36Sopenharmony_ci * @end: end address of the virtual address range 53462306a36Sopenharmony_ci * @ops: operation to call during the walk 53562306a36Sopenharmony_ci * @pgd: pgd to walk if different from mm->pgd 53662306a36Sopenharmony_ci * @private: private data for callbacks' usage 53762306a36Sopenharmony_ci * 53862306a36Sopenharmony_ci * Similar to walk_page_range() but can walk any page tables even if they are 53962306a36Sopenharmony_ci * not backed by VMAs. Because 'unusual' entries may be walked this function 54062306a36Sopenharmony_ci * will also not lock the PTEs for the pte_entry() callback. This is useful for 54162306a36Sopenharmony_ci * walking the kernel pages tables or page tables for firmware. 54262306a36Sopenharmony_ci */ 54362306a36Sopenharmony_ciint walk_page_range_novma(struct mm_struct *mm, unsigned long start, 54462306a36Sopenharmony_ci unsigned long end, const struct mm_walk_ops *ops, 54562306a36Sopenharmony_ci pgd_t *pgd, 54662306a36Sopenharmony_ci void *private) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci struct mm_walk walk = { 54962306a36Sopenharmony_ci .ops = ops, 55062306a36Sopenharmony_ci .mm = mm, 55162306a36Sopenharmony_ci .pgd = pgd, 55262306a36Sopenharmony_ci .private = private, 55362306a36Sopenharmony_ci .no_vma = true 55462306a36Sopenharmony_ci }; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci if (start >= end || !walk.mm) 55762306a36Sopenharmony_ci return -EINVAL; 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci mmap_assert_write_locked(walk.mm); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci return walk_pgd_range(start, end, &walk); 56262306a36Sopenharmony_ci} 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ciint walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, 56562306a36Sopenharmony_ci unsigned long end, const struct mm_walk_ops *ops, 56662306a36Sopenharmony_ci void *private) 56762306a36Sopenharmony_ci{ 56862306a36Sopenharmony_ci struct mm_walk walk = { 56962306a36Sopenharmony_ci .ops = ops, 57062306a36Sopenharmony_ci .mm = vma->vm_mm, 57162306a36Sopenharmony_ci .vma = vma, 57262306a36Sopenharmony_ci .private = private, 57362306a36Sopenharmony_ci }; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (start >= end || !walk.mm) 57662306a36Sopenharmony_ci return -EINVAL; 57762306a36Sopenharmony_ci if (start < vma->vm_start || end > vma->vm_end) 57862306a36Sopenharmony_ci return -EINVAL; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci process_mm_walk_lock(walk.mm, ops->walk_lock); 58162306a36Sopenharmony_ci process_vma_walk_lock(vma, ops->walk_lock); 58262306a36Sopenharmony_ci return __walk_page_range(start, end, &walk); 58362306a36Sopenharmony_ci} 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ciint walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, 58662306a36Sopenharmony_ci void *private) 58762306a36Sopenharmony_ci{ 58862306a36Sopenharmony_ci struct mm_walk walk = { 58962306a36Sopenharmony_ci .ops = ops, 59062306a36Sopenharmony_ci .mm = vma->vm_mm, 59162306a36Sopenharmony_ci .vma = vma, 59262306a36Sopenharmony_ci .private = private, 59362306a36Sopenharmony_ci }; 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci if (!walk.mm) 59662306a36Sopenharmony_ci return -EINVAL; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci process_mm_walk_lock(walk.mm, ops->walk_lock); 59962306a36Sopenharmony_ci process_vma_walk_lock(vma, ops->walk_lock); 60062306a36Sopenharmony_ci return __walk_page_range(vma->vm_start, vma->vm_end, &walk); 60162306a36Sopenharmony_ci} 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci/** 60462306a36Sopenharmony_ci * walk_page_mapping - walk all memory areas mapped into a struct address_space. 60562306a36Sopenharmony_ci * @mapping: Pointer to the struct address_space 60662306a36Sopenharmony_ci * @first_index: First page offset in the address_space 60762306a36Sopenharmony_ci * @nr: Number of incremental page offsets to cover 60862306a36Sopenharmony_ci * @ops: operation to call during the walk 60962306a36Sopenharmony_ci * @private: private data for callbacks' usage 61062306a36Sopenharmony_ci * 61162306a36Sopenharmony_ci * This function walks all memory areas mapped into a struct address_space. 61262306a36Sopenharmony_ci * The walk is limited to only the given page-size index range, but if 61362306a36Sopenharmony_ci * the index boundaries cross a huge page-table entry, that entry will be 61462306a36Sopenharmony_ci * included. 61562306a36Sopenharmony_ci * 61662306a36Sopenharmony_ci * Also see walk_page_range() for additional information. 61762306a36Sopenharmony_ci * 61862306a36Sopenharmony_ci * Locking: 61962306a36Sopenharmony_ci * This function can't require that the struct mm_struct::mmap_lock is held, 62062306a36Sopenharmony_ci * since @mapping may be mapped by multiple processes. Instead 62162306a36Sopenharmony_ci * @mapping->i_mmap_rwsem must be held. This might have implications in the 62262306a36Sopenharmony_ci * callbacks, and it's up tho the caller to ensure that the 62362306a36Sopenharmony_ci * struct mm_struct::mmap_lock is not needed. 62462306a36Sopenharmony_ci * 62562306a36Sopenharmony_ci * Also this means that a caller can't rely on the struct 62662306a36Sopenharmony_ci * vm_area_struct::vm_flags to be constant across a call, 62762306a36Sopenharmony_ci * except for immutable flags. Callers requiring this shouldn't use 62862306a36Sopenharmony_ci * this function. 62962306a36Sopenharmony_ci * 63062306a36Sopenharmony_ci * Return: 0 on success, negative error code on failure, positive number on 63162306a36Sopenharmony_ci * caller defined premature termination. 63262306a36Sopenharmony_ci */ 63362306a36Sopenharmony_ciint walk_page_mapping(struct address_space *mapping, pgoff_t first_index, 63462306a36Sopenharmony_ci pgoff_t nr, const struct mm_walk_ops *ops, 63562306a36Sopenharmony_ci void *private) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci struct mm_walk walk = { 63862306a36Sopenharmony_ci .ops = ops, 63962306a36Sopenharmony_ci .private = private, 64062306a36Sopenharmony_ci }; 64162306a36Sopenharmony_ci struct vm_area_struct *vma; 64262306a36Sopenharmony_ci pgoff_t vba, vea, cba, cea; 64362306a36Sopenharmony_ci unsigned long start_addr, end_addr; 64462306a36Sopenharmony_ci int err = 0; 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci lockdep_assert_held(&mapping->i_mmap_rwsem); 64762306a36Sopenharmony_ci vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index, 64862306a36Sopenharmony_ci first_index + nr - 1) { 64962306a36Sopenharmony_ci /* Clip to the vma */ 65062306a36Sopenharmony_ci vba = vma->vm_pgoff; 65162306a36Sopenharmony_ci vea = vba + vma_pages(vma); 65262306a36Sopenharmony_ci cba = first_index; 65362306a36Sopenharmony_ci cba = max(cba, vba); 65462306a36Sopenharmony_ci cea = first_index + nr; 65562306a36Sopenharmony_ci cea = min(cea, vea); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start; 65862306a36Sopenharmony_ci end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start; 65962306a36Sopenharmony_ci if (start_addr >= end_addr) 66062306a36Sopenharmony_ci continue; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci walk.vma = vma; 66362306a36Sopenharmony_ci walk.mm = vma->vm_mm; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci err = walk_page_test(vma->vm_start, vma->vm_end, &walk); 66662306a36Sopenharmony_ci if (err > 0) { 66762306a36Sopenharmony_ci err = 0; 66862306a36Sopenharmony_ci break; 66962306a36Sopenharmony_ci } else if (err < 0) 67062306a36Sopenharmony_ci break; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci err = __walk_page_range(start_addr, end_addr, &walk); 67362306a36Sopenharmony_ci if (err) 67462306a36Sopenharmony_ci break; 67562306a36Sopenharmony_ci } 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci return err; 67862306a36Sopenharmony_ci} 679