// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

/* Tear down the walk state (unlock/unmap) and report "not found". */
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

/*
 * map_pte - map the pte at pvmw->address, and decide whether to lock it.
 *
 * With PVMW_SYNC the pte is mapped and locked unconditionally.  Otherwise
 * the pte is mapped without its lock and peeked at first: entries that are
 * clearly of no interest to this walk (non-swap ptes under PVMW_MIGRATION,
 * swap entries other than device-private/device-exclusive, non-present
 * ptes) make map_pte() return false without ever taking the lock.
 *
 * On success pvmw->pte is mapped, pvmw->ptl is held, and *ptlp is the ptl
 * corresponding to the pte.  *ptlp is also set on the unlocked early-out
 * paths (see comment below).  Returns false when the pte could not be
 * mapped, or is not worth locking.
 */
static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
{
	pte_t ptent;

	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}

	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					  pvmw->address, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		/* Only swap-type entries can be migration entries */
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;
		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry, nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping ie lock CPU page table and return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(ptent)) {
		return false;
	}
	/* The pte looks interesting: take the lock before rechecking it */
	pvmw->ptl = *ptlp;
	spin_lock(pvmw->ptl);
	return true;
}

/**
 * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
 * mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range
 * for checking
 *
 * page_vma_mapped_walk() found a place where pfn range is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to
 * arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
 * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
 * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * Otherwise, return false.
 *
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;
	pte_t ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;
		if (!is_swap_pte(ptent))
			return false;
		entry = pte_to_swp_entry(ptent);

		if (!is_migration_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else {
		if (!pte_present(ptent))
			return false;

		pfn = pte_pfn(ptent);
	}

	/* Unsigned subtraction: also false when pfn < pvmw->pfn (wraps huge) */
	return (pfn - pvmw->pfn) < pvmw->nr_pages;
}

/*
 * Returns true if the two ranges overlap.  Careful to not overflow.
 */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
	/* PMD range [pfn, pfn + HPAGE_PMD_NR) ends before the target range? */
	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
		return false;
	/* PMD range starts after the target range ends? */
	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
		return false;
	return true;
}

/*
 * Round pvmw->address up to the next 'size'-aligned boundary; saturate to
 * ULONG_MAX if the addition wrapped to 0, so the walk's "address < end"
 * loop condition terminates instead of restarting from address zero.
 */
static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}

/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
 * adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem. Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

	/* First address past the range that could map the pfn range */
	end = vma_address_end(pvmw);
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
		/* Walk down pgd -> p4d -> pud, skipping absent upper levels */
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			/* Re-read the pmd under its lock before trusting it */
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				/* Non-present huge pmd: only a PMD migration
				 * entry can satisfy the walk, and only when
				 * the caller asked for migration entries.
				 */
				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				/* PMD-mapped: pvmw->pmd set, pvmw->pte left NULL */
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    transhuge_vma_suitable(vma, pvmw->address) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw, &ptl)) {
			/* pte could not even be mapped: redo the pmd lookup */
			if (!pvmw->pte)
				goto restart;
			/* pte mapped but not interesting: try the next one */
			goto next_pte;
		}
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

		/* Found a non-none pte: take the ptl map_pte() reported */
		if (!pvmw->ptl) {
			pvmw->ptl = ptl;
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
32462306a36Sopenharmony_ci */ 32562306a36Sopenharmony_ciint page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) 32662306a36Sopenharmony_ci{ 32762306a36Sopenharmony_ci struct page_vma_mapped_walk pvmw = { 32862306a36Sopenharmony_ci .pfn = page_to_pfn(page), 32962306a36Sopenharmony_ci .nr_pages = 1, 33062306a36Sopenharmony_ci .vma = vma, 33162306a36Sopenharmony_ci .flags = PVMW_SYNC, 33262306a36Sopenharmony_ci }; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci pvmw.address = vma_address(page, vma); 33562306a36Sopenharmony_ci if (pvmw.address == -EFAULT) 33662306a36Sopenharmony_ci return 0; 33762306a36Sopenharmony_ci if (!page_vma_mapped_walk(&pvmw)) 33862306a36Sopenharmony_ci return 0; 33962306a36Sopenharmony_ci page_vma_mapped_walk_done(&pvmw); 34062306a36Sopenharmony_ci return 1; 34162306a36Sopenharmony_ci} 342