18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci#include <linux/mm.h> 38c2ecf20Sopenharmony_ci#include <linux/rmap.h> 48c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 58c2ecf20Sopenharmony_ci#include <linux/swap.h> 68c2ecf20Sopenharmony_ci#include <linux/swapops.h> 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include "internal.h" 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_cistatic inline bool not_found(struct page_vma_mapped_walk *pvmw) 118c2ecf20Sopenharmony_ci{ 128c2ecf20Sopenharmony_ci page_vma_mapped_walk_done(pvmw); 138c2ecf20Sopenharmony_ci return false; 148c2ecf20Sopenharmony_ci} 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_cistatic bool map_pte(struct page_vma_mapped_walk *pvmw) 178c2ecf20Sopenharmony_ci{ 188c2ecf20Sopenharmony_ci pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address); 198c2ecf20Sopenharmony_ci if (!(pvmw->flags & PVMW_SYNC)) { 208c2ecf20Sopenharmony_ci if (pvmw->flags & PVMW_MIGRATION) { 218c2ecf20Sopenharmony_ci if (!is_swap_pte(*pvmw->pte)) 228c2ecf20Sopenharmony_ci return false; 238c2ecf20Sopenharmony_ci } else { 248c2ecf20Sopenharmony_ci /* 258c2ecf20Sopenharmony_ci * We get here when we are trying to unmap a private 268c2ecf20Sopenharmony_ci * device page from the process address space. Such 278c2ecf20Sopenharmony_ci * page is not CPU accessible and thus is mapped as 288c2ecf20Sopenharmony_ci * a special swap entry, nonetheless it still does 298c2ecf20Sopenharmony_ci * count as a valid regular mapping for the page (and 308c2ecf20Sopenharmony_ci * is accounted as such in page maps count). 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci * So handle this special case as if it was a normal 338c2ecf20Sopenharmony_ci * page mapping ie lock CPU page table and returns 348c2ecf20Sopenharmony_ci * true. 358c2ecf20Sopenharmony_ci * 368c2ecf20Sopenharmony_ci * For more details on device private memory see HMM 378c2ecf20Sopenharmony_ci * (include/linux/hmm.h or mm/hmm.c). 388c2ecf20Sopenharmony_ci */ 398c2ecf20Sopenharmony_ci if (is_swap_pte(*pvmw->pte)) { 408c2ecf20Sopenharmony_ci swp_entry_t entry; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci /* Handle un-addressable ZONE_DEVICE memory */ 438c2ecf20Sopenharmony_ci entry = pte_to_swp_entry(*pvmw->pte); 448c2ecf20Sopenharmony_ci if (!is_device_private_entry(entry)) 458c2ecf20Sopenharmony_ci return false; 468c2ecf20Sopenharmony_ci } else if (!pte_present(*pvmw->pte)) 478c2ecf20Sopenharmony_ci return false; 488c2ecf20Sopenharmony_ci } 498c2ecf20Sopenharmony_ci } 508c2ecf20Sopenharmony_ci pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd); 518c2ecf20Sopenharmony_ci spin_lock(pvmw->ptl); 528c2ecf20Sopenharmony_ci return true; 538c2ecf20Sopenharmony_ci} 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_cistatic inline bool pfn_is_match(struct page *page, unsigned long pfn) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci unsigned long page_pfn = page_to_pfn(page); 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci /* normal page and hugetlbfs page */ 608c2ecf20Sopenharmony_ci if (!PageTransCompound(page) || PageHuge(page)) 618c2ecf20Sopenharmony_ci return page_pfn == pfn; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci /* THP can be referenced by any subpage */ 648c2ecf20Sopenharmony_ci return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page); 658c2ecf20Sopenharmony_ci} 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci/** 688c2ecf20Sopenharmony_ci * check_pte - check if @pvmw->page is mapped at the @pvmw->pte 698c2ecf20Sopenharmony_ci * 708c2ecf20Sopenharmony_ci * page_vma_mapped_walk() found a place where @pvmw->page is *potentially* 718c2ecf20Sopenharmony_ci * mapped. check_pte() has to validate this. 728c2ecf20Sopenharmony_ci * 738c2ecf20Sopenharmony_ci * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary 748c2ecf20Sopenharmony_ci * page. 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration 778c2ecf20Sopenharmony_ci * entry that points to @pvmw->page or any subpage in case of THP. 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to 808c2ecf20Sopenharmony_ci * @pvmw->page or any subpage in case of THP. 818c2ecf20Sopenharmony_ci * 828c2ecf20Sopenharmony_ci * Otherwise, return false. 838c2ecf20Sopenharmony_ci * 848c2ecf20Sopenharmony_ci */ 858c2ecf20Sopenharmony_cistatic bool check_pte(struct page_vma_mapped_walk *pvmw) 868c2ecf20Sopenharmony_ci{ 878c2ecf20Sopenharmony_ci unsigned long pfn; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci if (pvmw->flags & PVMW_MIGRATION) { 908c2ecf20Sopenharmony_ci swp_entry_t entry; 918c2ecf20Sopenharmony_ci if (!is_swap_pte(*pvmw->pte)) 928c2ecf20Sopenharmony_ci return false; 938c2ecf20Sopenharmony_ci entry = pte_to_swp_entry(*pvmw->pte); 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci if (!is_migration_entry(entry)) 968c2ecf20Sopenharmony_ci return false; 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci pfn = migration_entry_to_pfn(entry); 998c2ecf20Sopenharmony_ci } else if (is_swap_pte(*pvmw->pte)) { 1008c2ecf20Sopenharmony_ci swp_entry_t entry; 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci /* Handle un-addressable ZONE_DEVICE memory */ 1038c2ecf20Sopenharmony_ci entry = pte_to_swp_entry(*pvmw->pte); 1048c2ecf20Sopenharmony_ci if (!is_device_private_entry(entry)) 1058c2ecf20Sopenharmony_ci return false; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci pfn = device_private_entry_to_pfn(entry); 1088c2ecf20Sopenharmony_ci } else { 1098c2ecf20Sopenharmony_ci if (!pte_present(*pvmw->pte)) 1108c2ecf20Sopenharmony_ci return false; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci pfn = pte_pfn(*pvmw->pte); 1138c2ecf20Sopenharmony_ci } 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci return pfn_is_match(pvmw->page, pfn); 1168c2ecf20Sopenharmony_ci} 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_cistatic void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci pvmw->address = (pvmw->address + size) & ~(size - 1); 1218c2ecf20Sopenharmony_ci if (!pvmw->address) 1228c2ecf20Sopenharmony_ci pvmw->address = ULONG_MAX; 1238c2ecf20Sopenharmony_ci} 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci/** 1268c2ecf20Sopenharmony_ci * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at 1278c2ecf20Sopenharmony_ci * @pvmw->address 1288c2ecf20Sopenharmony_ci * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags 1298c2ecf20Sopenharmony_ci * must be set. pmd, pte and ptl must be NULL. 1308c2ecf20Sopenharmony_ci * 1318c2ecf20Sopenharmony_ci * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point 1328c2ecf20Sopenharmony_ci * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is 1338c2ecf20Sopenharmony_ci * adjusted if needed (for PTE-mapped THPs). 1348c2ecf20Sopenharmony_ci * 1358c2ecf20Sopenharmony_ci * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page 1368c2ecf20Sopenharmony_ci * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in 1378c2ecf20Sopenharmony_ci * a loop to find all PTEs that map the THP. 1388c2ecf20Sopenharmony_ci * 1398c2ecf20Sopenharmony_ci * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry 1408c2ecf20Sopenharmony_ci * regardless of which page table level the page is mapped at. @pvmw->pmd is 1418c2ecf20Sopenharmony_ci * NULL. 1428c2ecf20Sopenharmony_ci * 1438c2ecf20Sopenharmony_ci * Retruns false if there are no more page table entries for the page in 1448c2ecf20Sopenharmony_ci * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped. 1458c2ecf20Sopenharmony_ci * 1468c2ecf20Sopenharmony_ci * If you need to stop the walk before page_vma_mapped_walk() returned false, 1478c2ecf20Sopenharmony_ci * use page_vma_mapped_walk_done(). It will do the housekeeping. 1488c2ecf20Sopenharmony_ci */ 1498c2ecf20Sopenharmony_cibool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci struct mm_struct *mm = pvmw->vma->vm_mm; 1528c2ecf20Sopenharmony_ci struct page *page = pvmw->page; 1538c2ecf20Sopenharmony_ci unsigned long end; 1548c2ecf20Sopenharmony_ci pgd_t *pgd; 1558c2ecf20Sopenharmony_ci p4d_t *p4d; 1568c2ecf20Sopenharmony_ci pud_t *pud; 1578c2ecf20Sopenharmony_ci pmd_t pmde; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci /* The only possible pmd mapping has been handled on last iteration */ 1608c2ecf20Sopenharmony_ci if (pvmw->pmd && !pvmw->pte) 1618c2ecf20Sopenharmony_ci return not_found(pvmw); 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (unlikely(PageHuge(page))) { 1648c2ecf20Sopenharmony_ci /* The only possible mapping was handled on last iteration */ 1658c2ecf20Sopenharmony_ci if (pvmw->pte) 1668c2ecf20Sopenharmony_ci return not_found(pvmw); 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci /* when pud is not present, pte will be NULL */ 1698c2ecf20Sopenharmony_ci pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); 1708c2ecf20Sopenharmony_ci if (!pvmw->pte) 1718c2ecf20Sopenharmony_ci return false; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte); 1748c2ecf20Sopenharmony_ci spin_lock(pvmw->ptl); 1758c2ecf20Sopenharmony_ci if (!check_pte(pvmw)) 1768c2ecf20Sopenharmony_ci return not_found(pvmw); 1778c2ecf20Sopenharmony_ci return true; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci /* 1818c2ecf20Sopenharmony_ci * Seek to next pte only makes sense for THP. 1828c2ecf20Sopenharmony_ci * But more important than that optimization, is to filter out 1838c2ecf20Sopenharmony_ci * any PageKsm page: whose page->index misleads vma_address() 1848c2ecf20Sopenharmony_ci * and vma_address_end() to disaster. 1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_ci end = PageTransCompound(page) ? 1878c2ecf20Sopenharmony_ci vma_address_end(page, pvmw->vma) : 1888c2ecf20Sopenharmony_ci pvmw->address + PAGE_SIZE; 1898c2ecf20Sopenharmony_ci if (pvmw->pte) 1908c2ecf20Sopenharmony_ci goto next_pte; 1918c2ecf20Sopenharmony_cirestart: 1928c2ecf20Sopenharmony_ci do { 1938c2ecf20Sopenharmony_ci pgd = pgd_offset(mm, pvmw->address); 1948c2ecf20Sopenharmony_ci if (!pgd_present(*pgd)) { 1958c2ecf20Sopenharmony_ci step_forward(pvmw, PGDIR_SIZE); 1968c2ecf20Sopenharmony_ci continue; 1978c2ecf20Sopenharmony_ci } 1988c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, pvmw->address); 1998c2ecf20Sopenharmony_ci if (!p4d_present(*p4d)) { 2008c2ecf20Sopenharmony_ci step_forward(pvmw, P4D_SIZE); 2018c2ecf20Sopenharmony_ci continue; 2028c2ecf20Sopenharmony_ci } 2038c2ecf20Sopenharmony_ci pud = pud_offset(p4d, pvmw->address); 2048c2ecf20Sopenharmony_ci if (!pud_present(*pud)) { 2058c2ecf20Sopenharmony_ci step_forward(pvmw, PUD_SIZE); 2068c2ecf20Sopenharmony_ci continue; 2078c2ecf20Sopenharmony_ci } 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci pvmw->pmd = pmd_offset(pud, pvmw->address); 2108c2ecf20Sopenharmony_ci /* 2118c2ecf20Sopenharmony_ci * Make sure the pmd value isn't cached in a register by the 2128c2ecf20Sopenharmony_ci * compiler and used as a stale value after we've observed a 2138c2ecf20Sopenharmony_ci * subsequent update. 2148c2ecf20Sopenharmony_ci */ 2158c2ecf20Sopenharmony_ci pmde = READ_ONCE(*pvmw->pmd); 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { 2188c2ecf20Sopenharmony_ci pvmw->ptl = pmd_lock(mm, pvmw->pmd); 2198c2ecf20Sopenharmony_ci pmde = *pvmw->pmd; 2208c2ecf20Sopenharmony_ci if (likely(pmd_trans_huge(pmde))) { 2218c2ecf20Sopenharmony_ci if (pvmw->flags & PVMW_MIGRATION) 2228c2ecf20Sopenharmony_ci return not_found(pvmw); 2238c2ecf20Sopenharmony_ci if (pmd_page(pmde) != page) 2248c2ecf20Sopenharmony_ci return not_found(pvmw); 2258c2ecf20Sopenharmony_ci return true; 2268c2ecf20Sopenharmony_ci } 2278c2ecf20Sopenharmony_ci if (!pmd_present(pmde)) { 2288c2ecf20Sopenharmony_ci swp_entry_t entry; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci if (!thp_migration_supported() || 2318c2ecf20Sopenharmony_ci !(pvmw->flags & PVMW_MIGRATION)) 2328c2ecf20Sopenharmony_ci return not_found(pvmw); 2338c2ecf20Sopenharmony_ci entry = pmd_to_swp_entry(pmde); 2348c2ecf20Sopenharmony_ci if (!is_migration_entry(entry) || 2358c2ecf20Sopenharmony_ci migration_entry_to_page(entry) != page) 2368c2ecf20Sopenharmony_ci return not_found(pvmw); 2378c2ecf20Sopenharmony_ci return true; 2388c2ecf20Sopenharmony_ci } 2398c2ecf20Sopenharmony_ci /* THP pmd was split under us: handle on pte level */ 2408c2ecf20Sopenharmony_ci spin_unlock(pvmw->ptl); 2418c2ecf20Sopenharmony_ci pvmw->ptl = NULL; 2428c2ecf20Sopenharmony_ci } else if (!pmd_present(pmde)) { 2438c2ecf20Sopenharmony_ci /* 2448c2ecf20Sopenharmony_ci * If PVMW_SYNC, take and drop THP pmd lock so that we 2458c2ecf20Sopenharmony_ci * cannot return prematurely, while zap_huge_pmd() has 2468c2ecf20Sopenharmony_ci * cleared *pmd but not decremented compound_mapcount(). 2478c2ecf20Sopenharmony_ci */ 2488c2ecf20Sopenharmony_ci if ((pvmw->flags & PVMW_SYNC) && 2498c2ecf20Sopenharmony_ci PageTransCompound(page)) { 2508c2ecf20Sopenharmony_ci spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci spin_unlock(ptl); 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ci step_forward(pvmw, PMD_SIZE); 2558c2ecf20Sopenharmony_ci continue; 2568c2ecf20Sopenharmony_ci } 2578c2ecf20Sopenharmony_ci if (!map_pte(pvmw)) 2588c2ecf20Sopenharmony_ci goto next_pte; 2598c2ecf20Sopenharmony_cithis_pte: 2608c2ecf20Sopenharmony_ci if (check_pte(pvmw)) 2618c2ecf20Sopenharmony_ci return true; 2628c2ecf20Sopenharmony_cinext_pte: 2638c2ecf20Sopenharmony_ci do { 2648c2ecf20Sopenharmony_ci pvmw->address += PAGE_SIZE; 2658c2ecf20Sopenharmony_ci if (pvmw->address >= end) 2668c2ecf20Sopenharmony_ci return not_found(pvmw); 2678c2ecf20Sopenharmony_ci /* Did we cross page table boundary? */ 2688c2ecf20Sopenharmony_ci if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { 2698c2ecf20Sopenharmony_ci if (pvmw->ptl) { 2708c2ecf20Sopenharmony_ci spin_unlock(pvmw->ptl); 2718c2ecf20Sopenharmony_ci pvmw->ptl = NULL; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci pte_unmap(pvmw->pte); 2748c2ecf20Sopenharmony_ci pvmw->pte = NULL; 2758c2ecf20Sopenharmony_ci goto restart; 2768c2ecf20Sopenharmony_ci } 2778c2ecf20Sopenharmony_ci pvmw->pte++; 2788c2ecf20Sopenharmony_ci if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { 2798c2ecf20Sopenharmony_ci pvmw->ptl = pte_lockptr(mm, pvmw->pmd); 2808c2ecf20Sopenharmony_ci spin_lock(pvmw->ptl); 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci } while (pte_none(*pvmw->pte)); 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci if (!pvmw->ptl) { 2858c2ecf20Sopenharmony_ci pvmw->ptl = pte_lockptr(mm, pvmw->pmd); 2868c2ecf20Sopenharmony_ci spin_lock(pvmw->ptl); 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci goto this_pte; 2898c2ecf20Sopenharmony_ci } while (pvmw->address < end); 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci return false; 2928c2ecf20Sopenharmony_ci} 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci/** 2958c2ecf20Sopenharmony_ci * page_mapped_in_vma - check whether a page is really mapped in a VMA 2968c2ecf20Sopenharmony_ci * @page: the page to test 2978c2ecf20Sopenharmony_ci * @vma: the VMA to test 2988c2ecf20Sopenharmony_ci * 2998c2ecf20Sopenharmony_ci * Returns 1 if the page is mapped into the page tables of the VMA, 0 3008c2ecf20Sopenharmony_ci * if the page is not mapped into the page tables of this VMA. Only 3018c2ecf20Sopenharmony_ci * valid for normal file or anonymous VMAs. 3028c2ecf20Sopenharmony_ci */ 3038c2ecf20Sopenharmony_ciint page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) 3048c2ecf20Sopenharmony_ci{ 3058c2ecf20Sopenharmony_ci struct page_vma_mapped_walk pvmw = { 3068c2ecf20Sopenharmony_ci .page = page, 3078c2ecf20Sopenharmony_ci .vma = vma, 3088c2ecf20Sopenharmony_ci .flags = PVMW_SYNC, 3098c2ecf20Sopenharmony_ci }; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci pvmw.address = vma_address(page, vma); 3128c2ecf20Sopenharmony_ci if (pvmw.address == -EFAULT) 3138c2ecf20Sopenharmony_ci return 0; 3148c2ecf20Sopenharmony_ci if (!page_vma_mapped_walk(&pvmw)) 3158c2ecf20Sopenharmony_ci return 0; 3168c2ecf20Sopenharmony_ci page_vma_mapped_walk_done(&pvmw); 3178c2ecf20Sopenharmony_ci return 1; 3188c2ecf20Sopenharmony_ci} 319