162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/mm.h>
362306a36Sopenharmony_ci#include <linux/rmap.h>
462306a36Sopenharmony_ci#include <linux/hugetlb.h>
562306a36Sopenharmony_ci#include <linux/swap.h>
662306a36Sopenharmony_ci#include <linux/swapops.h>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "internal.h"
962306a36Sopenharmony_ci
/*
 * Terminate the walk: page_vma_mapped_walk_done() releases whatever PTE
 * map and page-table lock *pvmw currently holds.  Returning false lets
 * callers write the common "return not_found(pvmw);" one-liner.
 */
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}
1562306a36Sopenharmony_ci
/*
 * map_pte - map the PTE page under pvmw->pmd and decide whether the entry
 * at pvmw->address is worth locking.
 *
 * With PVMW_SYNC the PTE lock is taken unconditionally (stricter lookup);
 * otherwise the entry is peeked at without the lock first, and the lock is
 * only taken when the entry could possibly match (present PTE, migration
 * entry under PVMW_MIGRATION, or a device-private/exclusive swap entry).
 *
 * On return true: pvmw->pte points to the mapped entry and pvmw->ptl is
 * locked.  On return false with pvmw->pte set: entry cannot match, caller
 * may step to the next PTE; *ptlp is still filled in so the caller can
 * lock this PTE page later.  On return false with pvmw->pte NULL: the PTE
 * page vanished under us, caller must restart from the PMD level.
 */
static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
{
	pte_t ptent;

	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}

	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					  pvmw->address, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		/* Only swap-type entries can be migration entries. */
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;
		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry, nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping ie lock CPU page table and return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(ptent)) {
		/* pte_none() or other non-present entry: cannot match. */
		return false;
	}
	/* Possible match: take the lock before the caller re-checks. */
	pvmw->ptl = *ptlp;
	spin_lock(pvmw->ptl);
	return true;
}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci/**
7662306a36Sopenharmony_ci * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
7762306a36Sopenharmony_ci * mapped at the @pvmw->pte
7862306a36Sopenharmony_ci * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range
7962306a36Sopenharmony_ci * for checking
8062306a36Sopenharmony_ci *
8162306a36Sopenharmony_ci * page_vma_mapped_walk() found a place where pfn range is *potentially*
8262306a36Sopenharmony_ci * mapped. check_pte() has to validate this.
8362306a36Sopenharmony_ci *
8462306a36Sopenharmony_ci * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to
8562306a36Sopenharmony_ci * arbitrary page.
8662306a36Sopenharmony_ci *
8762306a36Sopenharmony_ci * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
8862306a36Sopenharmony_ci * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
8962306a36Sopenharmony_ci *
9062306a36Sopenharmony_ci * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
9162306a36Sopenharmony_ci * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
9262306a36Sopenharmony_ci *
9362306a36Sopenharmony_ci * Otherwise, return false.
9462306a36Sopenharmony_ci *
9562306a36Sopenharmony_ci */
9662306a36Sopenharmony_cistatic bool check_pte(struct page_vma_mapped_walk *pvmw)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	unsigned long pfn;
9962306a36Sopenharmony_ci	pte_t ptent = ptep_get(pvmw->pte);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	if (pvmw->flags & PVMW_MIGRATION) {
10262306a36Sopenharmony_ci		swp_entry_t entry;
10362306a36Sopenharmony_ci		if (!is_swap_pte(ptent))
10462306a36Sopenharmony_ci			return false;
10562306a36Sopenharmony_ci		entry = pte_to_swp_entry(ptent);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci		if (!is_migration_entry(entry) &&
10862306a36Sopenharmony_ci		    !is_device_exclusive_entry(entry))
10962306a36Sopenharmony_ci			return false;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci		pfn = swp_offset_pfn(entry);
11262306a36Sopenharmony_ci	} else if (is_swap_pte(ptent)) {
11362306a36Sopenharmony_ci		swp_entry_t entry;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci		/* Handle un-addressable ZONE_DEVICE memory */
11662306a36Sopenharmony_ci		entry = pte_to_swp_entry(ptent);
11762306a36Sopenharmony_ci		if (!is_device_private_entry(entry) &&
11862306a36Sopenharmony_ci		    !is_device_exclusive_entry(entry))
11962306a36Sopenharmony_ci			return false;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci		pfn = swp_offset_pfn(entry);
12262306a36Sopenharmony_ci	} else {
12362306a36Sopenharmony_ci		if (!pte_present(ptent))
12462306a36Sopenharmony_ci			return false;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci		pfn = pte_pfn(ptent);
12762306a36Sopenharmony_ci	}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	return (pfn - pvmw->pfn) < pvmw->nr_pages;
13062306a36Sopenharmony_ci}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci/* Returns true if the two ranges overlap.  Careful to not overflow. */
13362306a36Sopenharmony_cistatic bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
13662306a36Sopenharmony_ci		return false;
13762306a36Sopenharmony_ci	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
13862306a36Sopenharmony_ci		return false;
13962306a36Sopenharmony_ci	return true;
14062306a36Sopenharmony_ci}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cistatic void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	pvmw->address = (pvmw->address + size) & ~(size - 1);
14562306a36Sopenharmony_ci	if (!pvmw->address)
14662306a36Sopenharmony_ci		pvmw->address = ULONG_MAX;
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
 * adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;	/* ptl of the mapped PTE page, filled by map_pte() */
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem.  Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

	/* First address (exclusive) beyond the range this walk may visit. */
	end = vma_address_end(pvmw);
	/* Re-entered after a previous match: resume from the next PTE. */
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			/* Re-read now that the pmd lock is held. */
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				/* Non-present huge pmd: only a THP migration
				 * entry can match, and only when asked for. */
				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				/* Present huge pmd can never be a migration
				 * entry; match it against the pfn range. */
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    transhuge_vma_suitable(vma, pvmw->address) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw, &ptl)) {
			/* PTE page vanished: redo the walk from pgd level. */
			if (!pvmw->pte)
				goto restart;
			/* Entry can't match, but the page stayed mapped. */
			goto next_pte;
		}
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		/* Skip pte_none() entries until a candidate or the end. */
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

		/* map_pte() chose not to lock earlier; lock before re-check. */
		if (!pvmw->ptl) {
			pvmw->ptl = ptl;
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci/**
31762306a36Sopenharmony_ci * page_mapped_in_vma - check whether a page is really mapped in a VMA
31862306a36Sopenharmony_ci * @page: the page to test
31962306a36Sopenharmony_ci * @vma: the VMA to test
32062306a36Sopenharmony_ci *
32162306a36Sopenharmony_ci * Returns 1 if the page is mapped into the page tables of the VMA, 0
32262306a36Sopenharmony_ci * if the page is not mapped into the page tables of this VMA.  Only
32362306a36Sopenharmony_ci * valid for normal file or anonymous VMAs.
32462306a36Sopenharmony_ci */
32562306a36Sopenharmony_ciint page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
32662306a36Sopenharmony_ci{
32762306a36Sopenharmony_ci	struct page_vma_mapped_walk pvmw = {
32862306a36Sopenharmony_ci		.pfn = page_to_pfn(page),
32962306a36Sopenharmony_ci		.nr_pages = 1,
33062306a36Sopenharmony_ci		.vma = vma,
33162306a36Sopenharmony_ci		.flags = PVMW_SYNC,
33262306a36Sopenharmony_ci	};
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	pvmw.address = vma_address(page, vma);
33562306a36Sopenharmony_ci	if (pvmw.address == -EFAULT)
33662306a36Sopenharmony_ci		return 0;
33762306a36Sopenharmony_ci	if (!page_vma_mapped_walk(&pvmw))
33862306a36Sopenharmony_ci		return 0;
33962306a36Sopenharmony_ci	page_vma_mapped_walk_done(&pvmw);
34062306a36Sopenharmony_ci	return 1;
34162306a36Sopenharmony_ci}
342