162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/mm.h>
362306a36Sopenharmony_ci#include <linux/gfp.h>
462306a36Sopenharmony_ci#include <linux/hugetlb.h>
562306a36Sopenharmony_ci#include <asm/pgalloc.h>
662306a36Sopenharmony_ci#include <asm/tlb.h>
762306a36Sopenharmony_ci#include <asm/fixmap.h>
862306a36Sopenharmony_ci#include <asm/mtrr.h>
962306a36Sopenharmony_ci
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
/*
 * Mask with the low __PHYSICAL_MASK_SHIFT bits set.  The variable is
 * marked __ro_after_init and is exported to modules.
 */
phys_addr_t physical_mask __ro_after_init =
	(1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif	/* CONFIG_DYNAMIC_PHYSICAL_MASK */
1462306a36Sopenharmony_ci
/*
 * Extra GFP bit OR-ed into the user PTE allocation flags:
 * __GFP_HIGHMEM when CONFIG_HIGHPTE is enabled, nothing otherwise.
 */
#ifndef CONFIG_HIGHPTE
#define PGTABLE_HIGHMEM 0
#else
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#endif
2062306a36Sopenharmony_ci
#ifndef CONFIG_PARAVIRT
/*
 * Fallback used when CONFIG_PARAVIRT is not set: pass the table straight
 * to tlb_remove_page().
 */
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb,
					     void *table)
{
	tlb_remove_page(tlb, table);
}
#endif	/* !CONFIG_PARAVIRT */
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cigfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cipgtable_t pte_alloc_one(struct mm_struct *mm)
3262306a36Sopenharmony_ci{
3362306a36Sopenharmony_ci	return __pte_alloc_one(mm, __userpte_alloc_gfp);
3462306a36Sopenharmony_ci}
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_cistatic int __init setup_userpte(char *arg)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	if (!arg)
3962306a36Sopenharmony_ci		return -EINVAL;
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/*
4262306a36Sopenharmony_ci	 * "userpte=nohigh" disables allocation of user pagetables in
4362306a36Sopenharmony_ci	 * high memory.
4462306a36Sopenharmony_ci	 */
4562306a36Sopenharmony_ci	if (strcmp(arg, "nohigh") == 0)
4662306a36Sopenharmony_ci		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
4762306a36Sopenharmony_ci	else
4862306a36Sopenharmony_ci		return -EINVAL;
4962306a36Sopenharmony_ci	return 0;
5062306a36Sopenharmony_ci}
5162306a36Sopenharmony_ciearly_param("userpte", setup_userpte);
5262306a36Sopenharmony_ci
/*
 * Free a PTE page via the mmu_gather @tlb: run the PTE ptdesc destructor,
 * call the paravirt release hook for the page's pfn, then pass the page to
 * paravirt_tlb_remove_table().
 */
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	struct ptdesc *pte_desc = page_ptdesc(pte);

	pagetable_pte_dtor(pte_desc);
	paravirt_release_pte(page_to_pfn(pte));
	paravirt_tlb_remove_table(tlb, pte);
}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci#if CONFIG_PGTABLE_LEVELS > 2
6162306a36Sopenharmony_civoid ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
6462306a36Sopenharmony_ci	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
6562306a36Sopenharmony_ci	/*
6662306a36Sopenharmony_ci	 * NOTE! For PAE, any changes to the top page-directory-pointer-table
6762306a36Sopenharmony_ci	 * entries need a full cr3 reload to flush.
6862306a36Sopenharmony_ci	 */
6962306a36Sopenharmony_ci#ifdef CONFIG_X86_PAE
7062306a36Sopenharmony_ci	tlb->need_flush_all = 1;
7162306a36Sopenharmony_ci#endif
7262306a36Sopenharmony_ci	pagetable_pmd_dtor(ptdesc);
7362306a36Sopenharmony_ci	paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc));
7462306a36Sopenharmony_ci}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci#if CONFIG_PGTABLE_LEVELS > 3
7762306a36Sopenharmony_civoid ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
7862306a36Sopenharmony_ci{
7962306a36Sopenharmony_ci	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
8062306a36Sopenharmony_ci	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci#if CONFIG_PGTABLE_LEVELS > 4
8462306a36Sopenharmony_civoid ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
8762306a36Sopenharmony_ci	paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
8862306a36Sopenharmony_ci}
8962306a36Sopenharmony_ci#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
9062306a36Sopenharmony_ci#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
9162306a36Sopenharmony_ci#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic inline void pgd_list_add(pgd_t *pgd)
9462306a36Sopenharmony_ci{
9562306a36Sopenharmony_ci	struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	list_add(&ptdesc->pt_list, &pgd_list);
9862306a36Sopenharmony_ci}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_cistatic inline void pgd_list_del(pgd_t *pgd)
10162306a36Sopenharmony_ci{
10262306a36Sopenharmony_ci	struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	list_del(&ptdesc->pt_list);
10562306a36Sopenharmony_ci}
10662306a36Sopenharmony_ci
/*
 * UNSHARED_PTRS_PER_PGD is KERNEL_PGD_BOUNDARY when SHARED_KERNEL_PMD is
 * true and PTRS_PER_PGD otherwise.  MAX_UNSHARED_PTRS_PER_PGD is the larger
 * of those two values.
 */
#define UNSHARED_PTRS_PER_PGD \
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
#define MAX_UNSHARED_PTRS_PER_PGD \
	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_cistatic void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
11462306a36Sopenharmony_ci{
11562306a36Sopenharmony_ci	virt_to_ptdesc(pgd)->pt_mm = mm;
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_cistruct mm_struct *pgd_page_get_mm(struct page *page)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	return page_ptdesc(page)->pt_mm;
12162306a36Sopenharmony_ci}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_cistatic void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	/* If the pgd points to a shared pagetable level (either the
12662306a36Sopenharmony_ci	   ptes in non-PAE, or shared PMD in PAE), then just copy the
12762306a36Sopenharmony_ci	   references from swapper_pg_dir. */
12862306a36Sopenharmony_ci	if (CONFIG_PGTABLE_LEVELS == 2 ||
12962306a36Sopenharmony_ci	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
13062306a36Sopenharmony_ci	    CONFIG_PGTABLE_LEVELS >= 4) {
13162306a36Sopenharmony_ci		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
13262306a36Sopenharmony_ci				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
13362306a36Sopenharmony_ci				KERNEL_PGD_PTRS);
13462306a36Sopenharmony_ci	}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	/* list required to sync kernel mapping updates */
13762306a36Sopenharmony_ci	if (!SHARED_KERNEL_PMD) {
13862306a36Sopenharmony_ci		pgd_set_mm(pgd, mm);
13962306a36Sopenharmony_ci		pgd_list_add(pgd);
14062306a36Sopenharmony_ci	}
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_cistatic void pgd_dtor(pgd_t *pgd)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	if (SHARED_KERNEL_PMD)
14662306a36Sopenharmony_ci		return;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	spin_lock(&pgd_lock);
14962306a36Sopenharmony_ci	pgd_list_del(pgd);
15062306a36Sopenharmony_ci	spin_unlock(&pgd_lock);
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci/*
15462306a36Sopenharmony_ci * List of all pgd's needed for non-PAE so it can invalidate entries
15562306a36Sopenharmony_ci * in both cached and uncached pgd's; not needed for PAE since the
15662306a36Sopenharmony_ci * kernel pmd is shared. If PAE were not to share the pmd a similar
15762306a36Sopenharmony_ci * tactic would be needed. This is essentially codepath-based locking
15862306a36Sopenharmony_ci * against pageattr.c; it is the unique case in which a valid change
15962306a36Sopenharmony_ci * of kernel pagetables can't be lazily synchronized by vmalloc faults.
16062306a36Sopenharmony_ci * vmalloc faults work because attached pagetables are never freed.
16162306a36Sopenharmony_ci * -- nyc
16262306a36Sopenharmony_ci */
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update.  Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
#define PREALLOCATED_PMDS		UNSHARED_PTRS_PER_PGD
#define MAX_PREALLOCATED_PMDS		MAX_UNSHARED_PTRS_PER_PGD

/*
 * With PTI enabled, the kernel part of the user page-table gets PMDs of
 * its own.  They are needed to map the per-process LDT into the
 * user-space page-table.
 */
#define PREALLOCATED_USER_PMDS \
	(boot_cpu_has(X86_FEATURE_PTI) ? KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS	KERNEL_PGD_PTRS
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_civoid pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	/* Note: almost everything apart from _PAGE_PRESENT is
19362306a36Sopenharmony_ci	   reserved at the pmd (PDPT) level. */
19462306a36Sopenharmony_ci	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	/*
19762306a36Sopenharmony_ci	 * According to Intel App note "TLBs, Paging-Structure Caches,
19862306a36Sopenharmony_ci	 * and Their Invalidation", April 2007, document 317080-001,
19962306a36Sopenharmony_ci	 * section 8.1: in PAE mode we explicitly have to flush the
20062306a36Sopenharmony_ci	 * TLB via cr3 if the top-level pgd is changed...
20162306a36Sopenharmony_ci	 */
20262306a36Sopenharmony_ci	flush_tlb_mm(mm);
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci#else  /* !CONFIG_X86_PAE */
20562306a36Sopenharmony_ci
/*
 * Non-PAE modes have no need to prepopulate any pagetable entries, so
 * every preallocation count is zero.
 */
#define PREALLOCATED_PMDS		0
#define MAX_PREALLOCATED_PMDS		0
#define PREALLOCATED_USER_PMDS		0
#define MAX_PREALLOCATED_USER_PMDS	0
21162306a36Sopenharmony_ci#endif	/* CONFIG_X86_PAE */
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_cistatic void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
21462306a36Sopenharmony_ci{
21562306a36Sopenharmony_ci	int i;
21662306a36Sopenharmony_ci	struct ptdesc *ptdesc;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	for (i = 0; i < count; i++)
21962306a36Sopenharmony_ci		if (pmds[i]) {
22062306a36Sopenharmony_ci			ptdesc = virt_to_ptdesc(pmds[i]);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci			pagetable_pmd_dtor(ptdesc);
22362306a36Sopenharmony_ci			pagetable_free(ptdesc);
22462306a36Sopenharmony_ci			mm_dec_nr_pmds(mm);
22562306a36Sopenharmony_ci		}
22662306a36Sopenharmony_ci}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_cistatic int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
22962306a36Sopenharmony_ci{
23062306a36Sopenharmony_ci	int i;
23162306a36Sopenharmony_ci	bool failed = false;
23262306a36Sopenharmony_ci	gfp_t gfp = GFP_PGTABLE_USER;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	if (mm == &init_mm)
23562306a36Sopenharmony_ci		gfp &= ~__GFP_ACCOUNT;
23662306a36Sopenharmony_ci	gfp &= ~__GFP_HIGHMEM;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	for (i = 0; i < count; i++) {
23962306a36Sopenharmony_ci		pmd_t *pmd = NULL;
24062306a36Sopenharmony_ci		struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci		if (!ptdesc)
24362306a36Sopenharmony_ci			failed = true;
24462306a36Sopenharmony_ci		if (ptdesc && !pagetable_pmd_ctor(ptdesc)) {
24562306a36Sopenharmony_ci			pagetable_free(ptdesc);
24662306a36Sopenharmony_ci			ptdesc = NULL;
24762306a36Sopenharmony_ci			failed = true;
24862306a36Sopenharmony_ci		}
24962306a36Sopenharmony_ci		if (ptdesc) {
25062306a36Sopenharmony_ci			mm_inc_nr_pmds(mm);
25162306a36Sopenharmony_ci			pmd = ptdesc_address(ptdesc);
25262306a36Sopenharmony_ci		}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci		pmds[i] = pmd;
25562306a36Sopenharmony_ci	}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	if (failed) {
25862306a36Sopenharmony_ci		free_pmds(mm, pmds, count);
25962306a36Sopenharmony_ci		return -ENOMEM;
26062306a36Sopenharmony_ci	}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	return 0;
26362306a36Sopenharmony_ci}
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci/*
26662306a36Sopenharmony_ci * Mop up any pmd pages which may still be attached to the pgd.
26762306a36Sopenharmony_ci * Normally they will be freed by munmap/exit_mmap, but any pmd we
26862306a36Sopenharmony_ci * preallocate which never got a corresponding vma will need to be
26962306a36Sopenharmony_ci * freed manually.
27062306a36Sopenharmony_ci */
27162306a36Sopenharmony_cistatic void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	pgd_t pgd = *pgdp;
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	if (pgd_val(pgd) != 0) {
27662306a36Sopenharmony_ci		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci		pgd_clear(pgdp);
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci		paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
28162306a36Sopenharmony_ci		pmd_free(mm, pmd);
28262306a36Sopenharmony_ci		mm_dec_nr_pmds(mm);
28362306a36Sopenharmony_ci	}
28462306a36Sopenharmony_ci}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_cistatic void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	int i;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	for (i = 0; i < PREALLOCATED_PMDS; i++)
29162306a36Sopenharmony_ci		mop_up_one_pmd(mm, &pgdp[i]);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci#ifdef CONFIG_PAGE_TABLE_ISOLATION
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_PTI))
29662306a36Sopenharmony_ci		return;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	pgdp = kernel_to_user_pgdp(pgdp);
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
30162306a36Sopenharmony_ci		mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
30262306a36Sopenharmony_ci#endif
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_cistatic void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	p4d_t *p4d;
30862306a36Sopenharmony_ci	pud_t *pud;
30962306a36Sopenharmony_ci	int i;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	p4d = p4d_offset(pgd, 0);
31262306a36Sopenharmony_ci	pud = pud_offset(p4d, 0);
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
31562306a36Sopenharmony_ci		pmd_t *pmd = pmds[i];
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci		if (i >= KERNEL_PGD_BOUNDARY)
31862306a36Sopenharmony_ci			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
31962306a36Sopenharmony_ci			       sizeof(pmd_t) * PTRS_PER_PMD);
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci		pud_populate(mm, pud, pmd);
32262306a36Sopenharmony_ci	}
32362306a36Sopenharmony_ci}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci#ifdef CONFIG_PAGE_TABLE_ISOLATION
32662306a36Sopenharmony_cistatic void pgd_prepopulate_user_pmd(struct mm_struct *mm,
32762306a36Sopenharmony_ci				     pgd_t *k_pgd, pmd_t *pmds[])
32862306a36Sopenharmony_ci{
32962306a36Sopenharmony_ci	pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
33062306a36Sopenharmony_ci	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
33162306a36Sopenharmony_ci	p4d_t *u_p4d;
33262306a36Sopenharmony_ci	pud_t *u_pud;
33362306a36Sopenharmony_ci	int i;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	u_p4d = p4d_offset(u_pgd, 0);
33662306a36Sopenharmony_ci	u_pud = pud_offset(u_p4d, 0);
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	s_pgd += KERNEL_PGD_BOUNDARY;
33962306a36Sopenharmony_ci	u_pud += KERNEL_PGD_BOUNDARY;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
34262306a36Sopenharmony_ci		pmd_t *pmd = pmds[i];
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci		memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
34562306a36Sopenharmony_ci		       sizeof(pmd_t) * PTRS_PER_PMD);
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci		pud_populate(mm, u_pud, pmd);
34862306a36Sopenharmony_ci	}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci}
35162306a36Sopenharmony_ci#else
35262306a36Sopenharmony_cistatic void pgd_prepopulate_user_pmd(struct mm_struct *mm,
35362306a36Sopenharmony_ci				     pgd_t *k_pgd, pmd_t *pmds[])
35462306a36Sopenharmony_ci{
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci#endif
35762306a36Sopenharmony_ci/*
35862306a36Sopenharmony_ci * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
35962306a36Sopenharmony_ci * assumes that pgd should be in one page.
36062306a36Sopenharmony_ci *
36162306a36Sopenharmony_ci * But kernel with PAE paging that is not running as a Xen domain
36262306a36Sopenharmony_ci * only needs to allocate 32 bytes for pgd instead of one page.
36362306a36Sopenharmony_ci */
36462306a36Sopenharmony_ci#ifdef CONFIG_X86_PAE
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci#include <linux/slab.h>
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
36962306a36Sopenharmony_ci#define PGD_ALIGN	32
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_cistatic struct kmem_cache *pgd_cache;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_civoid __init pgtable_cache_init(void)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	/*
37662306a36Sopenharmony_ci	 * When PAE kernel is running as a Xen domain, it does not use
37762306a36Sopenharmony_ci	 * shared kernel pmd. And this requires a whole page for pgd.
37862306a36Sopenharmony_ci	 */
37962306a36Sopenharmony_ci	if (!SHARED_KERNEL_PMD)
38062306a36Sopenharmony_ci		return;
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	/*
38362306a36Sopenharmony_ci	 * when PAE kernel is not running as a Xen domain, it uses
38462306a36Sopenharmony_ci	 * shared kernel pmd. Shared kernel pmd does not require a whole
38562306a36Sopenharmony_ci	 * page for pgd. We are able to just allocate a 32-byte for pgd.
38662306a36Sopenharmony_ci	 * During boot time, we create a 32-byte slab for pgd table allocation.
38762306a36Sopenharmony_ci	 */
38862306a36Sopenharmony_ci	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
38962306a36Sopenharmony_ci				      SLAB_PANIC, NULL);
39062306a36Sopenharmony_ci}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_cistatic inline pgd_t *_pgd_alloc(void)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	/*
39562306a36Sopenharmony_ci	 * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
39662306a36Sopenharmony_ci	 * We allocate one page for pgd.
39762306a36Sopenharmony_ci	 */
39862306a36Sopenharmony_ci	if (!SHARED_KERNEL_PMD)
39962306a36Sopenharmony_ci		return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
40062306a36Sopenharmony_ci						 PGD_ALLOCATION_ORDER);
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	/*
40362306a36Sopenharmony_ci	 * Now PAE kernel is not running as a Xen domain. We can allocate
40462306a36Sopenharmony_ci	 * a 32-byte slab for pgd to save memory space.
40562306a36Sopenharmony_ci	 */
40662306a36Sopenharmony_ci	return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
40762306a36Sopenharmony_ci}
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic inline void _pgd_free(pgd_t *pgd)
41062306a36Sopenharmony_ci{
41162306a36Sopenharmony_ci	if (!SHARED_KERNEL_PMD)
41262306a36Sopenharmony_ci		free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
41362306a36Sopenharmony_ci	else
41462306a36Sopenharmony_ci		kmem_cache_free(pgd_cache, pgd);
41562306a36Sopenharmony_ci}
41662306a36Sopenharmony_ci#else
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic inline pgd_t *_pgd_alloc(void)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
42162306a36Sopenharmony_ci					 PGD_ALLOCATION_ORDER);
42262306a36Sopenharmony_ci}
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_cistatic inline void _pgd_free(pgd_t *pgd)
42562306a36Sopenharmony_ci{
42662306a36Sopenharmony_ci	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
42762306a36Sopenharmony_ci}
42862306a36Sopenharmony_ci#endif /* CONFIG_X86_PAE */
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_cipgd_t *pgd_alloc(struct mm_struct *mm)
43162306a36Sopenharmony_ci{
43262306a36Sopenharmony_ci	pgd_t *pgd;
43362306a36Sopenharmony_ci	pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
43462306a36Sopenharmony_ci	pmd_t *pmds[MAX_PREALLOCATED_PMDS];
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	pgd = _pgd_alloc();
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	if (pgd == NULL)
43962306a36Sopenharmony_ci		goto out;
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	mm->pgd = pgd;
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	if (sizeof(pmds) != 0 &&
44462306a36Sopenharmony_ci			preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
44562306a36Sopenharmony_ci		goto out_free_pgd;
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	if (sizeof(u_pmds) != 0 &&
44862306a36Sopenharmony_ci			preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
44962306a36Sopenharmony_ci		goto out_free_pmds;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	if (paravirt_pgd_alloc(mm) != 0)
45262306a36Sopenharmony_ci		goto out_free_user_pmds;
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	/*
45562306a36Sopenharmony_ci	 * Make sure that pre-populating the pmds is atomic with
45662306a36Sopenharmony_ci	 * respect to anything walking the pgd_list, so that they
45762306a36Sopenharmony_ci	 * never see a partially populated pgd.
45862306a36Sopenharmony_ci	 */
45962306a36Sopenharmony_ci	spin_lock(&pgd_lock);
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	pgd_ctor(mm, pgd);
46262306a36Sopenharmony_ci	if (sizeof(pmds) != 0)
46362306a36Sopenharmony_ci		pgd_prepopulate_pmd(mm, pgd, pmds);
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	if (sizeof(u_pmds) != 0)
46662306a36Sopenharmony_ci		pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	spin_unlock(&pgd_lock);
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci	return pgd;
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ciout_free_user_pmds:
47362306a36Sopenharmony_ci	if (sizeof(u_pmds) != 0)
47462306a36Sopenharmony_ci		free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
47562306a36Sopenharmony_ciout_free_pmds:
47662306a36Sopenharmony_ci	if (sizeof(pmds) != 0)
47762306a36Sopenharmony_ci		free_pmds(mm, pmds, PREALLOCATED_PMDS);
47862306a36Sopenharmony_ciout_free_pgd:
47962306a36Sopenharmony_ci	_pgd_free(pgd);
48062306a36Sopenharmony_ciout:
48162306a36Sopenharmony_ci	return NULL;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_civoid pgd_free(struct mm_struct *mm, pgd_t *pgd)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	pgd_mop_up_pmds(mm, pgd);
48762306a36Sopenharmony_ci	pgd_dtor(pgd);
48862306a36Sopenharmony_ci	paravirt_pgd_free(mm, pgd);
48962306a36Sopenharmony_ci	_pgd_free(pgd);
49062306a36Sopenharmony_ci}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci/*
49362306a36Sopenharmony_ci * Used to set accessed or dirty bits in the page table entries
49462306a36Sopenharmony_ci * on other architectures. On x86, the accessed and dirty bits
49562306a36Sopenharmony_ci * are tracked by hardware. However, do_wp_page calls this function
49662306a36Sopenharmony_ci * to also make the pte writeable at the same time the dirty bit is
49762306a36Sopenharmony_ci * set. In that case we do actually need to write the PTE.
49862306a36Sopenharmony_ci */
49962306a36Sopenharmony_ciint ptep_set_access_flags(struct vm_area_struct *vma,
50062306a36Sopenharmony_ci			  unsigned long address, pte_t *ptep,
50162306a36Sopenharmony_ci			  pte_t entry, int dirty)
50262306a36Sopenharmony_ci{
50362306a36Sopenharmony_ci	int changed = !pte_same(*ptep, entry);
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	if (changed && dirty)
50662306a36Sopenharmony_ci		set_pte(ptep, entry);
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	return changed;
50962306a36Sopenharmony_ci}
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
51262306a36Sopenharmony_ciint pmdp_set_access_flags(struct vm_area_struct *vma,
51362306a36Sopenharmony_ci			  unsigned long address, pmd_t *pmdp,
51462306a36Sopenharmony_ci			  pmd_t entry, int dirty)
51562306a36Sopenharmony_ci{
51662306a36Sopenharmony_ci	int changed = !pmd_same(*pmdp, entry);
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	if (changed && dirty) {
52162306a36Sopenharmony_ci		set_pmd(pmdp, entry);
52262306a36Sopenharmony_ci		/*
52362306a36Sopenharmony_ci		 * We had a write-protection fault here and changed the pmd
52462306a36Sopenharmony_ci		 * to to more permissive. No need to flush the TLB for that,
52562306a36Sopenharmony_ci		 * #PF is architecturally guaranteed to do that and in the
52662306a36Sopenharmony_ci		 * worst-case we'll generate a spurious fault.
52762306a36Sopenharmony_ci		 */
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	return changed;
53162306a36Sopenharmony_ci}
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ciint pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
53462306a36Sopenharmony_ci			  pud_t *pudp, pud_t entry, int dirty)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	int changed = !pud_same(*pudp, entry);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (changed && dirty) {
54162306a36Sopenharmony_ci		set_pud(pudp, entry);
54262306a36Sopenharmony_ci		/*
54362306a36Sopenharmony_ci		 * We had a write-protection fault here and changed the pud
54462306a36Sopenharmony_ci		 * to to more permissive. No need to flush the TLB for that,
54562306a36Sopenharmony_ci		 * #PF is architecturally guaranteed to do that and in the
54662306a36Sopenharmony_ci		 * worst-case we'll generate a spurious fault.
54762306a36Sopenharmony_ci		 */
54862306a36Sopenharmony_ci	}
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	return changed;
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci#endif
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ciint ptep_test_and_clear_young(struct vm_area_struct *vma,
55562306a36Sopenharmony_ci			      unsigned long addr, pte_t *ptep)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	int ret = 0;
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	if (pte_young(*ptep))
56062306a36Sopenharmony_ci		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
56162306a36Sopenharmony_ci					 (unsigned long *) &ptep->pte);
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	return ret;
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
56762306a36Sopenharmony_ciint pmdp_test_and_clear_young(struct vm_area_struct *vma,
56862306a36Sopenharmony_ci			      unsigned long addr, pmd_t *pmdp)
56962306a36Sopenharmony_ci{
57062306a36Sopenharmony_ci	int ret = 0;
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci	if (pmd_young(*pmdp))
57362306a36Sopenharmony_ci		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
57462306a36Sopenharmony_ci					 (unsigned long *)pmdp);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	return ret;
57762306a36Sopenharmony_ci}
57862306a36Sopenharmony_ci#endif
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
58162306a36Sopenharmony_ciint pudp_test_and_clear_young(struct vm_area_struct *vma,
58262306a36Sopenharmony_ci			      unsigned long addr, pud_t *pudp)
58362306a36Sopenharmony_ci{
58462306a36Sopenharmony_ci	int ret = 0;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	if (pud_young(*pudp))
58762306a36Sopenharmony_ci		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
58862306a36Sopenharmony_ci					 (unsigned long *)pudp);
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	return ret;
59162306a36Sopenharmony_ci}
59262306a36Sopenharmony_ci#endif
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ciint ptep_clear_flush_young(struct vm_area_struct *vma,
59562306a36Sopenharmony_ci			   unsigned long address, pte_t *ptep)
59662306a36Sopenharmony_ci{
59762306a36Sopenharmony_ci	/*
59862306a36Sopenharmony_ci	 * On x86 CPUs, clearing the accessed bit without a TLB flush
59962306a36Sopenharmony_ci	 * doesn't cause data corruption. [ It could cause incorrect
60062306a36Sopenharmony_ci	 * page aging and the (mistaken) reclaim of hot pages, but the
60162306a36Sopenharmony_ci	 * chance of that should be relatively low. ]
60262306a36Sopenharmony_ci	 *
60362306a36Sopenharmony_ci	 * So as a performance optimization don't flush the TLB when
60462306a36Sopenharmony_ci	 * clearing the accessed bit, it will eventually be flushed by
60562306a36Sopenharmony_ci	 * a context switch or a VM operation anyway. [ In the rare
60662306a36Sopenharmony_ci	 * event of it not getting flushed for a long time the delay
60762306a36Sopenharmony_ci	 * shouldn't really matter because there's no real memory
60862306a36Sopenharmony_ci	 * pressure for swapout to react to. ]
60962306a36Sopenharmony_ci	 */
61062306a36Sopenharmony_ci	return ptep_test_and_clear_young(vma, address, ptep);
61162306a36Sopenharmony_ci}
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
61462306a36Sopenharmony_ciint pmdp_clear_flush_young(struct vm_area_struct *vma,
61562306a36Sopenharmony_ci			   unsigned long address, pmd_t *pmdp)
61662306a36Sopenharmony_ci{
61762306a36Sopenharmony_ci	int young;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	young = pmdp_test_and_clear_young(vma, address, pmdp);
62262306a36Sopenharmony_ci	if (young)
62362306a36Sopenharmony_ci		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	return young;
62662306a36Sopenharmony_ci}
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_cipmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
62962306a36Sopenharmony_ci			 pmd_t *pmdp)
63062306a36Sopenharmony_ci{
63162306a36Sopenharmony_ci	/*
63262306a36Sopenharmony_ci	 * No flush is necessary. Once an invalid PTE is established, the PTE's
63362306a36Sopenharmony_ci	 * access and dirty bits cannot be updated.
63462306a36Sopenharmony_ci	 */
63562306a36Sopenharmony_ci	return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
63662306a36Sopenharmony_ci}
63762306a36Sopenharmony_ci#endif
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci/**
64062306a36Sopenharmony_ci * reserve_top_address - reserves a hole in the top of kernel address space
64162306a36Sopenharmony_ci * @reserve - size of hole to reserve
64262306a36Sopenharmony_ci *
64362306a36Sopenharmony_ci * Can be used to relocate the fixmap area and poke a hole in the top
64462306a36Sopenharmony_ci * of kernel address space to make room for a hypervisor.
64562306a36Sopenharmony_ci */
64662306a36Sopenharmony_civoid __init reserve_top_address(unsigned long reserve)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci#ifdef CONFIG_X86_32
64962306a36Sopenharmony_ci	BUG_ON(fixmaps_set > 0);
65062306a36Sopenharmony_ci	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
65162306a36Sopenharmony_ci	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
65262306a36Sopenharmony_ci	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
65362306a36Sopenharmony_ci#endif
65462306a36Sopenharmony_ci}
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ciint fixmaps_set;
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_civoid __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
65962306a36Sopenharmony_ci{
66062306a36Sopenharmony_ci	unsigned long address = __fix_to_virt(idx);
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
66362306a36Sopenharmony_ci       /*
66462306a36Sopenharmony_ci	* Ensure that the static initial page tables are covering the
66562306a36Sopenharmony_ci	* fixmap completely.
66662306a36Sopenharmony_ci	*/
66762306a36Sopenharmony_ci	BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
66862306a36Sopenharmony_ci		     (FIXMAP_PMD_NUM * PTRS_PER_PTE));
66962306a36Sopenharmony_ci#endif
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	if (idx >= __end_of_fixed_addresses) {
67262306a36Sopenharmony_ci		BUG();
67362306a36Sopenharmony_ci		return;
67462306a36Sopenharmony_ci	}
67562306a36Sopenharmony_ci	set_pte_vaddr(address, pte);
67662306a36Sopenharmony_ci	fixmaps_set++;
67762306a36Sopenharmony_ci}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_civoid native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
68062306a36Sopenharmony_ci		       phys_addr_t phys, pgprot_t flags)
68162306a36Sopenharmony_ci{
68262306a36Sopenharmony_ci	/* Sanitize 'prot' against any unsupported bits: */
68362306a36Sopenharmony_ci	pgprot_val(flags) &= __default_kernel_pte_mask;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
68962306a36Sopenharmony_ci#ifdef CONFIG_X86_5LEVEL
69062306a36Sopenharmony_ci/**
69162306a36Sopenharmony_ci * p4d_set_huge - setup kernel P4D mapping
69262306a36Sopenharmony_ci *
69362306a36Sopenharmony_ci * No 512GB pages yet -- always return 0
69462306a36Sopenharmony_ci */
69562306a36Sopenharmony_ciint p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
69662306a36Sopenharmony_ci{
69762306a36Sopenharmony_ci	return 0;
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci/**
70162306a36Sopenharmony_ci * p4d_clear_huge - clear kernel P4D mapping when it is set
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci * No 512GB pages yet -- always return 0
70462306a36Sopenharmony_ci */
70562306a36Sopenharmony_civoid p4d_clear_huge(p4d_t *p4d)
70662306a36Sopenharmony_ci{
70762306a36Sopenharmony_ci}
70862306a36Sopenharmony_ci#endif
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci/**
71162306a36Sopenharmony_ci * pud_set_huge - setup kernel PUD mapping
71262306a36Sopenharmony_ci *
71362306a36Sopenharmony_ci * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
71462306a36Sopenharmony_ci * function sets up a huge page only if the complete range has the same MTRR
71562306a36Sopenharmony_ci * caching mode.
71662306a36Sopenharmony_ci *
71762306a36Sopenharmony_ci * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
71862306a36Sopenharmony_ci * page mapping attempt fails.
71962306a36Sopenharmony_ci *
72062306a36Sopenharmony_ci * Returns 1 on success and 0 on failure.
72162306a36Sopenharmony_ci */
72262306a36Sopenharmony_ciint pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	u8 uniform;
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
72762306a36Sopenharmony_ci	if (!uniform)
72862306a36Sopenharmony_ci		return 0;
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	/* Bail out if we are we on a populated non-leaf entry: */
73162306a36Sopenharmony_ci	if (pud_present(*pud) && !pud_huge(*pud))
73262306a36Sopenharmony_ci		return 0;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	set_pte((pte_t *)pud, pfn_pte(
73562306a36Sopenharmony_ci		(u64)addr >> PAGE_SHIFT,
73662306a36Sopenharmony_ci		__pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	return 1;
73962306a36Sopenharmony_ci}
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci/**
74262306a36Sopenharmony_ci * pmd_set_huge - setup kernel PMD mapping
74362306a36Sopenharmony_ci *
74462306a36Sopenharmony_ci * See text over pud_set_huge() above.
74562306a36Sopenharmony_ci *
74662306a36Sopenharmony_ci * Returns 1 on success and 0 on failure.
74762306a36Sopenharmony_ci */
74862306a36Sopenharmony_ciint pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
74962306a36Sopenharmony_ci{
75062306a36Sopenharmony_ci	u8 uniform;
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
75362306a36Sopenharmony_ci	if (!uniform) {
75462306a36Sopenharmony_ci		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
75562306a36Sopenharmony_ci			     __func__, addr, addr + PMD_SIZE);
75662306a36Sopenharmony_ci		return 0;
75762306a36Sopenharmony_ci	}
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	/* Bail out if we are we on a populated non-leaf entry: */
76062306a36Sopenharmony_ci	if (pmd_present(*pmd) && !pmd_huge(*pmd))
76162306a36Sopenharmony_ci		return 0;
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	set_pte((pte_t *)pmd, pfn_pte(
76462306a36Sopenharmony_ci		(u64)addr >> PAGE_SHIFT,
76562306a36Sopenharmony_ci		__pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	return 1;
76862306a36Sopenharmony_ci}
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci/**
77162306a36Sopenharmony_ci * pud_clear_huge - clear kernel PUD mapping when it is set
77262306a36Sopenharmony_ci *
77362306a36Sopenharmony_ci * Returns 1 on success and 0 on failure (no PUD map is found).
77462306a36Sopenharmony_ci */
77562306a36Sopenharmony_ciint pud_clear_huge(pud_t *pud)
77662306a36Sopenharmony_ci{
77762306a36Sopenharmony_ci	if (pud_large(*pud)) {
77862306a36Sopenharmony_ci		pud_clear(pud);
77962306a36Sopenharmony_ci		return 1;
78062306a36Sopenharmony_ci	}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	return 0;
78362306a36Sopenharmony_ci}
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci/**
78662306a36Sopenharmony_ci * pmd_clear_huge - clear kernel PMD mapping when it is set
78762306a36Sopenharmony_ci *
78862306a36Sopenharmony_ci * Returns 1 on success and 0 on failure (no PMD map is found).
78962306a36Sopenharmony_ci */
79062306a36Sopenharmony_ciint pmd_clear_huge(pmd_t *pmd)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	if (pmd_large(*pmd)) {
79362306a36Sopenharmony_ci		pmd_clear(pmd);
79462306a36Sopenharmony_ci		return 1;
79562306a36Sopenharmony_ci	}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	return 0;
79862306a36Sopenharmony_ci}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci#ifdef CONFIG_X86_64
80162306a36Sopenharmony_ci/**
80262306a36Sopenharmony_ci * pud_free_pmd_page - Clear pud entry and free pmd page.
80362306a36Sopenharmony_ci * @pud: Pointer to a PUD.
80462306a36Sopenharmony_ci * @addr: Virtual address associated with pud.
80562306a36Sopenharmony_ci *
80662306a36Sopenharmony_ci * Context: The pud range has been unmapped and TLB purged.
80762306a36Sopenharmony_ci * Return: 1 if clearing the entry succeeded. 0 otherwise.
80862306a36Sopenharmony_ci *
80962306a36Sopenharmony_ci * NOTE: Callers must allow a single page allocation.
81062306a36Sopenharmony_ci */
81162306a36Sopenharmony_ciint pud_free_pmd_page(pud_t *pud, unsigned long addr)
81262306a36Sopenharmony_ci{
81362306a36Sopenharmony_ci	pmd_t *pmd, *pmd_sv;
81462306a36Sopenharmony_ci	pte_t *pte;
81562306a36Sopenharmony_ci	int i;
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	pmd = pud_pgtable(*pud);
81862306a36Sopenharmony_ci	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
81962306a36Sopenharmony_ci	if (!pmd_sv)
82062306a36Sopenharmony_ci		return 0;
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_PMD; i++) {
82362306a36Sopenharmony_ci		pmd_sv[i] = pmd[i];
82462306a36Sopenharmony_ci		if (!pmd_none(pmd[i]))
82562306a36Sopenharmony_ci			pmd_clear(&pmd[i]);
82662306a36Sopenharmony_ci	}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	pud_clear(pud);
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci	/* INVLPG to clear all paging-structure caches */
83162306a36Sopenharmony_ci	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_PMD; i++) {
83462306a36Sopenharmony_ci		if (!pmd_none(pmd_sv[i])) {
83562306a36Sopenharmony_ci			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
83662306a36Sopenharmony_ci			free_page((unsigned long)pte);
83762306a36Sopenharmony_ci		}
83862306a36Sopenharmony_ci	}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci	free_page((unsigned long)pmd_sv);
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	pagetable_pmd_dtor(virt_to_ptdesc(pmd));
84362306a36Sopenharmony_ci	free_page((unsigned long)pmd);
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	return 1;
84662306a36Sopenharmony_ci}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci/**
84962306a36Sopenharmony_ci * pmd_free_pte_page - Clear pmd entry and free pte page.
85062306a36Sopenharmony_ci * @pmd: Pointer to a PMD.
85162306a36Sopenharmony_ci * @addr: Virtual address associated with pmd.
85262306a36Sopenharmony_ci *
85362306a36Sopenharmony_ci * Context: The pmd range has been unmapped and TLB purged.
85462306a36Sopenharmony_ci * Return: 1 if clearing the entry succeeded. 0 otherwise.
85562306a36Sopenharmony_ci */
85662306a36Sopenharmony_ciint pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
85762306a36Sopenharmony_ci{
85862306a36Sopenharmony_ci	pte_t *pte;
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	pte = (pte_t *)pmd_page_vaddr(*pmd);
86162306a36Sopenharmony_ci	pmd_clear(pmd);
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	/* INVLPG to clear all paging-structure caches */
86462306a36Sopenharmony_ci	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	free_page((unsigned long)pte);
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci	return 1;
86962306a36Sopenharmony_ci}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci#else /* !CONFIG_X86_64 */
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci/*
87462306a36Sopenharmony_ci * Disable free page handling on x86-PAE. This assures that ioremap()
87562306a36Sopenharmony_ci * does not update sync'd pmd entries. See vmalloc_sync_one().
87662306a36Sopenharmony_ci */
87762306a36Sopenharmony_ciint pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
87862306a36Sopenharmony_ci{
87962306a36Sopenharmony_ci	return pmd_none(*pmd);
88062306a36Sopenharmony_ci}
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci#endif /* CONFIG_X86_64 */
88362306a36Sopenharmony_ci#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_cipte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
88662306a36Sopenharmony_ci{
88762306a36Sopenharmony_ci	if (vma->vm_flags & VM_SHADOW_STACK)
88862306a36Sopenharmony_ci		return pte_mkwrite_shstk(pte);
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	pte = pte_mkwrite_novma(pte);
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci	return pte_clear_saveddirty(pte);
89362306a36Sopenharmony_ci}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_cipmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
89662306a36Sopenharmony_ci{
89762306a36Sopenharmony_ci	if (vma->vm_flags & VM_SHADOW_STACK)
89862306a36Sopenharmony_ci		return pmd_mkwrite_shstk(pmd);
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	pmd = pmd_mkwrite_novma(pmd);
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	return pmd_clear_saveddirty(pmd);
90362306a36Sopenharmony_ci}
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_civoid arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
90662306a36Sopenharmony_ci{
90762306a36Sopenharmony_ci	/*
90862306a36Sopenharmony_ci	 * Hardware before shadow stack can (rarely) set Dirty=1
90962306a36Sopenharmony_ci	 * on a Write=0 PTE. So the below condition
91062306a36Sopenharmony_ci	 * only indicates a software bug when shadow stack is
91162306a36Sopenharmony_ci	 * supported by the HW. This checking is covered in
91262306a36Sopenharmony_ci	 * pte_shstk().
91362306a36Sopenharmony_ci	 */
91462306a36Sopenharmony_ci	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
91562306a36Sopenharmony_ci			pte_shstk(pte));
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_civoid arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
91962306a36Sopenharmony_ci{
92062306a36Sopenharmony_ci	/* See note in arch_check_zapped_pte() */
92162306a36Sopenharmony_ci	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
92262306a36Sopenharmony_ci			pmd_shstk(pmd));
92362306a36Sopenharmony_ci}
924