// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mtrr.h>

#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif

#ifdef CONFIG_HIGHPTE
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#else
#define PGTABLE_HIGHMEM 0
#endif

#ifndef CONFIG_PARAVIRT
static inline
void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	tlb_remove_page(tlb, table);
}
#endif

gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;

pgtable_t pte_alloc_one(struct mm_struct *mm)
{
	return __pte_alloc_one(mm, __userpte_alloc_gfp);
}

static int __init setup_userpte(char *arg)
{
	if (!arg)
		return -EINVAL;

	/*
	 * "userpte=nohigh" disables allocation of user pagetables in
	 * high memory.
	 */
	if (strcmp(arg, "nohigh") == 0)
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	else
		return -EINVAL;
	return 0;
}
early_param("userpte", setup_userpte);

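/*
 * The ___*_free_tlb() helpers below are called from the generic
 * mmu_gather machinery (via pte_free_tlb() and friends) when a
 * page-table page is torn down: they drop the paravirt accounting for
 * the page and queue it for freeing once the TLB has been flushed.
 */
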
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	pagetable_pte_dtor(page_ptdesc(pte));
	paravirt_release_pte(page_to_pfn(pte));
	paravirt_tlb_remove_table(tlb, pte);
}

#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);

	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
	/*
	 * NOTE! For PAE, any changes to the top page-directory-pointer-table
	 * entries need a full cr3 reload to flush.
	 */
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	pagetable_pmd_dtor(ptdesc);
	paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc));
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
}

#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
	paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
#endif	/* CONFIG_PGTABLE_LEVELS > 2 */

static inline void pgd_list_add(pgd_t *pgd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pgd);

	list_add(&ptdesc->pt_list, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pgd);

	list_del(&ptdesc->pt_list);
}

#define UNSHARED_PTRS_PER_PGD				\
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
#define MAX_UNSHARED_PTRS_PER_PGD			\
	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)

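/*
 * Stash the owning mm in the ptdesc backing the pgd page, so that
 * walkers of pgd_list (e.g. the pageattr code syncing kernel mapping
 * changes) can get back from a pgd page to the mm that owns it.
 */
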
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	virt_to_ptdesc(pgd)->pt_mm = mm;
}

struct mm_struct *pgd_page_get_mm(struct page *page)
{
	return page_ptdesc(page)->pt_mm;
}

static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
	/* If the pgd points to a shared pagetable level (either the
	   ptes in non-PAE, or shared PMD in PAE), then just copy the
	   references from swapper_pg_dir. */
	if (CONFIG_PGTABLE_LEVELS == 2 ||
	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
	    CONFIG_PGTABLE_LEVELS >= 4) {
		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);
	}

	/* list required to sync kernel mapping updates */
	if (!SHARED_KERNEL_PMD) {
		pgd_set_mm(pgd, mm);
		pgd_list_add(pgd);
	}
}

static void pgd_dtor(pgd_t *pgd)
{
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock(&pgd_lock);
	pgd_list_del(pgd);
	spin_unlock(&pgd_lock);
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- nyc
 */

#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (= tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update. Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
#define MAX_PREALLOCATED_PMDS	MAX_UNSHARED_PTRS_PER_PGD

/*
 * We allocate separate PMDs for the kernel part of the user page-table
 * when PTI is enabled. We need them to map the per-process LDT into the
 * user-space page-table.
 */
#define PREALLOCATED_USER_PMDS	(boot_cpu_has(X86_FEATURE_PTI) ?	\
					KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS

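/*
 * KERNEL_PGD_PTRS is the number of pgd slots covering the kernel part
 * of the address space, so with PTI every kernel-mapping slot of the
 * user page-table gets its own preallocated pmd page.
 */
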
void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

	/* Note: almost everything apart from _PAGE_PRESENT is
	   reserved at the pmd (PDPT) level. */
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

	/*
	 * According to Intel App note "TLBs, Paging-Structure Caches,
	 * and Their Invalidation", April 2007, document 317080-001,
	 * section 8.1: in PAE mode we explicitly have to flush the
	 * TLB via cr3 if the top-level pgd is changed...
	 */
	flush_tlb_mm(mm);
}
#else  /* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS	0
#define MAX_PREALLOCATED_PMDS	0
#define PREALLOCATED_USER_PMDS	0
#define MAX_PREALLOCATED_USER_PMDS 0
#endif	/* CONFIG_X86_PAE */

static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
	int i;
	struct ptdesc *ptdesc;

	for (i = 0; i < count; i++)
		if (pmds[i]) {
			ptdesc = virt_to_ptdesc(pmds[i]);

			pagetable_pmd_dtor(ptdesc);
			pagetable_free(ptdesc);
			mm_dec_nr_pmds(mm);
		}
}

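/*
 * Allocate "count" pmd pages up front for pgd_alloc(). If any
 * allocation fails, everything allocated so far is freed again and
 * -ENOMEM is returned, so callers never see a partially filled array.
 */
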
static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
	int i;
	bool failed = false;
	gfp_t gfp = GFP_PGTABLE_USER;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	gfp &= ~__GFP_HIGHMEM;

	for (i = 0; i < count; i++) {
		pmd_t *pmd = NULL;
		struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);

		if (!ptdesc)
			failed = true;
		if (ptdesc && !pagetable_pmd_ctor(ptdesc)) {
			pagetable_free(ptdesc);
			ptdesc = NULL;
			failed = true;
		}
		if (ptdesc) {
			mm_inc_nr_pmds(mm);
			pmd = ptdesc_address(ptdesc);
		}

		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds, count);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
{
	pgd_t pgd = *pgdp;

	if (pgd_val(pgd) != 0) {
		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

		pgd_clear(pgdp);

		paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
		pmd_free(mm, pmd);
		mm_dec_nr_pmds(mm);
	}
}

static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++)
		mop_up_one_pmd(mm, &pgdp[i]);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	pgdp = kernel_to_user_pgdp(pgdp);

	for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
		mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
#endif
}

static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
	p4d_t *p4d;
	pud_t *pud;
	int i;

	p4d = p4d_offset(pgd, 0);
	pud = pud_offset(p4d, 0);

	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
		pmd_t *pmd = pmds[i];

		if (i >= KERNEL_PGD_BOUNDARY)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}
}

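/*
 * With PTI the pgd is an order-1 allocation: the kernel pgd occupies
 * the first 4k and the user copy the second 4k, which is what
 * kernel_to_user_pgdp() relies on to locate the user half.
 */
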
#ifdef CONFIG_PAGE_TABLE_ISOLATION
static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
				     pgd_t *k_pgd, pmd_t *pmds[])
{
	pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	p4d_t *u_p4d;
	pud_t *u_pud;
	int i;

	u_p4d = p4d_offset(u_pgd, 0);
	u_pud = pud_offset(u_p4d, 0);

	s_pgd += KERNEL_PGD_BOUNDARY;
	u_pud += KERNEL_PGD_BOUNDARY;

	for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
		pmd_t *pmd = pmds[i];

		memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
		       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, u_pud, pmd);
	}
}
#else
static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
				     pgd_t *k_pgd, pmd_t *pmds[])
{
}
#endif

/*
 * Xen paravirt assumes that the pgd table occupies a whole page, and
 * the 64-bit kernel makes the same assumption.
 *
 * A kernel with PAE paging that is not running as a Xen domain,
 * however, only needs 32 bytes for the pgd instead of one page.
 */
#ifdef CONFIG_X86_PAE

#include <linux/slab.h>

#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN	32

static struct kmem_cache *pgd_cache;

void __init pgtable_cache_init(void)
{
	/*
	 * When a PAE kernel runs as a Xen domain, it does not use a
	 * shared kernel pmd, and that requires a whole page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return;

	/*
	 * When a PAE kernel is not running as a Xen domain, it uses a
	 * shared kernel pmd, which does not require a whole page for
	 * the pgd: 32 bytes are enough. Create a 32-byte slab for pgd
	 * table allocations during boot.
	 */
	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
				      SLAB_PANIC, NULL);
}

static inline pgd_t *_pgd_alloc(void)
{
	/*
	 * If there is no SHARED_KERNEL_PMD, the PAE kernel is running
	 * as a Xen domain and we allocate a whole page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
						 PGD_ALLOCATION_ORDER);

	/*
	 * Otherwise the PAE kernel is not running as a Xen domain and
	 * we can allocate the pgd from the 32-byte slab to save memory.
	 */
	return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
}

static inline void _pgd_free(pgd_t *pgd)
{
	if (!SHARED_KERNEL_PMD)
		free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
	else
		kmem_cache_free(pgd_cache, pgd);
}
#else

static inline pgd_t *_pgd_alloc(void)
{
	return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
					 PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
{
	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */

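/*
 * pgd_alloc() ties the pieces above together: allocate the pgd itself,
 * preallocate whatever pmds the configuration requires, and then, under
 * pgd_lock, populate the new pgd so that pgd_list walkers never see a
 * half-initialized page-table.
 */
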
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;
	pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
	pmd_t *pmds[MAX_PREALLOCATED_PMDS];

	pgd = _pgd_alloc();

	if (pgd == NULL)
		goto out;

	mm->pgd = pgd;

	if (sizeof(pmds) != 0 &&
	    preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
		goto out_free_pgd;

	if (sizeof(u_pmds) != 0 &&
	    preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
		goto out_free_pmds;

	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_user_pmds;

	/*
	 * Make sure that pre-populating the pmds is atomic with
	 * respect to anything walking the pgd_list, so that they
	 * never see a partially populated pgd.
	 */
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	if (sizeof(pmds) != 0)
		pgd_prepopulate_pmd(mm, pgd, pmds);

	if (sizeof(u_pmds) != 0)
		pgd_prepopulate_user_pmd(mm, pgd, u_pmds);

	spin_unlock(&pgd_lock);

	return pgd;

out_free_user_pmds:
	if (sizeof(u_pmds) != 0)
		free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
out_free_pmds:
	if (sizeof(pmds) != 0)
		free_pmds(mm, pmds, PREALLOCATED_PMDS);
out_free_pgd:
	_pgd_free(pgd);
out:
	return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_mop_up_pmds(mm, pgd);
	pgd_dtor(pgd);
	paravirt_pgd_free(mm, pgd);
	_pgd_free(pgd);
}

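/*
 * Rough sketch of the expected pairing, roughly what the mm setup and
 * teardown paths in kernel/fork.c do:
 *
 *	mm->pgd = pgd_alloc(mm);	// NULL on allocation failure
 *	...
 *	pgd_free(mm, mm->pgd);
 */
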
/*
 * Used to set accessed or dirty bits in the page table entries
 * on other architectures. On x86, the accessed and dirty bits
 * are tracked by hardware. However, do_wp_page calls this function
 * to also make the pte writable at the same time the dirty bit is
 * set. In that case we do actually need to write the PTE.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty)
		set_pte(ptep, entry);

	return changed;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		set_pmd(pmdp, entry);
		/*
		 * We had a write-protection fault here and changed the pmd
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}

int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed = !pud_same(*pudp, entry);

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);

	if (changed && dirty) {
		set_pud(pudp, entry);
		/*
		 * We had a write-protection fault here and changed the pud
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}
#endif

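/*
 * The *_test_and_clear_young() helpers below clear only the accessed
 * bit, using an atomic test_and_clear_bit(), so concurrent hardware
 * updates of other bits in the same entry (e.g. dirty) cannot be
 * clobbered.
 */
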
int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	return ret;
}

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif

int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	/*
	 * On x86 CPUs, clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}

pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
			 pmd_t *pmdp)
{
	/*
	 * No flush is necessary. Once an invalid PTE is established, the PTE's
	 * access and dirty bits cannot be updated.
	 */
	return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
}
#endif

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}

int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
	unsigned long address = __fix_to_virt(idx);

#ifdef CONFIG_X86_64
	/*
	 * Ensure that the static initial page tables are covering the
	 * fixmap completely.
	 */
	BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
		     (FIXMAP_PMD_NUM * PTRS_PER_PTE));
#endif

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_vaddr(address, pte);
	fixmaps_set++;
}

void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
		       phys_addr_t phys, pgprot_t flags)
{
	/* Sanitize 'prot' against any unsupported bits: */
	pgprot_val(flags) &= __default_kernel_pte_mask;

	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}

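/*
 * The p4d/pud/pmd_set_huge() helpers below back huge-page kernel
 * mappings (CONFIG_HAVE_ARCH_HUGE_VMAP): callers try the largest
 * mapping first and fall back to the next smaller page size whenever
 * a helper returns 0.
 */
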
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
#ifdef CONFIG_X86_5LEVEL
/**
 * p4d_set_huge - setup kernel P4D mapping
 *
 * No 512GB pages yet -- always return 0
 */
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}

/**
 * p4d_clear_huge - clear kernel P4D mapping when it is set
 *
 * No 512GB pages yet -- nothing to do
 */
void p4d_clear_huge(p4d_t *p4d)
{
}
#endif

/**
 * pud_set_huge - setup kernel PUD mapping
 *
 * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
 * function sets up a huge page only if the complete range has the same MTRR
 * caching mode.
 *
 * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
 * page mapping attempt fails.
 *
 * Returns 1 on success and 0 on failure.
 */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 uniform;

	mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if (!uniform)
		return 0;

	/* Bail out if we are on a populated non-leaf entry: */
	if (pud_present(*pud) && !pud_huge(*pud))
		return 0;

	set_pte((pte_t *)pud, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));

	return 1;
}

/**
 * pmd_set_huge - setup kernel PMD mapping
 *
 * See text over pud_set_huge() above.
 *
 * Returns 1 on success and 0 on failure.
 */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 uniform;

	mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if (!uniform) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	/* Bail out if we are on a populated non-leaf entry: */
	if (pmd_present(*pmd) && !pmd_huge(*pmd))
		return 0;

	set_pte((pte_t *)pmd, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));

	return 1;
}

/**
 * pud_clear_huge - clear kernel PUD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PUD map is found).
 */
int pud_clear_huge(pud_t *pud)
{
	if (pud_large(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

/**
 * pmd_clear_huge - clear kernel PMD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PMD map is found).
 */
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_large(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

#ifdef CONFIG_X86_64

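/*
 * pud_free_pmd_page() snapshots the pmd entries before clearing them:
 * the entries must be cleared and the paging-structure caches flushed
 * before the pte pages they reference can be freed, otherwise a stale
 * cached walk could still reach the freed pages.
 */
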
/**
 * pud_free_pmd_page - Clear pud entry and free pmd page.
 * @pud: Pointer to a PUD.
 * @addr: Virtual address associated with pud.
 *
 * Context: The pud range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 *
 * NOTE: Callers must allow a single page allocation.
 */
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd, *pmd_sv;
	pte_t *pte;
	int i;

	pmd = pud_pgtable(*pud);
	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
	if (!pmd_sv)
		return 0;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd_sv[i] = pmd[i];
		if (!pmd_none(pmd[i]))
			pmd_clear(&pmd[i]);
	}

	pud_clear(pud);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd_sv[i])) {
			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
			free_page((unsigned long)pte);
		}
	}

	free_page((unsigned long)pmd_sv);

	pagetable_pmd_dtor(virt_to_ptdesc(pmd));
	free_page((unsigned long)pmd);

	return 1;
}

/**
 * pmd_free_pte_page - Clear pmd entry and free pte page.
 * @pmd: Pointer to a PMD.
 * @addr: Virtual address associated with pmd.
 *
 * Context: The pmd range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	free_page((unsigned long)pte);

	return 1;
}

#else /* !CONFIG_X86_64 */

/*
 * Disable free page handling on x86-PAE. This assures that ioremap()
 * does not update sync'd pmd entries. See vmalloc_sync_one().
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	return pmd_none(*pmd);
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHADOW_STACK)
		return pte_mkwrite_shstk(pte);

	pte = pte_mkwrite_novma(pte);

	return pte_clear_saveddirty(pte);
}

pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHADOW_STACK)
		return pmd_mkwrite_shstk(pmd);

	pmd = pmd_mkwrite_novma(pmd);

	return pmd_clear_saveddirty(pmd);
}

void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
{
	/*
	 * Hardware before shadow stack can (rarely) set Dirty=1
	 * on a Write=0 PTE. So the below condition
	 * only indicates a software bug when shadow stack is
	 * supported by the HW. This checking is covered in
	 * pte_shstk().
	 */
	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
			pte_shstk(pte));
}

void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
{
	/* See note in arch_check_zapped_pte() */
	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
			pmd_shstk(pmd));
}