162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2007-2008 Paul Mackerras, IBM Corp. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/errno.h> 762306a36Sopenharmony_ci#include <linux/kernel.h> 862306a36Sopenharmony_ci#include <linux/gfp.h> 962306a36Sopenharmony_ci#include <linux/types.h> 1062306a36Sopenharmony_ci#include <linux/pagewalk.h> 1162306a36Sopenharmony_ci#include <linux/hugetlb.h> 1262306a36Sopenharmony_ci#include <linux/syscalls.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/pgtable.h> 1562306a36Sopenharmony_ci#include <linux/uaccess.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci/* 1862306a36Sopenharmony_ci * Free all pages allocated for subpage protection maps and pointers. 1962306a36Sopenharmony_ci * Also makes sure that the subpage_prot_table structure is 2062306a36Sopenharmony_ci * reinitialized for the next user. 2162306a36Sopenharmony_ci */ 2262306a36Sopenharmony_civoid subpage_prot_free(struct mm_struct *mm) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); 2562306a36Sopenharmony_ci unsigned long i, j, addr; 2662306a36Sopenharmony_ci u32 **p; 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci if (!spt) 2962306a36Sopenharmony_ci return; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci for (i = 0; i < 4; ++i) { 3262306a36Sopenharmony_ci if (spt->low_prot[i]) { 3362306a36Sopenharmony_ci free_page((unsigned long)spt->low_prot[i]); 3462306a36Sopenharmony_ci spt->low_prot[i] = NULL; 3562306a36Sopenharmony_ci } 3662306a36Sopenharmony_ci } 3762306a36Sopenharmony_ci addr = 0; 3862306a36Sopenharmony_ci for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) { 3962306a36Sopenharmony_ci p = spt->protptrs[i]; 4062306a36Sopenharmony_ci if (!p) 4162306a36Sopenharmony_ci continue; 4262306a36Sopenharmony_ci spt->protptrs[i] = NULL; 4362306a36Sopenharmony_ci for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr; 4462306a36Sopenharmony_ci ++j, addr += PAGE_SIZE) 4562306a36Sopenharmony_ci if (p[j]) 4662306a36Sopenharmony_ci free_page((unsigned long)p[j]); 4762306a36Sopenharmony_ci free_page((unsigned long)p); 4862306a36Sopenharmony_ci } 4962306a36Sopenharmony_ci spt->maxaddr = 0; 5062306a36Sopenharmony_ci kfree(spt); 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic void hpte_flush_range(struct mm_struct *mm, unsigned long addr, 5462306a36Sopenharmony_ci int npages) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci pgd_t *pgd; 5762306a36Sopenharmony_ci p4d_t *p4d; 5862306a36Sopenharmony_ci pud_t *pud; 5962306a36Sopenharmony_ci pmd_t *pmd; 6062306a36Sopenharmony_ci pte_t *pte; 6162306a36Sopenharmony_ci spinlock_t *ptl; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci pgd = pgd_offset(mm, addr); 6462306a36Sopenharmony_ci p4d = p4d_offset(pgd, addr); 6562306a36Sopenharmony_ci if (p4d_none(*p4d)) 6662306a36Sopenharmony_ci return; 6762306a36Sopenharmony_ci pud = pud_offset(p4d, addr); 6862306a36Sopenharmony_ci if (pud_none(*pud)) 6962306a36Sopenharmony_ci return; 7062306a36Sopenharmony_ci pmd = pmd_offset(pud, addr); 7162306a36Sopenharmony_ci if (pmd_none(*pmd)) 7262306a36Sopenharmony_ci return; 7362306a36Sopenharmony_ci pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 7462306a36Sopenharmony_ci if (!pte) 7562306a36Sopenharmony_ci return; 7662306a36Sopenharmony_ci arch_enter_lazy_mmu_mode(); 7762306a36Sopenharmony_ci for (; npages > 0; --npages) { 7862306a36Sopenharmony_ci pte_update(mm, addr, pte, 0, 0, 0); 7962306a36Sopenharmony_ci addr += PAGE_SIZE; 8062306a36Sopenharmony_ci ++pte; 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci arch_leave_lazy_mmu_mode(); 8362306a36Sopenharmony_ci pte_unmap_unlock(pte - 1, ptl); 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci/* 8762306a36Sopenharmony_ci * Clear the subpage protection map for an address range, allowing 8862306a36Sopenharmony_ci * all accesses that are allowed by the pte permissions. 8962306a36Sopenharmony_ci */ 9062306a36Sopenharmony_cistatic void subpage_prot_clear(unsigned long addr, unsigned long len) 9162306a36Sopenharmony_ci{ 9262306a36Sopenharmony_ci struct mm_struct *mm = current->mm; 9362306a36Sopenharmony_ci struct subpage_prot_table *spt; 9462306a36Sopenharmony_ci u32 **spm, *spp; 9562306a36Sopenharmony_ci unsigned long i; 9662306a36Sopenharmony_ci size_t nw; 9762306a36Sopenharmony_ci unsigned long next, limit; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci mmap_write_lock(mm); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci spt = mm_ctx_subpage_prot(&mm->context); 10262306a36Sopenharmony_ci if (!spt) 10362306a36Sopenharmony_ci goto err_out; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci limit = addr + len; 10662306a36Sopenharmony_ci if (limit > spt->maxaddr) 10762306a36Sopenharmony_ci limit = spt->maxaddr; 10862306a36Sopenharmony_ci for (; addr < limit; addr = next) { 10962306a36Sopenharmony_ci next = pmd_addr_end(addr, limit); 11062306a36Sopenharmony_ci if (addr < 0x100000000UL) { 11162306a36Sopenharmony_ci spm = spt->low_prot; 11262306a36Sopenharmony_ci } else { 11362306a36Sopenharmony_ci spm = spt->protptrs[addr >> SBP_L3_SHIFT]; 11462306a36Sopenharmony_ci if (!spm) 11562306a36Sopenharmony_ci continue; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)]; 11862306a36Sopenharmony_ci if (!spp) 11962306a36Sopenharmony_ci continue; 12062306a36Sopenharmony_ci spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 12362306a36Sopenharmony_ci nw = PTRS_PER_PTE - i; 12462306a36Sopenharmony_ci if (addr + (nw << PAGE_SHIFT) > next) 12562306a36Sopenharmony_ci nw = (next - addr) >> PAGE_SHIFT; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci memset(spp, 0, nw * sizeof(u32)); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci /* now flush any existing HPTEs for the range */ 13062306a36Sopenharmony_ci hpte_flush_range(mm, addr, nw); 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cierr_out: 13462306a36Sopenharmony_ci mmap_write_unlock(mm); 13562306a36Sopenharmony_ci} 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 13862306a36Sopenharmony_cistatic int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, 13962306a36Sopenharmony_ci unsigned long end, struct mm_walk *walk) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 14262306a36Sopenharmony_ci split_huge_pmd(vma, pmd, addr); 14362306a36Sopenharmony_ci return 0; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cistatic const struct mm_walk_ops subpage_walk_ops = { 14762306a36Sopenharmony_ci .pmd_entry = subpage_walk_pmd_entry, 14862306a36Sopenharmony_ci .walk_lock = PGWALK_WRLOCK_VERIFY, 14962306a36Sopenharmony_ci}; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistatic void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, 15262306a36Sopenharmony_ci unsigned long len) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci struct vm_area_struct *vma; 15562306a36Sopenharmony_ci VMA_ITERATOR(vmi, mm, addr); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* 15862306a36Sopenharmony_ci * We don't try too hard, we just mark all the vma in that range 15962306a36Sopenharmony_ci * VM_NOHUGEPAGE and split them. 16062306a36Sopenharmony_ci */ 16162306a36Sopenharmony_ci for_each_vma_range(vmi, vma, addr + len) { 16262306a36Sopenharmony_ci vm_flags_set(vma, VM_NOHUGEPAGE); 16362306a36Sopenharmony_ci walk_page_vma(vma, &subpage_walk_ops, NULL); 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci#else 16762306a36Sopenharmony_cistatic void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, 16862306a36Sopenharmony_ci unsigned long len) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci return; 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci#endif 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci/* 17562306a36Sopenharmony_ci * Copy in a subpage protection map for an address range. 17662306a36Sopenharmony_ci * The map has 2 bits per 4k subpage, so 32 bits per 64k page. 17762306a36Sopenharmony_ci * Each 2-bit field is 0 to allow any access, 1 to prevent writes, 17862306a36Sopenharmony_ci * 2 or 3 to prevent all accesses. 17962306a36Sopenharmony_ci * Note that the normal page protections also apply; the subpage 18062306a36Sopenharmony_ci * protection mechanism is an additional constraint, so putting 0 18162306a36Sopenharmony_ci * in a 2-bit field won't allow writes to a page that is otherwise 18262306a36Sopenharmony_ci * write-protected. 18362306a36Sopenharmony_ci */ 18462306a36Sopenharmony_ciSYSCALL_DEFINE3(subpage_prot, unsigned long, addr, 18562306a36Sopenharmony_ci unsigned long, len, u32 __user *, map) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci struct mm_struct *mm = current->mm; 18862306a36Sopenharmony_ci struct subpage_prot_table *spt; 18962306a36Sopenharmony_ci u32 **spm, *spp; 19062306a36Sopenharmony_ci unsigned long i; 19162306a36Sopenharmony_ci size_t nw; 19262306a36Sopenharmony_ci unsigned long next, limit; 19362306a36Sopenharmony_ci int err; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if (radix_enabled()) 19662306a36Sopenharmony_ci return -ENOENT; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci /* Check parameters */ 19962306a36Sopenharmony_ci if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || 20062306a36Sopenharmony_ci addr >= mm->task_size || len >= mm->task_size || 20162306a36Sopenharmony_ci addr + len > mm->task_size) 20262306a36Sopenharmony_ci return -EINVAL; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci if (is_hugepage_only_range(mm, addr, len)) 20562306a36Sopenharmony_ci return -EINVAL; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci if (!map) { 20862306a36Sopenharmony_ci /* Clear out the protection map for the address range */ 20962306a36Sopenharmony_ci subpage_prot_clear(addr, len); 21062306a36Sopenharmony_ci return 0; 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32))) 21462306a36Sopenharmony_ci return -EFAULT; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci mmap_write_lock(mm); 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci spt = mm_ctx_subpage_prot(&mm->context); 21962306a36Sopenharmony_ci if (!spt) { 22062306a36Sopenharmony_ci /* 22162306a36Sopenharmony_ci * Allocate subpage prot table if not already done. 22262306a36Sopenharmony_ci * Do this with mmap_lock held 22362306a36Sopenharmony_ci */ 22462306a36Sopenharmony_ci spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); 22562306a36Sopenharmony_ci if (!spt) { 22662306a36Sopenharmony_ci err = -ENOMEM; 22762306a36Sopenharmony_ci goto out; 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci mm->context.hash_context->spt = spt; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci subpage_mark_vma_nohuge(mm, addr, len); 23362306a36Sopenharmony_ci for (limit = addr + len; addr < limit; addr = next) { 23462306a36Sopenharmony_ci next = pmd_addr_end(addr, limit); 23562306a36Sopenharmony_ci err = -ENOMEM; 23662306a36Sopenharmony_ci if (addr < 0x100000000UL) { 23762306a36Sopenharmony_ci spm = spt->low_prot; 23862306a36Sopenharmony_ci } else { 23962306a36Sopenharmony_ci spm = spt->protptrs[addr >> SBP_L3_SHIFT]; 24062306a36Sopenharmony_ci if (!spm) { 24162306a36Sopenharmony_ci spm = (u32 **)get_zeroed_page(GFP_KERNEL); 24262306a36Sopenharmony_ci if (!spm) 24362306a36Sopenharmony_ci goto out; 24462306a36Sopenharmony_ci spt->protptrs[addr >> SBP_L3_SHIFT] = spm; 24562306a36Sopenharmony_ci } 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1); 24862306a36Sopenharmony_ci spp = *spm; 24962306a36Sopenharmony_ci if (!spp) { 25062306a36Sopenharmony_ci spp = (u32 *)get_zeroed_page(GFP_KERNEL); 25162306a36Sopenharmony_ci if (!spp) 25262306a36Sopenharmony_ci goto out; 25362306a36Sopenharmony_ci *spm = spp; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci local_irq_disable(); 25862306a36Sopenharmony_ci demote_segment_4k(mm, addr); 25962306a36Sopenharmony_ci local_irq_enable(); 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 26262306a36Sopenharmony_ci nw = PTRS_PER_PTE - i; 26362306a36Sopenharmony_ci if (addr + (nw << PAGE_SHIFT) > next) 26462306a36Sopenharmony_ci nw = (next - addr) >> PAGE_SHIFT; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci mmap_write_unlock(mm); 26762306a36Sopenharmony_ci if (__copy_from_user(spp, map, nw * sizeof(u32))) 26862306a36Sopenharmony_ci return -EFAULT; 26962306a36Sopenharmony_ci map += nw; 27062306a36Sopenharmony_ci mmap_write_lock(mm); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* now flush any existing HPTEs for the range */ 27362306a36Sopenharmony_ci hpte_flush_range(mm, addr, nw); 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci if (limit > spt->maxaddr) 27662306a36Sopenharmony_ci spt->maxaddr = limit; 27762306a36Sopenharmony_ci err = 0; 27862306a36Sopenharmony_ci out: 27962306a36Sopenharmony_ci mmap_write_unlock(mm); 28062306a36Sopenharmony_ci return err; 28162306a36Sopenharmony_ci} 282