162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * This file contains common routines for dealing with free of page tables 462306a36Sopenharmony_ci * Along with common page table handling code 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Derived from arch/powerpc/mm/tlb_64.c: 762306a36Sopenharmony_ci * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) 1062306a36Sopenharmony_ci * and Cort Dougan (PReP) (cort@cs.nmt.edu) 1162306a36Sopenharmony_ci * Copyright (C) 1996 Paul Mackerras 1262306a36Sopenharmony_ci * 1362306a36Sopenharmony_ci * Derived from "arch/i386/mm/init.c" 1462306a36Sopenharmony_ci * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 1562306a36Sopenharmony_ci * 1662306a36Sopenharmony_ci * Dave Engebretsen <engebret@us.ibm.com> 1762306a36Sopenharmony_ci * Rework for PPC64 port. 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <linux/kernel.h> 2162306a36Sopenharmony_ci#include <linux/gfp.h> 2262306a36Sopenharmony_ci#include <linux/mm.h> 2362306a36Sopenharmony_ci#include <linux/percpu.h> 2462306a36Sopenharmony_ci#include <linux/hardirq.h> 2562306a36Sopenharmony_ci#include <linux/hugetlb.h> 2662306a36Sopenharmony_ci#include <asm/tlbflush.h> 2762306a36Sopenharmony_ci#include <asm/tlb.h> 2862306a36Sopenharmony_ci#include <asm/hugetlb.h> 2962306a36Sopenharmony_ci#include <asm/pte-walk.h> 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#ifdef CONFIG_PPC64 3262306a36Sopenharmony_ci#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD) 3362306a36Sopenharmony_ci#else 3462306a36Sopenharmony_ci#define PGD_ALIGN PAGE_SIZE 3562306a36Sopenharmony_ci#endif 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cipgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN); 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_cistatic inline int is_exec_fault(void) 4062306a36Sopenharmony_ci{ 4162306a36Sopenharmony_ci return current->thread.regs && TRAP(current->thread.regs) == 0x400; 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci/* We only try to do i/d cache coherency on stuff that looks like 4562306a36Sopenharmony_ci * reasonably "normal" PTEs. We currently require a PTE to be present 4662306a36Sopenharmony_ci * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that 4762306a36Sopenharmony_ci * on userspace PTEs 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_cistatic inline int pte_looks_normal(pte_t pte) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci if (pte_present(pte) && !pte_special(pte)) { 5362306a36Sopenharmony_ci if (pte_ci(pte)) 5462306a36Sopenharmony_ci return 0; 5562306a36Sopenharmony_ci if (pte_user(pte)) 5662306a36Sopenharmony_ci return 1; 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci return 0; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic struct folio *maybe_pte_to_folio(pte_t pte) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci unsigned long pfn = pte_pfn(pte); 6462306a36Sopenharmony_ci struct page *page; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (unlikely(!pfn_valid(pfn))) 6762306a36Sopenharmony_ci return NULL; 6862306a36Sopenharmony_ci page = pfn_to_page(pfn); 6962306a36Sopenharmony_ci if (PageReserved(page)) 7062306a36Sopenharmony_ci return NULL; 7162306a36Sopenharmony_ci return page_folio(page); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci#ifdef CONFIG_PPC_BOOK3S 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* Server-style MMU handles coherency when hashing if HW exec permission 7762306a36Sopenharmony_ci * is supposed per page (currently 64-bit only). If not, then, we always 7862306a36Sopenharmony_ci * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec 7962306a36Sopenharmony_ci * support falls into the same category. 8062306a36Sopenharmony_ci */ 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_cistatic pte_t set_pte_filter_hash(pte_t pte) 8362306a36Sopenharmony_ci{ 8462306a36Sopenharmony_ci pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 8562306a36Sopenharmony_ci if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || 8662306a36Sopenharmony_ci cpu_has_feature(CPU_FTR_NOEXECUTE))) { 8762306a36Sopenharmony_ci struct folio *folio = maybe_pte_to_folio(pte); 8862306a36Sopenharmony_ci if (!folio) 8962306a36Sopenharmony_ci return pte; 9062306a36Sopenharmony_ci if (!test_bit(PG_dcache_clean, &folio->flags)) { 9162306a36Sopenharmony_ci flush_dcache_icache_folio(folio); 9262306a36Sopenharmony_ci set_bit(PG_dcache_clean, &folio->flags); 9362306a36Sopenharmony_ci } 9462306a36Sopenharmony_ci } 9562306a36Sopenharmony_ci return pte; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci#else /* CONFIG_PPC_BOOK3S */ 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic pte_t set_pte_filter_hash(pte_t pte) { return pte; } 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci#endif /* CONFIG_PPC_BOOK3S */ 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci/* Embedded type MMU with HW exec support. This is a bit more complicated 10562306a36Sopenharmony_ci * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so 10662306a36Sopenharmony_ci * instead we "filter out" the exec permission for non clean pages. 10762306a36Sopenharmony_ci * 10862306a36Sopenharmony_ci * This is also called once for the folio. So only work with folio->flags here. 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_cistatic inline pte_t set_pte_filter(pte_t pte) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci struct folio *folio; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci if (radix_enabled()) 11562306a36Sopenharmony_ci return pte; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 11862306a36Sopenharmony_ci return set_pte_filter_hash(pte); 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci /* No exec permission in the first place, move on */ 12162306a36Sopenharmony_ci if (!pte_exec(pte) || !pte_looks_normal(pte)) 12262306a36Sopenharmony_ci return pte; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci /* If you set _PAGE_EXEC on weird pages you're on your own */ 12562306a36Sopenharmony_ci folio = maybe_pte_to_folio(pte); 12662306a36Sopenharmony_ci if (unlikely(!folio)) 12762306a36Sopenharmony_ci return pte; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci /* If the page clean, we move on */ 13062306a36Sopenharmony_ci if (test_bit(PG_dcache_clean, &folio->flags)) 13162306a36Sopenharmony_ci return pte; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci /* If it's an exec fault, we flush the cache and make it clean */ 13462306a36Sopenharmony_ci if (is_exec_fault()) { 13562306a36Sopenharmony_ci flush_dcache_icache_folio(folio); 13662306a36Sopenharmony_ci set_bit(PG_dcache_clean, &folio->flags); 13762306a36Sopenharmony_ci return pte; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci /* Else, we filter out _PAGE_EXEC */ 14162306a36Sopenharmony_ci return pte_exprotect(pte); 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistatic pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, 14562306a36Sopenharmony_ci int dirty) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci struct folio *folio; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) 15062306a36Sopenharmony_ci return pte; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 15362306a36Sopenharmony_ci return pte; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci /* So here, we only care about exec faults, as we use them 15662306a36Sopenharmony_ci * to recover lost _PAGE_EXEC and perform I$/D$ coherency 15762306a36Sopenharmony_ci * if necessary. Also if _PAGE_EXEC is already set, same deal, 15862306a36Sopenharmony_ci * we just bail out 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_ci if (dirty || pte_exec(pte) || !is_exec_fault()) 16162306a36Sopenharmony_ci return pte; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM 16462306a36Sopenharmony_ci /* So this is an exec fault, _PAGE_EXEC is not set. If it was 16562306a36Sopenharmony_ci * an error we would have bailed out earlier in do_page_fault() 16662306a36Sopenharmony_ci * but let's make sure of it 16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_ci if (WARN_ON(!(vma->vm_flags & VM_EXEC))) 16962306a36Sopenharmony_ci return pte; 17062306a36Sopenharmony_ci#endif /* CONFIG_DEBUG_VM */ 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* If you set _PAGE_EXEC on weird pages you're on your own */ 17362306a36Sopenharmony_ci folio = maybe_pte_to_folio(pte); 17462306a36Sopenharmony_ci if (unlikely(!folio)) 17562306a36Sopenharmony_ci goto bail; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* If the page is already clean, we move on */ 17862306a36Sopenharmony_ci if (test_bit(PG_dcache_clean, &folio->flags)) 17962306a36Sopenharmony_ci goto bail; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci /* Clean the page and set PG_dcache_clean */ 18262306a36Sopenharmony_ci flush_dcache_icache_folio(folio); 18362306a36Sopenharmony_ci set_bit(PG_dcache_clean, &folio->flags); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci bail: 18662306a36Sopenharmony_ci return pte_mkexec(pte); 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci/* 19062306a36Sopenharmony_ci * set_pte stores a linux PTE into the linux page table. 19162306a36Sopenharmony_ci */ 19262306a36Sopenharmony_civoid set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 19362306a36Sopenharmony_ci pte_t pte, unsigned int nr) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci /* Note: mm->context.id might not yet have been assigned as 19762306a36Sopenharmony_ci * this context might not have been activated yet when this 19862306a36Sopenharmony_ci * is called. Filter the pte value and use the filtered value 19962306a36Sopenharmony_ci * to setup all the ptes in the range. 20062306a36Sopenharmony_ci */ 20162306a36Sopenharmony_ci pte = set_pte_filter(pte); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci /* 20462306a36Sopenharmony_ci * We don't need to call arch_enter/leave_lazy_mmu_mode() 20562306a36Sopenharmony_ci * because we expect set_ptes to be only be used on not present 20662306a36Sopenharmony_ci * and not hw_valid ptes. Hence there is no translation cache flush 20762306a36Sopenharmony_ci * involved that need to be batched. 20862306a36Sopenharmony_ci */ 20962306a36Sopenharmony_ci for (;;) { 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci /* 21262306a36Sopenharmony_ci * Make sure hardware valid bit is not set. We don't do 21362306a36Sopenharmony_ci * tlb flush for this update. 21462306a36Sopenharmony_ci */ 21562306a36Sopenharmony_ci VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* Perform the setting of the PTE */ 21862306a36Sopenharmony_ci __set_pte_at(mm, addr, ptep, pte, 0); 21962306a36Sopenharmony_ci if (--nr == 0) 22062306a36Sopenharmony_ci break; 22162306a36Sopenharmony_ci ptep++; 22262306a36Sopenharmony_ci addr += PAGE_SIZE; 22362306a36Sopenharmony_ci /* 22462306a36Sopenharmony_ci * increment the pfn. 22562306a36Sopenharmony_ci */ 22662306a36Sopenharmony_ci pte = pfn_pte(pte_pfn(pte) + 1, pte_pgprot((pte))); 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_civoid unmap_kernel_page(unsigned long va) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci pmd_t *pmdp = pmd_off_k(va); 23362306a36Sopenharmony_ci pte_t *ptep = pte_offset_kernel(pmdp, va); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci pte_clear(&init_mm, va, ptep); 23662306a36Sopenharmony_ci flush_tlb_kernel_range(va, va + PAGE_SIZE); 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * This is called when relaxing access to a PTE. It's also called in the page 24162306a36Sopenharmony_ci * fault path when we don't hit any of the major fault cases, ie, a minor 24262306a36Sopenharmony_ci * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have 24362306a36Sopenharmony_ci * handled those two for us, we additionally deal with missing execute 24462306a36Sopenharmony_ci * permission here on some processors 24562306a36Sopenharmony_ci */ 24662306a36Sopenharmony_ciint ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, 24762306a36Sopenharmony_ci pte_t *ptep, pte_t entry, int dirty) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci int changed; 25062306a36Sopenharmony_ci entry = set_access_flags_filter(entry, vma, dirty); 25162306a36Sopenharmony_ci changed = !pte_same(*(ptep), entry); 25262306a36Sopenharmony_ci if (changed) { 25362306a36Sopenharmony_ci assert_pte_locked(vma->vm_mm, address); 25462306a36Sopenharmony_ci __ptep_set_access_flags(vma, ptep, entry, 25562306a36Sopenharmony_ci address, mmu_virtual_psize); 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci return changed; 25862306a36Sopenharmony_ci} 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE 26162306a36Sopenharmony_ciint huge_ptep_set_access_flags(struct vm_area_struct *vma, 26262306a36Sopenharmony_ci unsigned long addr, pte_t *ptep, 26362306a36Sopenharmony_ci pte_t pte, int dirty) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci#ifdef HUGETLB_NEED_PRELOAD 26662306a36Sopenharmony_ci /* 26762306a36Sopenharmony_ci * The "return 1" forces a call of update_mmu_cache, which will write a 26862306a36Sopenharmony_ci * TLB entry. Without this, platforms that don't do a write of the TLB 26962306a36Sopenharmony_ci * entry in the TLB miss handler asm will fault ad infinitum. 27062306a36Sopenharmony_ci */ 27162306a36Sopenharmony_ci ptep_set_access_flags(vma, addr, ptep, pte, dirty); 27262306a36Sopenharmony_ci return 1; 27362306a36Sopenharmony_ci#else 27462306a36Sopenharmony_ci int changed, psize; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci pte = set_access_flags_filter(pte, vma, dirty); 27762306a36Sopenharmony_ci changed = !pte_same(*(ptep), pte); 27862306a36Sopenharmony_ci if (changed) { 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci#ifdef CONFIG_PPC_BOOK3S_64 28162306a36Sopenharmony_ci struct hstate *h = hstate_vma(vma); 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci psize = hstate_get_psize(h); 28462306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM 28562306a36Sopenharmony_ci assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep)); 28662306a36Sopenharmony_ci#endif 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci#else 28962306a36Sopenharmony_ci /* 29062306a36Sopenharmony_ci * Not used on non book3s64 platforms. 29162306a36Sopenharmony_ci * 8xx compares it with mmu_virtual_psize to 29262306a36Sopenharmony_ci * know if it is a huge page or not. 29362306a36Sopenharmony_ci */ 29462306a36Sopenharmony_ci psize = MMU_PAGE_COUNT; 29562306a36Sopenharmony_ci#endif 29662306a36Sopenharmony_ci __ptep_set_access_flags(vma, ptep, pte, addr, psize); 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci return changed; 29962306a36Sopenharmony_ci#endif 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci#if defined(CONFIG_PPC_8xx) 30362306a36Sopenharmony_civoid set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 30462306a36Sopenharmony_ci pte_t pte, unsigned long sz) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci pmd_t *pmd = pmd_off(mm, addr); 30762306a36Sopenharmony_ci pte_basic_t val; 30862306a36Sopenharmony_ci pte_basic_t *entry = (pte_basic_t *)ptep; 30962306a36Sopenharmony_ci int num, i; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci /* 31262306a36Sopenharmony_ci * Make sure hardware valid bit is not set. We don't do 31362306a36Sopenharmony_ci * tlb flush for this update. 31462306a36Sopenharmony_ci */ 31562306a36Sopenharmony_ci VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci pte = set_pte_filter(pte); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci val = pte_val(pte); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci num = number_of_cells_per_pte(pmd, val, 1); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci for (i = 0; i < num; i++, entry++, val += SZ_4K) 32462306a36Sopenharmony_ci *entry = val; 32562306a36Sopenharmony_ci} 32662306a36Sopenharmony_ci#endif 32762306a36Sopenharmony_ci#endif /* CONFIG_HUGETLB_PAGE */ 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM 33062306a36Sopenharmony_civoid assert_pte_locked(struct mm_struct *mm, unsigned long addr) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci pgd_t *pgd; 33362306a36Sopenharmony_ci p4d_t *p4d; 33462306a36Sopenharmony_ci pud_t *pud; 33562306a36Sopenharmony_ci pmd_t *pmd; 33662306a36Sopenharmony_ci pte_t *pte; 33762306a36Sopenharmony_ci spinlock_t *ptl; 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci if (mm == &init_mm) 34062306a36Sopenharmony_ci return; 34162306a36Sopenharmony_ci pgd = mm->pgd + pgd_index(addr); 34262306a36Sopenharmony_ci BUG_ON(pgd_none(*pgd)); 34362306a36Sopenharmony_ci p4d = p4d_offset(pgd, addr); 34462306a36Sopenharmony_ci BUG_ON(p4d_none(*p4d)); 34562306a36Sopenharmony_ci pud = pud_offset(p4d, addr); 34662306a36Sopenharmony_ci BUG_ON(pud_none(*pud)); 34762306a36Sopenharmony_ci pmd = pmd_offset(pud, addr); 34862306a36Sopenharmony_ci /* 34962306a36Sopenharmony_ci * khugepaged to collapse normal pages to hugepage, first set 35062306a36Sopenharmony_ci * pmd to none to force page fault/gup to take mmap_lock. After 35162306a36Sopenharmony_ci * pmd is set to none, we do a pte_clear which does this assertion 35262306a36Sopenharmony_ci * so if we find pmd none, return. 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci if (pmd_none(*pmd)) 35562306a36Sopenharmony_ci return; 35662306a36Sopenharmony_ci pte = pte_offset_map_nolock(mm, pmd, addr, &ptl); 35762306a36Sopenharmony_ci BUG_ON(!pte); 35862306a36Sopenharmony_ci assert_spin_locked(ptl); 35962306a36Sopenharmony_ci pte_unmap(pte); 36062306a36Sopenharmony_ci} 36162306a36Sopenharmony_ci#endif /* CONFIG_DEBUG_VM */ 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ciunsigned long vmalloc_to_phys(void *va) 36462306a36Sopenharmony_ci{ 36562306a36Sopenharmony_ci unsigned long pfn = vmalloc_to_pfn(va); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci BUG_ON(!pfn); 36862306a36Sopenharmony_ci return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vmalloc_to_phys); 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci/* 37362306a36Sopenharmony_ci * We have 4 cases for pgds and pmds: 37462306a36Sopenharmony_ci * (1) invalid (all zeroes) 37562306a36Sopenharmony_ci * (2) pointer to next table, as normal; bottom 6 bits == 0 37662306a36Sopenharmony_ci * (3) leaf pte for huge page _PAGE_PTE set 37762306a36Sopenharmony_ci * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table 37862306a36Sopenharmony_ci * 37962306a36Sopenharmony_ci * So long as we atomically load page table pointers we are safe against teardown, 38062306a36Sopenharmony_ci * we can follow the address down to the page and take a ref on it. 38162306a36Sopenharmony_ci * This function need to be called with interrupts disabled. We use this variant 38262306a36Sopenharmony_ci * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED 38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_cipte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, 38562306a36Sopenharmony_ci bool *is_thp, unsigned *hpage_shift) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci pgd_t *pgdp; 38862306a36Sopenharmony_ci p4d_t p4d, *p4dp; 38962306a36Sopenharmony_ci pud_t pud, *pudp; 39062306a36Sopenharmony_ci pmd_t pmd, *pmdp; 39162306a36Sopenharmony_ci pte_t *ret_pte; 39262306a36Sopenharmony_ci hugepd_t *hpdp = NULL; 39362306a36Sopenharmony_ci unsigned pdshift; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (hpage_shift) 39662306a36Sopenharmony_ci *hpage_shift = 0; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci if (is_thp) 39962306a36Sopenharmony_ci *is_thp = false; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci /* 40262306a36Sopenharmony_ci * Always operate on the local stack value. This make sure the 40362306a36Sopenharmony_ci * value don't get updated by a parallel THP split/collapse, 40462306a36Sopenharmony_ci * page fault or a page unmap. The return pte_t * is still not 40562306a36Sopenharmony_ci * stable. So should be checked there for above conditions. 40662306a36Sopenharmony_ci * Top level is an exception because it is folded into p4d. 40762306a36Sopenharmony_ci */ 40862306a36Sopenharmony_ci pgdp = pgdir + pgd_index(ea); 40962306a36Sopenharmony_ci p4dp = p4d_offset(pgdp, ea); 41062306a36Sopenharmony_ci p4d = READ_ONCE(*p4dp); 41162306a36Sopenharmony_ci pdshift = P4D_SHIFT; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci if (p4d_none(p4d)) 41462306a36Sopenharmony_ci return NULL; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci if (p4d_is_leaf(p4d)) { 41762306a36Sopenharmony_ci ret_pte = (pte_t *)p4dp; 41862306a36Sopenharmony_ci goto out; 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci if (is_hugepd(__hugepd(p4d_val(p4d)))) { 42262306a36Sopenharmony_ci hpdp = (hugepd_t *)&p4d; 42362306a36Sopenharmony_ci goto out_huge; 42462306a36Sopenharmony_ci } 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci /* 42762306a36Sopenharmony_ci * Even if we end up with an unmap, the pgtable will not 42862306a36Sopenharmony_ci * be freed, because we do an rcu free and here we are 42962306a36Sopenharmony_ci * irq disabled 43062306a36Sopenharmony_ci */ 43162306a36Sopenharmony_ci pdshift = PUD_SHIFT; 43262306a36Sopenharmony_ci pudp = pud_offset(&p4d, ea); 43362306a36Sopenharmony_ci pud = READ_ONCE(*pudp); 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci if (pud_none(pud)) 43662306a36Sopenharmony_ci return NULL; 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci if (pud_is_leaf(pud)) { 43962306a36Sopenharmony_ci ret_pte = (pte_t *)pudp; 44062306a36Sopenharmony_ci goto out; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci if (is_hugepd(__hugepd(pud_val(pud)))) { 44462306a36Sopenharmony_ci hpdp = (hugepd_t *)&pud; 44562306a36Sopenharmony_ci goto out_huge; 44662306a36Sopenharmony_ci } 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci pdshift = PMD_SHIFT; 44962306a36Sopenharmony_ci pmdp = pmd_offset(&pud, ea); 45062306a36Sopenharmony_ci pmd = READ_ONCE(*pmdp); 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci /* 45362306a36Sopenharmony_ci * A hugepage collapse is captured by this condition, see 45462306a36Sopenharmony_ci * pmdp_collapse_flush. 45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_ci if (pmd_none(pmd)) 45762306a36Sopenharmony_ci return NULL; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci#ifdef CONFIG_PPC_BOOK3S_64 46062306a36Sopenharmony_ci /* 46162306a36Sopenharmony_ci * A hugepage split is captured by this condition, see 46262306a36Sopenharmony_ci * pmdp_invalidate. 46362306a36Sopenharmony_ci * 46462306a36Sopenharmony_ci * Huge page modification can be caught here too. 46562306a36Sopenharmony_ci */ 46662306a36Sopenharmony_ci if (pmd_is_serializing(pmd)) 46762306a36Sopenharmony_ci return NULL; 46862306a36Sopenharmony_ci#endif 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { 47162306a36Sopenharmony_ci if (is_thp) 47262306a36Sopenharmony_ci *is_thp = true; 47362306a36Sopenharmony_ci ret_pte = (pte_t *)pmdp; 47462306a36Sopenharmony_ci goto out; 47562306a36Sopenharmony_ci } 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (pmd_is_leaf(pmd)) { 47862306a36Sopenharmony_ci ret_pte = (pte_t *)pmdp; 47962306a36Sopenharmony_ci goto out; 48062306a36Sopenharmony_ci } 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci if (is_hugepd(__hugepd(pmd_val(pmd)))) { 48362306a36Sopenharmony_ci hpdp = (hugepd_t *)&pmd; 48462306a36Sopenharmony_ci goto out_huge; 48562306a36Sopenharmony_ci } 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci return pte_offset_kernel(&pmd, ea); 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ciout_huge: 49062306a36Sopenharmony_ci if (!hpdp) 49162306a36Sopenharmony_ci return NULL; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci ret_pte = hugepte_offset(*hpdp, ea, pdshift); 49462306a36Sopenharmony_ci pdshift = hugepd_shift(*hpdp); 49562306a36Sopenharmony_ciout: 49662306a36Sopenharmony_ci if (hpage_shift) 49762306a36Sopenharmony_ci *hpage_shift = pdshift; 49862306a36Sopenharmony_ci return ret_pte; 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__find_linux_pte); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci/* Note due to the way vm flags are laid out, the bits are XWR */ 50362306a36Sopenharmony_ciconst pgprot_t protection_map[16] = { 50462306a36Sopenharmony_ci [VM_NONE] = PAGE_NONE, 50562306a36Sopenharmony_ci [VM_READ] = PAGE_READONLY, 50662306a36Sopenharmony_ci [VM_WRITE] = PAGE_COPY, 50762306a36Sopenharmony_ci [VM_WRITE | VM_READ] = PAGE_COPY, 50862306a36Sopenharmony_ci [VM_EXEC] = PAGE_READONLY_X, 50962306a36Sopenharmony_ci [VM_EXEC | VM_READ] = PAGE_READONLY_X, 51062306a36Sopenharmony_ci [VM_EXEC | VM_WRITE] = PAGE_COPY_X, 51162306a36Sopenharmony_ci [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X, 51262306a36Sopenharmony_ci [VM_SHARED] = PAGE_NONE, 51362306a36Sopenharmony_ci [VM_SHARED | VM_READ] = PAGE_READONLY, 51462306a36Sopenharmony_ci [VM_SHARED | VM_WRITE] = PAGE_SHARED, 51562306a36Sopenharmony_ci [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, 51662306a36Sopenharmony_ci [VM_SHARED | VM_EXEC] = PAGE_READONLY_X, 51762306a36Sopenharmony_ci [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X, 51862306a36Sopenharmony_ci [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X, 51962306a36Sopenharmony_ci [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X 52062306a36Sopenharmony_ci}; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci#ifndef CONFIG_PPC_BOOK3S_64 52362306a36Sopenharmony_ciDECLARE_VM_GET_PAGE_PROT 52462306a36Sopenharmony_ci#endif 525