// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/pkeys.h>
#include <linux/debugfs.h>
#include <linux/proc_fs.h>
#include <misc/cxl-base.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
#include <asm/firmware.h>
#include <asm/ultravisor.h>
#include <asm/kexec.h>

#include <mm/mmu_decl.h>
#include <trace/events/thp.h>

#include "internal.h"

/* Per-page-size MMU properties, indexed by the MMU_PAGE_* constants. */
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Page size used to map the vmemmap; 4K default, may be raised at boot. */
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif

/* PMD fragments per page and fragment size shift; set up during MMU init. */
unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is called when relaxing access to a hugepage. It's also called in the
 * page fault path when we don't hit any of the major fault cases, ie, a minor
 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
 * handled those two for us, we additionally deal with missing execute
 * permission here on some processors.
 *
 * Returns 1 if the entry was updated, 0 if it already matched.
 */
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp, pmd_t entry, int dirty)
{
	int changed;
#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
#endif
	changed = !pmd_same(*(pmdp), entry);
	if (changed) {
		/*
		 * We can use MMU_PAGE_2M here, because only the radix
		 * path looks at the psize.
		 */
		__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
					pmd_pte(entry), address, MMU_PAGE_2M);
	}
	return changed;
}

/*
 * PUD-level counterpart of pmdp_set_access_flags(), used for devmap PUDs.
 * Returns 1 if the entry was updated, 0 if it already matched.
 */
int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed;
#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pud_devmap(*pudp));
	assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
#endif
	changed = !pud_same(*(pudp), entry);
	if (changed) {
		/*
		 * We can use MMU_PAGE_1G here, because only the radix
		 * path looks at the psize.
		 */
		__ptep_set_access_flags(vma, pudp_ptep(pudp),
					pud_pte(entry), address, MMU_PAGE_1G);
	}
	return changed;
}


/* Test and clear the accessed (young) bit of a huge PMD entry. */
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long address, pmd_t *pmdp)
{
	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}

/* Test and clear the accessed (young) bit of a huge PUD entry. */
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long address, pud_t *pudp)
{
	return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
}

/*
 * set a new huge pmd. We should not be called for updating
 * an existing pmd entry. That should go via pmd_hugepage_update.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * Make sure hardware valid bit is not set. We don't do
	 * tlb flush for this update.
	 */

	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
	WARN_ON(!(pmd_large(pmd)));
#endif
	trace_hugepage_set_pmd(addr, pmd_val(pmd));
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}

/*
 * Set a new huge pud. As with set_pmd_at(), updates of existing
 * entries must go through the *_hugepage_update() paths instead.
 */
void set_pud_at(struct mm_struct *mm, unsigned long addr,
		pud_t *pudp, pud_t pud)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * Make sure hardware valid bit is not set. We don't do
	 * tlb flush for this update.
	 */

	WARN_ON(pte_hw_valid(pud_pte(*pudp)));
	assert_spin_locked(pud_lockptr(mm, pudp));
	WARN_ON(!(pud_large(pud)));
#endif
	trace_hugepage_set_pud(addr, pud_val(pud));
	return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
}

/* IPI handler used by serialize_against_pte_lookup() below. */
static void do_serialize(void *arg)
{
	/* We've taken the IPI, so try to trim the mask while here */
	if (radix_enabled()) {
		struct mm_struct *mm = arg;
		exit_lazy_flush_tlb(mm, false);
	}
}

/*
 * Serialize against __find_linux_pte() which does lock-less
 * lookup in page tables with local interrupts disabled. For huge pages
 * it casts pmd_t to pte_t. Since format of pte_t is different from
 * pmd_t we want to prevent transit from pmd pointing to page table
 * to pmd pointing to huge page (and back) while interrupts are disabled.
 * We clear pmd to possibly replace it with page table pointer in
 * different code paths. So make sure we wait for the parallel
 * __find_linux_pte() to finish.
 */
void serialize_against_pte_lookup(struct mm_struct *mm)
{
	/* Order the preceding pmd update against reading mm_cpumask */
	smp_mb();
	smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
}

/*
 * We use this to invalidate a pmdp entry before switching from a
 * hugepte to regular pmd entry.
 */
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	unsigned long old_pmd;

	/* Clear _PAGE_PRESENT, set _PAGE_INVALID so pte_present() stays true */
	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return __pmd(old_pmd);
}

/*
 * Clear a huge PMD and return its old value. @full is set when the
 * whole address space is being torn down (fullmm), in which case the
 * TLB flush can be skipped.
 */
pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr, pmd_t *pmdp, int full)
{
	pmd_t pmd;
	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
	VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
		   !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
	pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
	/*
	 * If it is not a fullmm flush, then we can possibly end up converting
	 * this PMD pte entry to a regular level 0 PTE by a parallel page fault.
	 * Make sure we flush the tlb in this case.
	 */
	if (!full)
		flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
	return pmd;
}

/* PUD-level counterpart of pmdp_huge_get_and_clear_full(). */
pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr, pud_t *pudp, int full)
{
	pud_t pud;

	/*
	 * NOTE(review): this alignment check uses HPAGE_PMD_MASK;
	 * HPAGE_PUD_MASK looks intended for a PUD entry — confirm.
	 */
	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
	VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
		  !pud_present(*pudp));
	pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
	/*
	 * If it is not a fullmm flush, then we can possibly end up converting
	 * this PUD entry to regular lower-level PTEs by a parallel page fault.
	 * Make sure we flush the tlb in this case.
	 */
	if (!full)
		flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
	return pud;
}

/* OR the protection bits of @pgprot into @pmd. */
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}

/* OR the protection bits of @pgprot into @pud. */
static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
{
	return __pud(pud_val(pud) | pgprot_val(pgprot));
}

/*
 * At some point we should be able to get rid of
 * pmd_mkhuge() and mk_huge_pmd() when we update all the
 * other archs to mark the pmd huge in pfn_pmd()
 */
pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
	unsigned long pmdv;

	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;

	return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot));
}

/* Build a huge PUD for @pfn with protections @pgprot. */
pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
{
	unsigned long pudv;

	pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;

	return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot));
}

/* Build a huge PMD mapping @page with protections @pgprot. */
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
	return pfn_pmd(page_to_pfn(page), pgprot);
}

/*
 * Change the protection of @pmd to @newprot, keeping only the bits
 * covered by _HPAGE_CHG_MASK (pfn, huge markers, etc.).
 */
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	unsigned long pmdv;

	pmdv = pmd_val(pmd);
	pmdv &= _HPAGE_CHG_MASK;
	return pmd_set_protbits(__pmd(pmdv), newprot);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/* For use by kexec, called with MMU off */
notrace void mmu_cleanup_all(void)
{
	if (radix_enabled())
		radix__mmu_cleanup_all();
	else if (mmu_hash_ops.hpte_clear_all)
		mmu_hash_ops.hpte_clear_all();

	reset_sprs();
}

#ifdef CONFIG_MEMORY_HOTPLUG
/* Map a hot-plugged memory section, dispatching on the active MMU type. */
int __meminit create_section_mapping(unsigned long start, unsigned long end,
				     int nid, pgprot_t prot)
{
	if (radix_enabled())
		return radix__create_section_mapping(start, end, nid, prot);

	return hash__create_section_mapping(start, end, nid, prot);
}

/* Unmap a hot-removed memory section, dispatching on the active MMU type. */
int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
	if (radix_enabled())
		return radix__remove_section_mapping(start, end);

	return hash__remove_section_mapping(start, end);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

/* Allocate the partition table at boot and point the hardware at it. */
void __init mmu_partition_table_init(void)
{
	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
	unsigned long ptcr;

	/* Initialize the Partition Table with no entries */
	partition_tb = memblock_alloc(patb_size, patb_size);
	if (!partition_tb)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, patb_size, patb_size);

	/* PTCR low bits encode the table size as log2(size) - 12 */
	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
	set_ptcr_when_no_uv(ptcr);
	powernv_set_nmmu_ptcr(ptcr);
}

/*
 * Flush all translations cached for partition @lpid. @radix selects the
 * flush type according to the partition's *previous* translation mode.
 */
static void flush_partition(unsigned int lpid, bool radix)
{
	if (radix) {
		radix__flush_all_lpid(lpid);
		radix__flush_all_lpid_guest(lpid);
	} else {
		asm volatile("ptesync" : : : "memory");
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		/* do we need fixup here ?*/
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
	}
}

/*
 * Install partition-table entry @lpid with doublewords @dw0/@dw1 and,
 * when @flush is set, invalidate the translations cached for it.
 */
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
				  unsigned long dw1, bool flush)
{
	/* Remember the old dw0: its PATB_HR bit selects the flush type below */
	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);

	/*
	 * When ultravisor is enabled, the partition table is stored in secure
	 * memory and can only be accessed doing an ultravisor call. However, we
	 * maintain a copy of the partition table in normal memory to allow Nest
	 * MMU translations to occur (for normal VMs).
	 *
	 * Therefore, here we always update partition_tb, regardless of whether
	 * we are running under an ultravisor or not.
	 */
	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * If ultravisor is enabled, we do an ultravisor call to register the
	 * partition table entry (PATE), which also does a global flush of TLBs
	 * and partition table caches for the lpid. Otherwise, just do the
	 * flush. The type of flush (hash or radix) depends on what the previous
	 * use of the partition ID was, not the new use.
	 */
	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
		uv_register_pate(lpid, dw0, dw1);
		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
			dw0, dw1);
	} else if (flush) {
		/*
		 * Boot does not need to flush, because MMU is off and each
		 * CPU does a tlbiel_all() before switching them on, which
		 * flushes everything.
		 */
		flush_partition(lpid, (old & PATB_HR));
	}
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);

/* Take a free PMD fragment from this mm's cache; NULL if the cache is empty. */
static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
{
	void *pmd_frag, *ret;

	/* No fragment sharing when a whole page is a single PMD table */
	if (PMD_FRAG_NR == 1)
		return NULL;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pmd_frag;
	if (ret) {
		pmd_frag = ret + PMD_FRAG_SIZE;
		/*
		 * If we have taken up all the fragments mark PTE page NULL
		 */
		if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
			pmd_frag = NULL;
		mm->context.pmd_frag = pmd_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pmd_t *)ret;
}

/*
 * Allocate a fresh page of PMD fragments, return the first fragment
 * and (when possible) seed the mm's fragment cache with the rest.
 */
static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
{
	void *ret = NULL;
	struct ptdesc *ptdesc;
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;

	/* Kernel page tables are not charged to a memcg */
	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pmd_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}

	atomic_set(&ptdesc->pt_frag_refcount, 1);

	ret = ptdesc_address(ptdesc);
	/*
	 * if we support only one fragment just return the
	 * allocated page.
	 */
	if (PMD_FRAG_NR == 1)
		return ret;

	spin_lock(&mm->page_table_lock);
	/*
	 * If we find ptdesc_page set, we return
	 * the allocated page with single fragment
	 * count.
	 */
	if (likely(!mm->context.pmd_frag)) {
		atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pmd_t *)ret;
}

/* Allocate one PMD table fragment, reusing a cached fragment when possible. */
pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	pmd_t *pmd;

	pmd = get_pmd_from_cache(mm);
	if (pmd)
		return pmd;

	return __alloc_for_pmdcache(mm);
}

/* Drop one reference on a PMD fragment; free the backing page on the last. */
void pmd_fragment_free(unsigned long *pmd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);

	if (pagetable_is_reserved(ptdesc))
		return free_reserved_ptdesc(ptdesc);

	BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
	if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
		pagetable_pmd_dtor(ptdesc);
		pagetable_free(ptdesc);
	}
}

/* Free a page table of the level/type encoded by @index. */
static inline void pgtable_free(void *table, int index)
{
	switch (index) {
	case PTE_INDEX:
		pte_fragment_free(table, 0);
		break;
	case PMD_INDEX:
		pmd_fragment_free(table);
		break;
	case PUD_INDEX:
		__pud_free(table);
		break;
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
	/* 16M hugepd directory at pud level */
	case HTLB_16M_INDEX:
		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
		break;
	/* 16G hugepd directory at the pgd level */
	case HTLB_16G_INDEX:
		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
		break;
#endif
	/* We don't free pgd table via RCU callback */
	default:
		BUG();
	}
}

/*
 * Queue a page table for deferred freeing via the mmu_gather machinery.
 * The level @index is stashed in the low bits of the aligned table pointer.
 */
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(index > MAX_PGTABLE_INDEX_SIZE);
	pgf |= index;
	tlb_remove_table(tlb, (void *)pgf);
}

/* Deferred-free callback: unpack the pointer/index cookie and free the table. */
void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	return pgtable_free(table, index);
}

#ifdef CONFIG_PROC_FS
/* Direct-map page counts per page size, reported in /proc/meminfo (radix). */
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];

void arch_report_meminfo(struct seq_file *m)
{
	/*
	 * Hash maps the memory with one size mmu_linear_psize.
	 * So don't bother to print these on hash
	 */
	if (!radix_enabled())
		return;
	/* Counters are in pages; each shift converts that page size to kB */
	seq_printf(m, "DirectMap4k: %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2);
	seq_printf(m, "DirectMap64k: %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6);
	seq_printf(m, "DirectMap2M: %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11);
	seq_printf(m, "DirectMap1G: %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
}
#endif /* CONFIG_PROC_FS */

/*
 * Begin a protection change on a PTE: returns the old value with the
 * hardware-valid bit cleared so no parallel hardware update can race
 * with the modification. Paired with ptep_modify_prot_commit().
 */
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	unsigned long pte_val;

	/*
	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
	 * possible. Also keep the pte_present true so that we don't take
	 * wrong fault.
	 */
	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);

	return __pte(pte_val);

}

/* Commit the new PTE value started by ptep_modify_prot_start(). */
void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	if (radix_enabled())
		return radix__ptep_modify_prot_commit(vma, addr,
						      ptep, old_pte, pte);
	set_pte_at(vma->vm_mm, addr, ptep, pte);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * For hash translation mode, we use the deposited table to store hash slot
 * information and they are stored at PTRS_PER_PMD offset from related pmd
 * location. Hence a pmd move requires deposit and withdraw.
 *
 * For radix translation with split pmd ptl, we store the deposited table in the
 * pmd page. Hence if we have different pmd page we need to withdraw during pmd
 * move.
 *
 * With hash we use deposited table always irrespective of anon or not.
 * With radix we use deposited table only for anonymous mapping.
 */
int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
			   struct spinlock *old_pmd_ptl,
			   struct vm_area_struct *vma)
{
	if (radix_enabled())
		return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);

	return true;
}
#endif

/*
 * Does the CPU support tlbie?
 */
bool tlbie_capable __read_mostly = true;
EXPORT_SYMBOL(tlbie_capable);

/*
 * Should tlbie be used for management of CPU TLBs, for kernel and process
 * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
 * guest address spaces.
 */
bool tlbie_enabled __read_mostly = true;

/*
 * "disable_tlbie" command-line handler. Rejected on hash MMU, where
 * tlbie cannot be disabled.
 */
static int __init setup_disable_tlbie(char *str)
{
	if (!radix_enabled()) {
		pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
		return 1;
	}

	tlbie_capable = false;
	tlbie_enabled = false;

	return 1;
}
__setup("disable_tlbie", setup_disable_tlbie);

/* Expose the tlbie_enabled knob via debugfs when tlbie is supported. */
static int __init pgtable_debugfs_setup(void)
{
	if (!tlbie_capable)
		return 0;

	/*
	 * There is no locking vs tlb flushing when changing this value.
	 * The tlb flushers will see one value or another, and use either
	 * tlbie or tlbiel with IPIs. In both cases the TLBs will be
	 * invalidated as expected.
	 */
	debugfs_create_bool("tlbie_enabled", 0600,
			    arch_debugfs_dir,
			    &tlbie_enabled);

	return 0;
}
arch_initcall(pgtable_debugfs_setup);

#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
/*
 * Override the generic version in mm/memremap.c.
 *
 * With hash translation, the direct-map range is mapped with just one
 * page size selected by htab_init_page_sizes(). Consult
 * mmu_psize_defs[] to determine the minimum page size alignment.
*/
unsigned long memremap_compat_align(void)
{
	if (!radix_enabled()) {
		unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
		return max(SUBSECTION_SIZE, 1UL << shift);
	}

	return SUBSECTION_SIZE;
}
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif

/* Translate VM_* flags into a page protection value for this MMU. */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	unsigned long prot;

	/* Radix supports execute-only, but protection_map maps X -> RX */
	if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) {
		prot = pgprot_val(PAGE_EXECONLY);
	} else {
		prot = pgprot_val(protection_map[vm_flags &
						 (VM_ACCESS_FLAGS | VM_SHARED)]);
	}

	if (vm_flags & VM_SAO)
		prot |= _PAGE_SAO;

#ifdef CONFIG_PPC_MEM_KEYS
	prot |= vmflag_to_pte_pkey_bits(vm_flags);
#endif

	return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);