162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/sched.h>
762306a36Sopenharmony_ci#include <linux/mm_types.h>
862306a36Sopenharmony_ci#include <linux/memblock.h>
962306a36Sopenharmony_ci#include <linux/memremap.h>
1062306a36Sopenharmony_ci#include <linux/pkeys.h>
1162306a36Sopenharmony_ci#include <linux/debugfs.h>
1262306a36Sopenharmony_ci#include <linux/proc_fs.h>
1362306a36Sopenharmony_ci#include <misc/cxl-base.h>
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <asm/pgalloc.h>
1662306a36Sopenharmony_ci#include <asm/tlb.h>
1762306a36Sopenharmony_ci#include <asm/trace.h>
1862306a36Sopenharmony_ci#include <asm/powernv.h>
1962306a36Sopenharmony_ci#include <asm/firmware.h>
2062306a36Sopenharmony_ci#include <asm/ultravisor.h>
2162306a36Sopenharmony_ci#include <asm/kexec.h>
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci#include <mm/mmu_decl.h>
2462306a36Sopenharmony_ci#include <trace/events/thp.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include "internal.h"
2762306a36Sopenharmony_ci
/*
 * Per-MMU-page-size properties (shift, encodings, ...).
 * NOTE(review): presumably populated during boot by the hash/radix setup
 * code — confirm against the MMU init paths.
 */
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Page size used to map the vmemmap; 4K default, may be raised at boot. */
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif

/* Number of PMD fragments per page, and the fragment size shift. */
unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
4162306a36Sopenharmony_ci/*
4262306a36Sopenharmony_ci * This is called when relaxing access to a hugepage. It's also called in the page
4362306a36Sopenharmony_ci * fault path when we don't hit any of the major fault cases, ie, a minor
4462306a36Sopenharmony_ci * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
4562306a36Sopenharmony_ci * handled those two for us, we additionally deal with missing execute
4662306a36Sopenharmony_ci * permission here on some processors
4762306a36Sopenharmony_ci */
4862306a36Sopenharmony_ciint pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
4962306a36Sopenharmony_ci			  pmd_t *pmdp, pmd_t entry, int dirty)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	int changed;
5262306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
5362306a36Sopenharmony_ci	WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
5462306a36Sopenharmony_ci	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
5562306a36Sopenharmony_ci#endif
5662306a36Sopenharmony_ci	changed = !pmd_same(*(pmdp), entry);
5762306a36Sopenharmony_ci	if (changed) {
5862306a36Sopenharmony_ci		/*
5962306a36Sopenharmony_ci		 * We can use MMU_PAGE_2M here, because only radix
6062306a36Sopenharmony_ci		 * path look at the psize.
6162306a36Sopenharmony_ci		 */
6262306a36Sopenharmony_ci		__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
6362306a36Sopenharmony_ci					pmd_pte(entry), address, MMU_PAGE_2M);
6462306a36Sopenharmony_ci	}
6562306a36Sopenharmony_ci	return changed;
6662306a36Sopenharmony_ci}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ciint pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
6962306a36Sopenharmony_ci			  pud_t *pudp, pud_t entry, int dirty)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	int changed;
7262306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
7362306a36Sopenharmony_ci	WARN_ON(!pud_devmap(*pudp));
7462306a36Sopenharmony_ci	assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
7562306a36Sopenharmony_ci#endif
7662306a36Sopenharmony_ci	changed = !pud_same(*(pudp), entry);
7762306a36Sopenharmony_ci	if (changed) {
7862306a36Sopenharmony_ci		/*
7962306a36Sopenharmony_ci		 * We can use MMU_PAGE_1G here, because only radix
8062306a36Sopenharmony_ci		 * path look at the psize.
8162306a36Sopenharmony_ci		 */
8262306a36Sopenharmony_ci		__ptep_set_access_flags(vma, pudp_ptep(pudp),
8362306a36Sopenharmony_ci					pud_pte(entry), address, MMU_PAGE_1G);
8462306a36Sopenharmony_ci	}
8562306a36Sopenharmony_ci	return changed;
8662306a36Sopenharmony_ci}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ciint pmdp_test_and_clear_young(struct vm_area_struct *vma,
9062306a36Sopenharmony_ci			      unsigned long address, pmd_t *pmdp)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
9362306a36Sopenharmony_ci}
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ciint pudp_test_and_clear_young(struct vm_area_struct *vma,
9662306a36Sopenharmony_ci			      unsigned long address, pud_t *pudp)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci/*
10262306a36Sopenharmony_ci * set a new huge pmd. We should not be called for updating
10362306a36Sopenharmony_ci * an existing pmd entry. That should go via pmd_hugepage_update.
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_civoid set_pmd_at(struct mm_struct *mm, unsigned long addr,
10662306a36Sopenharmony_ci		pmd_t *pmdp, pmd_t pmd)
10762306a36Sopenharmony_ci{
10862306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
10962306a36Sopenharmony_ci	/*
11062306a36Sopenharmony_ci	 * Make sure hardware valid bit is not set. We don't do
11162306a36Sopenharmony_ci	 * tlb flush for this update.
11262306a36Sopenharmony_ci	 */
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
11562306a36Sopenharmony_ci	assert_spin_locked(pmd_lockptr(mm, pmdp));
11662306a36Sopenharmony_ci	WARN_ON(!(pmd_large(pmd)));
11762306a36Sopenharmony_ci#endif
11862306a36Sopenharmony_ci	trace_hugepage_set_pmd(addr, pmd_val(pmd));
11962306a36Sopenharmony_ci	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_civoid set_pud_at(struct mm_struct *mm, unsigned long addr,
12362306a36Sopenharmony_ci		pud_t *pudp, pud_t pud)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
12662306a36Sopenharmony_ci	/*
12762306a36Sopenharmony_ci	 * Make sure hardware valid bit is not set. We don't do
12862306a36Sopenharmony_ci	 * tlb flush for this update.
12962306a36Sopenharmony_ci	 */
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	WARN_ON(pte_hw_valid(pud_pte(*pudp)));
13262306a36Sopenharmony_ci	assert_spin_locked(pud_lockptr(mm, pudp));
13362306a36Sopenharmony_ci	WARN_ON(!(pud_large(pud)));
13462306a36Sopenharmony_ci#endif
13562306a36Sopenharmony_ci	trace_hugepage_set_pud(addr, pud_val(pud));
13662306a36Sopenharmony_ci	return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cistatic void do_serialize(void *arg)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	/* We've taken the IPI, so try to trim the mask while here */
14262306a36Sopenharmony_ci	if (radix_enabled()) {
14362306a36Sopenharmony_ci		struct mm_struct *mm = arg;
14462306a36Sopenharmony_ci		exit_lazy_flush_tlb(mm, false);
14562306a36Sopenharmony_ci	}
14662306a36Sopenharmony_ci}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci/*
14962306a36Sopenharmony_ci * Serialize against __find_linux_pte() which does lock-less
15062306a36Sopenharmony_ci * lookup in page tables with local interrupts disabled. For huge pages
15162306a36Sopenharmony_ci * it casts pmd_t to pte_t. Since format of pte_t is different from
15262306a36Sopenharmony_ci * pmd_t we want to prevent transit from pmd pointing to page table
15362306a36Sopenharmony_ci * to pmd pointing to huge page (and back) while interrupts are disabled.
15462306a36Sopenharmony_ci * We clear pmd to possibly replace it with page table pointer in
15562306a36Sopenharmony_ci * different code paths. So make sure we wait for the parallel
15662306a36Sopenharmony_ci * __find_linux_pte() to finish.
15762306a36Sopenharmony_ci */
void serialize_against_pte_lookup(struct mm_struct *mm)
{
	/* Order the caller's pmd update before the cross-CPU sync below. */
	smp_mb();
	/* wait=1: return only after every CPU in the mask ran do_serialize(). */
	smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci/*
16562306a36Sopenharmony_ci * We use this to invalidate a pmdp entry before switching from a
16662306a36Sopenharmony_ci * hugepte to regular pmd entry.
16762306a36Sopenharmony_ci */
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	unsigned long old_pmd;

	/*
	 * Clear _PAGE_PRESENT and set _PAGE_INVALID: the entry becomes
	 * invalid to the hardware while generic code still sees it as
	 * present (same trick as ptep_modify_prot_start() below).
	 */
	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
	/* Flush the whole huge-page range so no stale translation survives. */
	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return __pmd(old_pmd);
}
17762306a36Sopenharmony_ci
/*
 * Clear a huge PMD and return its old value. @full is non-zero for a
 * full-mm teardown, in which case the TLB flush can be skipped.
 */
pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr, pmd_t *pmdp, int full)
{
	pmd_t pmd;
	/* Expect a huge-page-aligned address and a present huge entry. */
	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
	VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
		   !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
	pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
	/*
	 * if it not a fullmm flush, then we can possibly end up converting
	 * this PMD pte entry to a regular level 0 PTE by a parallel page fault.
	 * Make sure we flush the tlb in this case.
	 */
	if (!full)
		flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
	return pmd;
}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_cipud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
19762306a36Sopenharmony_ci				   unsigned long addr, pud_t *pudp, int full)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	pud_t pud;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
20262306a36Sopenharmony_ci	VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
20362306a36Sopenharmony_ci		  !pud_present(*pudp));
20462306a36Sopenharmony_ci	pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
20562306a36Sopenharmony_ci	/*
20662306a36Sopenharmony_ci	 * if it not a fullmm flush, then we can possibly end up converting
20762306a36Sopenharmony_ci	 * this PMD pte entry to a regular level 0 PTE by a parallel page fault.
20862306a36Sopenharmony_ci	 * Make sure we flush the tlb in this case.
20962306a36Sopenharmony_ci	 */
21062306a36Sopenharmony_ci	if (!full)
21162306a36Sopenharmony_ci		flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
21262306a36Sopenharmony_ci	return pud;
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cistatic pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	return __pud(pud_val(pud) | pgprot_val(pgprot));
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci/*
22662306a36Sopenharmony_ci * At some point we should be able to get rid of
22762306a36Sopenharmony_ci * pmd_mkhuge() and mk_huge_pmd() when we update all the
22862306a36Sopenharmony_ci * other archs to mark the pmd huge in pfn_pmd()
22962306a36Sopenharmony_ci */
23062306a36Sopenharmony_cipmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	unsigned long pmdv;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot));
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_cipud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
24062306a36Sopenharmony_ci{
24162306a36Sopenharmony_ci	unsigned long pudv;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot));
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_cipmd_t mk_pmd(struct page *page, pgprot_t pgprot)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	return pfn_pmd(page_to_pfn(page), pgprot);
25162306a36Sopenharmony_ci}
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_cipmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
25462306a36Sopenharmony_ci{
25562306a36Sopenharmony_ci	unsigned long pmdv;
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	pmdv = pmd_val(pmd);
25862306a36Sopenharmony_ci	pmdv &= _HPAGE_CHG_MASK;
25962306a36Sopenharmony_ci	return pmd_set_protbits(__pmd(pmdv), newprot);
26062306a36Sopenharmony_ci}
26162306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci/* For use by kexec, called with MMU off */
notrace void mmu_cleanup_all(void)
{
	/* Tear down translation state for whichever MMU mode is active. */
	if (radix_enabled())
		radix__mmu_cleanup_all();
	else if (mmu_hash_ops.hpte_clear_all)
		mmu_hash_ops.hpte_clear_all();

	/*
	 * NOTE(review): presumably resets MMU-related SPRs so the next
	 * kernel starts from a clean state - confirm reset_sprs() scope.
	 */
	reset_sprs();
}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
27562306a36Sopenharmony_ciint __meminit create_section_mapping(unsigned long start, unsigned long end,
27662306a36Sopenharmony_ci				     int nid, pgprot_t prot)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	if (radix_enabled())
27962306a36Sopenharmony_ci		return radix__create_section_mapping(start, end, nid, prot);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	return hash__create_section_mapping(start, end, nid, prot);
28262306a36Sopenharmony_ci}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ciint __meminit remove_section_mapping(unsigned long start, unsigned long end)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	if (radix_enabled())
28762306a36Sopenharmony_ci		return radix__remove_section_mapping(start, end);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	return hash__remove_section_mapping(start, end);
29062306a36Sopenharmony_ci}
29162306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */
29262306a36Sopenharmony_ci
/* Allocate the partition table at boot and point the hardware at it. */
void __init mmu_partition_table_init(void)
{
	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
	unsigned long ptcr;

	/* Initialize the Partition Table with no entries */
	partition_tb = memblock_alloc(patb_size, patb_size);
	if (!partition_tb)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, patb_size, patb_size);

	/* PTCR = physical base | encoded size (log2(bytes) - 12). */
	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
	set_ptcr_when_no_uv(ptcr);
	/* Propagate the PTCR to the nest MMU as well. */
	powernv_set_nmmu_ptcr(ptcr);
}
30862306a36Sopenharmony_ci
/*
 * Flush all TLB and partition-table caches for @lpid. @radix selects the
 * flush type matching the partition's previous translation mode.
 */
static void flush_partition(unsigned int lpid, bool radix)
{
	if (radix) {
		/* Flush both the host and nested-guest caches for the LPID. */
		radix__flush_all_lpid(lpid);
		radix__flush_all_lpid_guest(lpid);
	} else {
		/* Hash: tlbie the whole partition, bracketed by sync ops. */
		asm volatile("ptesync" : : : "memory");
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		/* do we need fixup here ?*/
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
	}
}
32362306a36Sopenharmony_ci
/*
 * Write both doublewords of the partition-table entry for @lpid and,
 * if requested, flush stale translations for that partition.
 */
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
				  unsigned long dw1, bool flush)
{
	/* Capture the old dw0 first: its PATB_HR bit picks the flush type. */
	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);

	/*
	 * When ultravisor is enabled, the partition table is stored in secure
	 * memory and can only be accessed doing an ultravisor call. However, we
	 * maintain a copy of the partition table in normal memory to allow Nest
	 * MMU translations to occur (for normal VMs).
	 *
	 * Therefore, here we always update partition_tb, regardless of whether
	 * we are running under an ultravisor or not.
	 */
	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * If ultravisor is enabled, we do an ultravisor call to register the
	 * partition table entry (PATE), which also do a global flush of TLBs
	 * and partition table caches for the lpid. Otherwise, just do the
	 * flush. The type of flush (hash or radix) depends on what the previous
	 * use of the partition ID was, not the new use.
	 */
	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
		uv_register_pate(lpid, dw0, dw1);
		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
			dw0, dw1);
	} else if (flush) {
		/*
		 * Boot does not need to flush, because MMU is off and each
		 * CPU does a tlbiel_all() before switching them on, which
		 * flushes everything.
		 */
		flush_partition(lpid, (old & PATB_HR));
	}
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_cistatic pmd_t *get_pmd_from_cache(struct mm_struct *mm)
36462306a36Sopenharmony_ci{
36562306a36Sopenharmony_ci	void *pmd_frag, *ret;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	if (PMD_FRAG_NR == 1)
36862306a36Sopenharmony_ci		return NULL;
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	spin_lock(&mm->page_table_lock);
37162306a36Sopenharmony_ci	ret = mm->context.pmd_frag;
37262306a36Sopenharmony_ci	if (ret) {
37362306a36Sopenharmony_ci		pmd_frag = ret + PMD_FRAG_SIZE;
37462306a36Sopenharmony_ci		/*
37562306a36Sopenharmony_ci		 * If we have taken up all the fragments mark PTE page NULL
37662306a36Sopenharmony_ci		 */
37762306a36Sopenharmony_ci		if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
37862306a36Sopenharmony_ci			pmd_frag = NULL;
37962306a36Sopenharmony_ci		mm->context.pmd_frag = pmd_frag;
38062306a36Sopenharmony_ci	}
38162306a36Sopenharmony_ci	spin_unlock(&mm->page_table_lock);
38262306a36Sopenharmony_ci	return (pmd_t *)ret;
38362306a36Sopenharmony_ci}
38462306a36Sopenharmony_ci
/*
 * Slow path of pmd_fragment_alloc(): allocate a fresh page-table page,
 * return its first fragment, and (when fragments are enabled) stash the
 * remainder in mm->context.pmd_frag for later callers.
 */
static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
{
	void *ret = NULL;
	struct ptdesc *ptdesc;
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;

	/* Kernel page tables are not charged to a memcg. */
	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pmd_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}

	/* Start with a single-fragment refcount; raised below if cached. */
	atomic_set(&ptdesc->pt_frag_refcount, 1);

	ret = ptdesc_address(ptdesc);
	/*
	 * if we support only one fragment just return the
	 * allocated page.
	 */
	if (PMD_FRAG_NR == 1)
		return ret;

	spin_lock(&mm->page_table_lock);
	/*
	 * If we find ptdesc_page set, we return
	 * the allocated page with single fragment
	 * count. (A racing allocator may have refilled the cache while we
	 * were allocating; in that case we do not overwrite it.)
	 */
	if (likely(!mm->context.pmd_frag)) {
		atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pmd_t *)ret;
}
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_cipmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
42762306a36Sopenharmony_ci{
42862306a36Sopenharmony_ci	pmd_t *pmd;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	pmd = get_pmd_from_cache(mm);
43162306a36Sopenharmony_ci	if (pmd)
43262306a36Sopenharmony_ci		return pmd;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	return __alloc_for_pmdcache(mm);
43562306a36Sopenharmony_ci}
43662306a36Sopenharmony_ci
/*
 * Release one PMD fragment; the backing page is freed only when the
 * last fragment sharing it is dropped.
 */
void pmd_fragment_free(unsigned long *pmd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);

	/* Pages reserved at boot bypass the fragment refcounting. */
	if (pagetable_is_reserved(ptdesc))
		return free_reserved_ptdesc(ptdesc);

	BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
	if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
		pagetable_pmd_dtor(ptdesc);
		pagetable_free(ptdesc);
	}
}
45062306a36Sopenharmony_ci
/*
 * Free a page-table page of the kind identified by @index (the low-bit
 * tag packed in by pgtable_free_tlb()).
 */
static inline void pgtable_free(void *table, int index)
{
	switch (index) {
	case PTE_INDEX:
		pte_fragment_free(table, 0);
		break;
	case PMD_INDEX:
		pmd_fragment_free(table);
		break;
	case PUD_INDEX:
		__pud_free(table);
		break;
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
		/* 16M hugepd directory at pud level */
	case HTLB_16M_INDEX:
		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
		break;
		/* 16G hugepd directory at the pgd level */
	case HTLB_16G_INDEX:
		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
		break;
#endif
		/* We don't free pgd table via RCU callback */
	default:
		BUG();
	}
}
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_civoid pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
48262306a36Sopenharmony_ci{
48362306a36Sopenharmony_ci	unsigned long pgf = (unsigned long)table;
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	BUG_ON(index > MAX_PGTABLE_INDEX_SIZE);
48662306a36Sopenharmony_ci	pgf |= index;
48762306a36Sopenharmony_ci	tlb_remove_table(tlb, (void *)pgf);
48862306a36Sopenharmony_ci}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_civoid __tlb_remove_table(void *_table)
49162306a36Sopenharmony_ci{
49262306a36Sopenharmony_ci	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
49362306a36Sopenharmony_ci	unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci	return pgtable_free(table, index);
49662306a36Sopenharmony_ci}
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS
/* Count of direct-map pages per MMU page size (radix only). */
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];

void arch_report_meminfo(struct seq_file *m)
{
	/*
	 * Hash maps the memory with one size mmu_linear_psize.
	 * So don't bother to print these on hash
	 */
	if (!radix_enabled())
		return;
	/* Shifts convert page counts to kB: 4K<<2, 64K<<6, 2M<<11, 1G<<20. */
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2);
	seq_printf(m, "DirectMap64k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6);
	seq_printf(m, "DirectMap2M:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11);
	seq_printf(m, "DirectMap1G:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
}
51862306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */
51962306a36Sopenharmony_ci
/*
 * Begin a protection update on a PTE: logically invalidate the entry and
 * return its old value. Paired with ptep_modify_prot_commit().
 */
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	unsigned long pte_val;

	/*
	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
	 * possible. Also keep the pte_present true so that we don't take
	 * wrong fault.
	 */
	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);

	return __pte(pte_val);

}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_civoid ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
53762306a36Sopenharmony_ci			     pte_t *ptep, pte_t old_pte, pte_t pte)
53862306a36Sopenharmony_ci{
53962306a36Sopenharmony_ci	if (radix_enabled())
54062306a36Sopenharmony_ci		return radix__ptep_modify_prot_commit(vma, addr,
54162306a36Sopenharmony_ci						      ptep, old_pte, pte);
54262306a36Sopenharmony_ci	set_pte_at(vma->vm_mm, addr, ptep, pte);
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
54662306a36Sopenharmony_ci/*
54762306a36Sopenharmony_ci * For hash translation mode, we use the deposited table to store hash slot
54862306a36Sopenharmony_ci * information and they are stored at PTRS_PER_PMD offset from related pmd
54962306a36Sopenharmony_ci * location. Hence a pmd move requires deposit and withdraw.
55062306a36Sopenharmony_ci *
55162306a36Sopenharmony_ci * For radix translation with split pmd ptl, we store the deposited table in the
55262306a36Sopenharmony_ci * pmd page. Hence if we have different pmd page we need to withdraw during pmd
55362306a36Sopenharmony_ci * move.
55462306a36Sopenharmony_ci *
55562306a36Sopenharmony_ci * With hash we use deposited table always irrespective of anon or not.
55662306a36Sopenharmony_ci * With radix we use deposited table only for anonymous mapping.
55762306a36Sopenharmony_ci */
55862306a36Sopenharmony_ciint pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
55962306a36Sopenharmony_ci			   struct spinlock *old_pmd_ptl,
56062306a36Sopenharmony_ci			   struct vm_area_struct *vma)
56162306a36Sopenharmony_ci{
56262306a36Sopenharmony_ci	if (radix_enabled())
56362306a36Sopenharmony_ci		return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	return true;
56662306a36Sopenharmony_ci}
56762306a36Sopenharmony_ci#endif
56862306a36Sopenharmony_ci
/*
 * Does the CPU support tlbie?
 * Cleared by the "disable_tlbie" early parameter below (radix only).
 */
bool tlbie_capable __read_mostly = true;
EXPORT_SYMBOL(tlbie_capable);

/*
 * Should tlbie be used for management of CPU TLBs, for kernel and process
 * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
 * guest address spaces. Runtime-tunable via the "tlbie_enabled" debugfs
 * file created in pgtable_debugfs_setup().
 */
bool tlbie_enabled __read_mostly = true;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_cistatic int __init setup_disable_tlbie(char *str)
58362306a36Sopenharmony_ci{
58462306a36Sopenharmony_ci	if (!radix_enabled()) {
58562306a36Sopenharmony_ci		pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
58662306a36Sopenharmony_ci		return 1;
58762306a36Sopenharmony_ci	}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	tlbie_capable = false;
59062306a36Sopenharmony_ci	tlbie_enabled = false;
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci        return 1;
59362306a36Sopenharmony_ci}
59462306a36Sopenharmony_ci__setup("disable_tlbie", setup_disable_tlbie);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_cistatic int __init pgtable_debugfs_setup(void)
59762306a36Sopenharmony_ci{
59862306a36Sopenharmony_ci	if (!tlbie_capable)
59962306a36Sopenharmony_ci		return 0;
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	/*
60262306a36Sopenharmony_ci	 * There is no locking vs tlb flushing when changing this value.
60362306a36Sopenharmony_ci	 * The tlb flushers will see one value or another, and use either
60462306a36Sopenharmony_ci	 * tlbie or tlbiel with IPIs. In both cases the TLBs will be
60562306a36Sopenharmony_ci	 * invalidated as expected.
60662306a36Sopenharmony_ci	 */
60762306a36Sopenharmony_ci	debugfs_create_bool("tlbie_enabled", 0600,
60862306a36Sopenharmony_ci			arch_debugfs_dir,
60962306a36Sopenharmony_ci			&tlbie_enabled);
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	return 0;
61262306a36Sopenharmony_ci}
61362306a36Sopenharmony_ciarch_initcall(pgtable_debugfs_setup);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
61662306a36Sopenharmony_ci/*
61762306a36Sopenharmony_ci * Override the generic version in mm/memremap.c.
61862306a36Sopenharmony_ci *
61962306a36Sopenharmony_ci * With hash translation, the direct-map range is mapped with just one
62062306a36Sopenharmony_ci * page size selected by htab_init_page_sizes(). Consult
62162306a36Sopenharmony_ci * mmu_psize_defs[] to determine the minimum page size alignment.
 */
62362306a36Sopenharmony_ciunsigned long memremap_compat_align(void)
62462306a36Sopenharmony_ci{
62562306a36Sopenharmony_ci	if (!radix_enabled()) {
62662306a36Sopenharmony_ci		unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
62762306a36Sopenharmony_ci		return max(SUBSECTION_SIZE, 1UL << shift);
62862306a36Sopenharmony_ci	}
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci	return SUBSECTION_SIZE;
63162306a36Sopenharmony_ci}
63262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memremap_compat_align);
63362306a36Sopenharmony_ci#endif
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_cipgprot_t vm_get_page_prot(unsigned long vm_flags)
63662306a36Sopenharmony_ci{
63762306a36Sopenharmony_ci	unsigned long prot;
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	/* Radix supports execute-only, but protection_map maps X -> RX */
64062306a36Sopenharmony_ci	if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) {
64162306a36Sopenharmony_ci		prot = pgprot_val(PAGE_EXECONLY);
64262306a36Sopenharmony_ci	} else {
64362306a36Sopenharmony_ci		prot = pgprot_val(protection_map[vm_flags &
64462306a36Sopenharmony_ci						 (VM_ACCESS_FLAGS | VM_SHARED)]);
64562306a36Sopenharmony_ci	}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	if (vm_flags & VM_SAO)
64862306a36Sopenharmony_ci		prot |= _PAGE_SAO;
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci#ifdef CONFIG_PPC_MEM_KEYS
65162306a36Sopenharmony_ci	prot |= vmflag_to_pte_pkey_bits(vm_flags);
65262306a36Sopenharmony_ci#endif
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	return __pgprot(prot);
65562306a36Sopenharmony_ci}
65662306a36Sopenharmony_ciEXPORT_SYMBOL(vm_get_page_prot);
657