// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/memblock.h>
#include <misc/cxl-base.h>

#include <asm/debugfs.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
#include <asm/firmware.h>
#include <asm/ultravisor.h>
#include <asm/kexec.h>

#include <mm/mmu_decl.h>
#include <trace/events/thp.h>

unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is called when relaxing access to a hugepage. It's also called in
 * the page fault path when we don't hit any of the major fault cases,
 * i.e. a minor update of _PAGE_ACCESSED, _PAGE_DIRTY, etc. The generic
 * code will have handled those for us; here we additionally deal with
 * missing execute permission on some processors.
 */
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp, pmd_t entry, int dirty)
{
	int changed;
#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
#endif
	changed = !pmd_same(*(pmdp), entry);
	if (changed) {
		/*
		 * We can use MMU_PAGE_2M here, because only the radix
		 * path looks at the psize.
		 */
		__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
					pmd_pte(entry), address, MMU_PAGE_2M);
	}
	return changed;
}

int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long address, pmd_t *pmdp)
{
	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}
/*
 * Set a new huge pmd. We should not be called for updating an existing
 * pmd entry; that should go via pmd_hugepage_update().
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * Make sure the hardware valid bit is not set. We don't do
	 * a TLB flush for this update.
	 */

	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
	WARN_ON(!(pmd_large(pmd)));
#endif
	trace_hugepage_set_pmd(addr, pmd_val(pmd));
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}

static void do_nothing(void *unused)
{

}
/*
 * Serialize against find_current_mm_pte(), which does a lock-less lookup
 * of the page tables with local interrupts disabled. For huge pages it
 * casts pmd_t to pte_t. Since the format of pte_t differs from pmd_t, we
 * want to prevent a transition from a pmd pointing to a page table to a
 * pmd pointing to a huge page (and back) while interrupts are disabled.
 * We clear the pmd so that it can later be replaced with a page table
 * pointer in different code paths, so make sure we wait for any parallel
 * find_current_mm_pte() to finish.
 */
void serialize_against_pte_lookup(struct mm_struct *mm)
{
	smp_mb();
	smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
}
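
/*
 * Illustrative (hypothetical) caller pattern, not a copy of any in-tree
 * user: clear the pmd first, then wait out lock-less walkers before the
 * underlying page table page is repurposed.
 *
 *	old_pmd = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
 *	flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
 *	serialize_against_pte_lookup(mm);
 *	// only now is it safe to reuse or free the old page table
 */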

/*
 * We use this to invalidate a pmdp entry before switching from a
 * hugepte to a regular pmd entry.
 */
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	unsigned long old_pmd;

	old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return __pmd(old_pmd);
}

pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
				   unsigned long addr, pmd_t *pmdp, int full)
{
	pmd_t pmd;
	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
	VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
		   !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
	pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
	/*
	 * If this is not a full-mm flush, a parallel page fault can possibly
	 * convert this PMD entry to a regular level 0 PTE.
	 * Make sure we flush the TLB in that case.
	 */
	if (!full)
		flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
	return pmd;
}

static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}

pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
	unsigned long pmdv;

	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
	return pmd_set_protbits(__pmd(pmdv), pgprot);
}

pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
	return pfn_pmd(page_to_pfn(page), pgprot);
}
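
/*
 * Minimal usage sketch (assumption: callers mark the result huge with
 * pmd_mkhuge(), as the generic THP code is expected to do):
 *
 *	pmd_t pmd = pmd_mkhuge(mk_pmd(page, vma->vm_page_prot));
 *	set_pmd_at(mm, haddr, pmdp, pmd);
 */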

pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	unsigned long pmdv;

	pmdv = pmd_val(pmd);
	pmdv &= _HPAGE_CHG_MASK;
	return pmd_set_protbits(__pmd(pmdv), newprot);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/* For use by kexec */
void mmu_cleanup_all(void)
{
	if (radix_enabled())
		radix__mmu_cleanup_all();
	else if (mmu_hash_ops.hpte_clear_all)
		mmu_hash_ops.hpte_clear_all();

	reset_sprs();
}

#ifdef CONFIG_MEMORY_HOTPLUG
int __meminit create_section_mapping(unsigned long start, unsigned long end,
				     int nid, pgprot_t prot)
{
	if (radix_enabled())
		return radix__create_section_mapping(start, end, nid, prot);

	return hash__create_section_mapping(start, end, nid, prot);
}

int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
	if (radix_enabled())
		return radix__remove_section_mapping(start, end);

	return hash__remove_section_mapping(start, end);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

void __init mmu_partition_table_init(void)
{
	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
	unsigned long ptcr;

	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
	/* Initialize the Partition Table with no entries */
	partition_tb = memblock_alloc(patb_size, patb_size);
	if (!partition_tb)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, patb_size, patb_size);

	/*
	 * Update the partition table control register,
	 * 64K size.
	 */
	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
	set_ptcr_when_no_uv(ptcr);
	powernv_set_nmmu_ptcr(ptcr);
}
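
/*
 * Worked example of the PTCR value built above, assuming
 * PATB_SIZE_SHIFT == 16 (the 64K table size the comment refers to):
 * PTCR = __pa(partition_tb) | (16 - 12), i.e. the low bits encode
 * log2(table size) - 12 = 4, with the rest being the physical base.
 */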

static void flush_partition(unsigned int lpid, bool radix)
{
	if (radix) {
		radix__flush_all_lpid(lpid);
		radix__flush_all_lpid_guest(lpid);
	} else {
		asm volatile("ptesync" : : : "memory");
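		/*
		 * A hedged reading of the PPC_TLBIE_5(rb, rs, ric, prs, r)
		 * operands used below (assumption, check ppc-opcode.h):
		 * RIC=2 invalidates all cached entries for the LPID, PRS=0
		 * selects partition-scoped entries and R=0 selects hash (HPT)
		 * translation, matching the trace_tlbie() arguments below.
		 */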
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		/* do we need fixup here? */
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
	}
}

void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
				  unsigned long dw1, bool flush)
{
	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);

	/*
	 * When the ultravisor is enabled, the partition table is stored in
	 * secure memory and can only be accessed via an ultravisor call.
	 * However, we maintain a copy of the partition table in normal memory
	 * to allow Nest MMU translations to occur (for normal VMs).
	 *
	 * Therefore, here we always update partition_tb, regardless of whether
	 * we are running under an ultravisor or not.
	 */
	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * If the ultravisor is enabled, we do an ultravisor call to register
	 * the partition table entry (PATE), which also does a global flush of
	 * TLBs and partition table caches for the lpid. Otherwise, just do the
	 * flush. The type of flush (hash or radix) depends on what the previous
	 * use of the partition ID was, not the new use.
	 */
	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
		uv_register_pate(lpid, dw0, dw1);
		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
			dw0, dw1);
	} else if (flush) {
		/*
		 * Boot does not need to flush, because the MMU is off and each
		 * CPU does a tlbiel_all() before switching the MMU on, which
		 * flushes everything.
		 */
		flush_partition(lpid, (old & PATB_HR));
	}
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);

static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
{
	void *pmd_frag, *ret;

	if (PMD_FRAG_NR == 1)
		return NULL;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pmd_frag;
	if (ret) {
		pmd_frag = ret + PMD_FRAG_SIZE;
		/*
		 * If we have taken up all the fragments, mark the cached
		 * page NULL.
		 */
		if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
			pmd_frag = NULL;
		mm->context.pmd_frag = pmd_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pmd_t *)ret;
}

static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
{
	void *ret = NULL;
	struct page *page;
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	page = alloc_page(gfp);
	if (!page)
		return NULL;
	if (!pgtable_pmd_page_ctor(page)) {
		__free_pages(page, 0);
		return NULL;
	}

	atomic_set(&page->pt_frag_refcount, 1);

	ret = page_address(page);
	/*
	 * If we support only one fragment, just return the
	 * allocated page.
	 */
	if (PMD_FRAG_NR == 1)
		return ret;

	spin_lock(&mm->page_table_lock);
	/*
	 * If we find the fragment cache already populated, we return
	 * the allocated page with a single fragment count.
	 */
	if (likely(!mm->context.pmd_frag)) {
		atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pmd_t *)ret;
}

pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	pmd_t *pmd;

	pmd = get_pmd_from_cache(mm);
	if (pmd)
		return pmd;

	return __alloc_for_pmdcache(mm);
}
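
/*
 * Sketch of the fragment accounting above, under the assumption that
 * PMD_FRAG_NR == PAGE_SIZE / PMD_FRAG_SIZE: each backing page is carved
 * into PMD_FRAG_NR fragments, page->pt_frag_refcount counts the fragments
 * handed out, and the page is only freed once every fragment has been
 * returned via pmd_fragment_free().
 */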

void pmd_fragment_free(unsigned long *pmd)
{
	struct page *page = virt_to_page(pmd);

	if (PageReserved(page))
		return free_reserved_page(page);

	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

static inline void pgtable_free(void *table, int index)
{
	switch (index) {
	case PTE_INDEX:
		pte_fragment_free(table, 0);
		break;
	case PMD_INDEX:
		pmd_fragment_free(table);
		break;
	case PUD_INDEX:
		__pud_free(table);
		break;
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
		/* 16M hugepd directory at the pud level */
	case HTLB_16M_INDEX:
		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
		break;
		/* 16G hugepd directory at the pgd level */
	case HTLB_16G_INDEX:
		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
		break;
#endif
		/* We don't free the pgd table via an RCU callback */
	default:
		BUG();
	}
}

void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(index > MAX_PGTABLE_INDEX_SIZE);
	pgf |= index;
	tlb_remove_table(tlb, (void *)pgf);
}

void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	return pgtable_free(table, index);
}
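
/*
 * Sketch of the pointer/index packing used by the two helpers above,
 * assuming page table allocations are aligned beyond
 * MAX_PGTABLE_INDEX_SIZE so the low bits are free:
 *
 *	pgf = (unsigned long)table | PMD_INDEX;		// pack on free
 *	index = pgf & MAX_PGTABLE_INDEX_SIZE;		// unpack in callback
 *	table = (void *)(pgf & ~MAX_PGTABLE_INDEX_SIZE);
 */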

#ifdef CONFIG_PROC_FS
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];

void arch_report_meminfo(struct seq_file *m)
{
	/*
	 * Hash maps the memory with one size, mmu_linear_psize,
	 * so don't bother printing these on hash.
	 */
	if (!radix_enabled())
		return;
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2);
	seq_printf(m, "DirectMap64k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6);
	seq_printf(m, "DirectMap2M:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11);
	seq_printf(m, "DirectMap1G:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
}
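
/*
 * Illustrative /proc/meminfo excerpt on a radix system (the values are
 * made up for the example):
 *
 *	DirectMap4k:           0 kB
 *	DirectMap64k:          0 kB
 *	DirectMap2M:    16777216 kB
 *	DirectMap1G:           0 kB
 */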
#endif /* CONFIG_PROC_FS */

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	unsigned long pte_val;

	/*
	 * Clear _PAGE_PRESENT so that no parallel hardware update is
	 * possible. Also keep pte_present() true so that we don't take
	 * a wrong fault.
	 */
	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);

	return __pte(pte_val);

}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	if (radix_enabled())
		return radix__ptep_modify_prot_commit(vma, addr,
						      ptep, old_pte, pte);
	set_pte_at(vma->vm_mm, addr, ptep, pte);
}
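
/*
 * Hypothetical sketch of the start/commit pairing above (mirrors how
 * generic code such as change_pte_range() is expected to use it; the
 * caller-side names are assumptions):
 *
 *	old_pte = ptep_modify_prot_start(vma, addr, ptep);
 *	new_pte = pte_modify(old_pte, newprot);
 *	ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
 */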

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * For hash translation mode, we use the deposited table to store hash slot
 * information and it is stored at PTRS_PER_PMD offset from the related pmd
 * location. Hence a pmd move requires a deposit and withdraw.
 *
 * For radix translation with split pmd ptl, we store the deposited table in
 * the pmd page. Hence if we have a different pmd page we need to withdraw
 * during a pmd move.
 *
 * With hash we always use the deposited table, irrespective of whether the
 * mapping is anonymous. With radix we use the deposited table only for
 * anonymous mappings.
 */
int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
			   struct spinlock *old_pmd_ptl,
			   struct vm_area_struct *vma)
{
	if (radix_enabled())
		return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);

	return true;
}
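
/*
 * Hedged sketch of the expected mremap-time caller (based on the generic
 * move_huge_pmd() pattern; treat the exact call site as an assumption):
 *
 *	if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
 *		pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
 *		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 *	}
 */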
#endif

/*
 * Does the CPU support tlbie?
 */
bool tlbie_capable __read_mostly = true;
EXPORT_SYMBOL(tlbie_capable);

/*
 * Should tlbie be used for management of CPU TLBs, for kernel and process
 * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
 * guest address spaces.
 */
bool tlbie_enabled __read_mostly = true;

static int __init setup_disable_tlbie(char *str)
{
	if (!radix_enabled()) {
		pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
		return 1;
	}

	tlbie_capable = false;
	tlbie_enabled = false;

	return 1;
}
__setup("disable_tlbie", setup_disable_tlbie);
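
/*
 * Usage note: pass "disable_tlbie" on the kernel command line. As the
 * check above shows, it is only honoured when the radix MMU is in use.
 */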

static int __init pgtable_debugfs_setup(void)
{
	if (!tlbie_capable)
		return 0;

	/*
	 * There is no locking vs tlb flushing when changing this value.
	 * The tlb flushers will see one value or another, and use either
	 * tlbie or tlbiel with IPIs. In both cases the TLBs will be
	 * invalidated as expected.
	 */
	debugfs_create_bool("tlbie_enabled", 0600,
			powerpc_debugfs_root,
			&tlbie_enabled);

	return 0;
}
arch_initcall(pgtable_debugfs_setup);