162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright 2005, Paul Mackerras, IBM Corporation.
462306a36Sopenharmony_ci * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
562306a36Sopenharmony_ci * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/sched.h>
962306a36Sopenharmony_ci#include <linux/mm_types.h>
1062306a36Sopenharmony_ci#include <linux/mm.h>
1162306a36Sopenharmony_ci#include <linux/stop_machine.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <asm/sections.h>
1462306a36Sopenharmony_ci#include <asm/mmu.h>
1562306a36Sopenharmony_ci#include <asm/tlb.h>
1662306a36Sopenharmony_ci#include <asm/firmware.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include <mm/mmu_decl.h>
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include <trace/events/thp.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
2362306a36Sopenharmony_ci#warning Limited user VSID range means pagetable space is wasted
2462306a36Sopenharmony_ci#endif
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP
2762306a36Sopenharmony_ci/*
2862306a36Sopenharmony_ci * vmemmap is the starting address of the virtual address space where
2962306a36Sopenharmony_ci * struct pages are allocated for all possible PFNs present on the system
3062306a36Sopenharmony_ci * including holes and bad memory (hence sparse). These virtual struct
3162306a36Sopenharmony_ci * pages are stored in sequence in this virtual address space irrespective
3262306a36Sopenharmony_ci * of the fact whether the corresponding PFN is valid or not. This achieves
3362306a36Sopenharmony_ci * constant relationship between address of struct page and its PFN.
3462306a36Sopenharmony_ci *
3562306a36Sopenharmony_ci * During boot or memory hotplug operation when a new memory section is
3662306a36Sopenharmony_ci * added, physical memory allocation (including hash table bolting) will
3762306a36Sopenharmony_ci * be performed for the set of struct pages which are part of the memory
3862306a36Sopenharmony_ci * section. This saves memory by not allocating struct pages for PFNs
3962306a36Sopenharmony_ci * which are not valid.
4062306a36Sopenharmony_ci *
4162306a36Sopenharmony_ci *		----------------------------------------------
4262306a36Sopenharmony_ci *		| PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
4362306a36Sopenharmony_ci *		----------------------------------------------
4462306a36Sopenharmony_ci *
4562306a36Sopenharmony_ci *	   f000000000000000                  c000000000000000
4662306a36Sopenharmony_ci * vmemmap +--------------+                  +--------------+
4762306a36Sopenharmony_ci *  +      |  page struct | +--------------> |  page struct |
4862306a36Sopenharmony_ci *  |      +--------------+                  +--------------+
4962306a36Sopenharmony_ci *  |      |  page struct | +--------------> |  page struct |
5062306a36Sopenharmony_ci *  |      +--------------+ |                +--------------+
5162306a36Sopenharmony_ci *  |      |  page struct | +       +------> |  page struct |
5262306a36Sopenharmony_ci *  |      +--------------+         |        +--------------+
5362306a36Sopenharmony_ci *  |      |  page struct |         |   +--> |  page struct |
5462306a36Sopenharmony_ci *  |      +--------------+         |   |    +--------------+
5562306a36Sopenharmony_ci *  |      |  page struct |         |   |
5662306a36Sopenharmony_ci *  |      +--------------+         |   |
5762306a36Sopenharmony_ci *  |      |  page struct |         |   |
5862306a36Sopenharmony_ci *  |      +--------------+         |   |
5962306a36Sopenharmony_ci *  |      |  page struct |         |   |
6062306a36Sopenharmony_ci *  |      +--------------+         |   |
6162306a36Sopenharmony_ci *  |      |  page struct |         |   |
6262306a36Sopenharmony_ci *  |      +--------------+         |   |
6362306a36Sopenharmony_ci *  |      |  page struct | +-------+   |
6462306a36Sopenharmony_ci *  |      +--------------+             |
6562306a36Sopenharmony_ci *  |      |  page struct | +-----------+
6662306a36Sopenharmony_ci *  |      +--------------+
6762306a36Sopenharmony_ci *  |      |  page struct | No mapping
6862306a36Sopenharmony_ci *  |      +--------------+
6962306a36Sopenharmony_ci *  |      |  page struct | No mapping
7062306a36Sopenharmony_ci *  v      +--------------+
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci *		-----------------------------------------
7362306a36Sopenharmony_ci *		| RELATION BETWEEN STRUCT PAGES AND PFNS|
7462306a36Sopenharmony_ci *		-----------------------------------------
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * vmemmap +--------------+                 +---------------+
7762306a36Sopenharmony_ci *  +      |  page struct | +-------------> |      PFN      |
7862306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
7962306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
8062306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
8162306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
8262306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
8362306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
8462306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
8562306a36Sopenharmony_ci *  |      |              |
8662306a36Sopenharmony_ci *  |      +--------------+
8762306a36Sopenharmony_ci *  |      |              |
8862306a36Sopenharmony_ci *  |      +--------------+
8962306a36Sopenharmony_ci *  |      |              |
9062306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
9162306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
9262306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
9362306a36Sopenharmony_ci *  |      |              |
9462306a36Sopenharmony_ci *  |      +--------------+
9562306a36Sopenharmony_ci *  |      |              |
9662306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
9762306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
9862306a36Sopenharmony_ci *  |      +--------------+                 +---------------+
9962306a36Sopenharmony_ci *  |      |  page struct | +-------------> |      PFN      |
10062306a36Sopenharmony_ci *  v      +--------------+                 +---------------+
10162306a36Sopenharmony_ci */
10262306a36Sopenharmony_ci/*
10362306a36Sopenharmony_ci * On hash-based CPUs, the vmemmap is bolted in the hash table.
10462306a36Sopenharmony_ci *
10562306a36Sopenharmony_ci */
10662306a36Sopenharmony_ciint __meminit hash__vmemmap_create_mapping(unsigned long start,
10762306a36Sopenharmony_ci				       unsigned long page_size,
10862306a36Sopenharmony_ci				       unsigned long phys)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	int rc;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if ((start + page_size) >= H_VMEMMAP_END) {
11362306a36Sopenharmony_ci		pr_warn("Outside the supported range\n");
11462306a36Sopenharmony_ci		return -1;
11562306a36Sopenharmony_ci	}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	rc = htab_bolt_mapping(start, start + page_size, phys,
11862306a36Sopenharmony_ci			       pgprot_val(PAGE_KERNEL),
11962306a36Sopenharmony_ci			       mmu_vmemmap_psize, mmu_kernel_ssize);
12062306a36Sopenharmony_ci	if (rc < 0) {
12162306a36Sopenharmony_ci		int rc2 = htab_remove_mapping(start, start + page_size,
12262306a36Sopenharmony_ci					      mmu_vmemmap_psize,
12362306a36Sopenharmony_ci					      mmu_kernel_ssize);
12462306a36Sopenharmony_ci		BUG_ON(rc2 && (rc2 != -ENOENT));
12562306a36Sopenharmony_ci	}
12662306a36Sopenharmony_ci	return rc;
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
13062306a36Sopenharmony_civoid hash__vmemmap_remove_mapping(unsigned long start,
13162306a36Sopenharmony_ci			      unsigned long page_size)
13262306a36Sopenharmony_ci{
13362306a36Sopenharmony_ci	int rc = htab_remove_mapping(start, start + page_size,
13462306a36Sopenharmony_ci				     mmu_vmemmap_psize,
13562306a36Sopenharmony_ci				     mmu_kernel_ssize);
13662306a36Sopenharmony_ci	BUG_ON((rc < 0) && (rc != -ENOENT));
13762306a36Sopenharmony_ci	WARN_ON(rc == -ENOENT);
13862306a36Sopenharmony_ci}
13962306a36Sopenharmony_ci#endif
14062306a36Sopenharmony_ci#endif /* CONFIG_SPARSEMEM_VMEMMAP */
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci/*
14362306a36Sopenharmony_ci * map_kernel_page currently only called by __ioremap
14462306a36Sopenharmony_ci * map_kernel_page adds an entry to the ioremap page table
14562306a36Sopenharmony_ci * and adds an entry to the HPT, possibly bolting it
14662306a36Sopenharmony_ci */
14762306a36Sopenharmony_ciint hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	pgd_t *pgdp;
15062306a36Sopenharmony_ci	p4d_t *p4dp;
15162306a36Sopenharmony_ci	pud_t *pudp;
15262306a36Sopenharmony_ci	pmd_t *pmdp;
15362306a36Sopenharmony_ci	pte_t *ptep;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
15662306a36Sopenharmony_ci	if (slab_is_available()) {
15762306a36Sopenharmony_ci		pgdp = pgd_offset_k(ea);
15862306a36Sopenharmony_ci		p4dp = p4d_offset(pgdp, ea);
15962306a36Sopenharmony_ci		pudp = pud_alloc(&init_mm, p4dp, ea);
16062306a36Sopenharmony_ci		if (!pudp)
16162306a36Sopenharmony_ci			return -ENOMEM;
16262306a36Sopenharmony_ci		pmdp = pmd_alloc(&init_mm, pudp, ea);
16362306a36Sopenharmony_ci		if (!pmdp)
16462306a36Sopenharmony_ci			return -ENOMEM;
16562306a36Sopenharmony_ci		ptep = pte_alloc_kernel(pmdp, ea);
16662306a36Sopenharmony_ci		if (!ptep)
16762306a36Sopenharmony_ci			return -ENOMEM;
16862306a36Sopenharmony_ci		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
16962306a36Sopenharmony_ci	} else {
17062306a36Sopenharmony_ci		/*
17162306a36Sopenharmony_ci		 * If the mm subsystem is not fully up, we cannot create a
17262306a36Sopenharmony_ci		 * linux page table entry for this mapping.  Simply bolt an
17362306a36Sopenharmony_ci		 * entry in the hardware page table.
17462306a36Sopenharmony_ci		 *
17562306a36Sopenharmony_ci		 */
17662306a36Sopenharmony_ci		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
17762306a36Sopenharmony_ci				      mmu_io_psize, mmu_kernel_ssize)) {
17862306a36Sopenharmony_ci			printk(KERN_ERR "Failed to do bolted mapping IO "
17962306a36Sopenharmony_ci			       "memory at %016lx !\n", pa);
18062306a36Sopenharmony_ci			return -ENOMEM;
18162306a36Sopenharmony_ci		}
18262306a36Sopenharmony_ci	}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	smp_wmb();
18562306a36Sopenharmony_ci	return 0;
18662306a36Sopenharmony_ci}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ciunsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
19162306a36Sopenharmony_ci				    pmd_t *pmdp, unsigned long clr,
19262306a36Sopenharmony_ci				    unsigned long set)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	__be64 old_be, tmp;
19562306a36Sopenharmony_ci	unsigned long old;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
19862306a36Sopenharmony_ci	WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
19962306a36Sopenharmony_ci	assert_spin_locked(pmd_lockptr(mm, pmdp));
20062306a36Sopenharmony_ci#endif
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	__asm__ __volatile__(
20362306a36Sopenharmony_ci	"1:	ldarx	%0,0,%3\n\
20462306a36Sopenharmony_ci		and.	%1,%0,%6\n\
20562306a36Sopenharmony_ci		bne-	1b \n\
20662306a36Sopenharmony_ci		andc	%1,%0,%4 \n\
20762306a36Sopenharmony_ci		or	%1,%1,%7\n\
20862306a36Sopenharmony_ci		stdcx.	%1,0,%3 \n\
20962306a36Sopenharmony_ci		bne-	1b"
21062306a36Sopenharmony_ci	: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
21162306a36Sopenharmony_ci	: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
21262306a36Sopenharmony_ci	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
21362306a36Sopenharmony_ci	: "cc" );
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	old = be64_to_cpu(old_be);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	trace_hugepage_update_pmd(addr, old, clr, set);
21862306a36Sopenharmony_ci	if (old & H_PAGE_HASHPTE)
21962306a36Sopenharmony_ci		hpte_do_hugepage_flush(mm, addr, pmdp, old);
22062306a36Sopenharmony_ci	return old;
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cipmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
22462306a36Sopenharmony_ci			    pmd_t *pmdp)
22562306a36Sopenharmony_ci{
22662306a36Sopenharmony_ci	pmd_t pmd;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
22962306a36Sopenharmony_ci	VM_BUG_ON(pmd_trans_huge(*pmdp));
23062306a36Sopenharmony_ci	VM_BUG_ON(pmd_devmap(*pmdp));
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	pmd = *pmdp;
23362306a36Sopenharmony_ci	pmd_clear(pmdp);
23462306a36Sopenharmony_ci	/*
23562306a36Sopenharmony_ci	 * Wait for all pending hash_page to finish. This is needed
23662306a36Sopenharmony_ci	 * in case of subpage collapse. When we collapse normal pages
23762306a36Sopenharmony_ci	 * to hugepage, we first clear the pmd, then invalidate all
23862306a36Sopenharmony_ci	 * the PTE entries. The assumption here is that any low level
23962306a36Sopenharmony_ci	 * page fault will see a none pmd and take the slow path that
24062306a36Sopenharmony_ci	 * will wait on mmap_lock. But we could very well be in a
24162306a36Sopenharmony_ci	 * hash_page with local ptep pointer value. Such a hash page
24262306a36Sopenharmony_ci	 * can result in adding new HPTE entries for normal subpages.
24362306a36Sopenharmony_ci	 * That means we could be modifying the page content as we
24462306a36Sopenharmony_ci	 * copy them to a huge page. So wait for parallel hash_page
24562306a36Sopenharmony_ci	 * to finish before invalidating HPTE entries. We can do this
24662306a36Sopenharmony_ci	 * by sending an IPI to all the cpus and executing a dummy
24762306a36Sopenharmony_ci	 * function there.
24862306a36Sopenharmony_ci	 */
24962306a36Sopenharmony_ci	serialize_against_pte_lookup(vma->vm_mm);
25062306a36Sopenharmony_ci	/*
25162306a36Sopenharmony_ci	 * Now invalidate the hpte entries in the range
25262306a36Sopenharmony_ci	 * covered by pmd. This make sure we take a
25362306a36Sopenharmony_ci	 * fault and will find the pmd as none, which will
25462306a36Sopenharmony_ci	 * result in a major fault which takes mmap_lock and
25562306a36Sopenharmony_ci	 * hence wait for collapse to complete. Without this
25662306a36Sopenharmony_ci	 * the __collapse_huge_page_copy can result in copying
25762306a36Sopenharmony_ci	 * the old content.
25862306a36Sopenharmony_ci	 */
25962306a36Sopenharmony_ci	flush_hash_table_pmd_range(vma->vm_mm, &pmd, address);
26062306a36Sopenharmony_ci	return pmd;
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci/*
26462306a36Sopenharmony_ci * We want to put the pgtable in pmd and use pgtable for tracking
26562306a36Sopenharmony_ci * the base page size hptes
26662306a36Sopenharmony_ci */
26762306a36Sopenharmony_civoid hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
26862306a36Sopenharmony_ci				  pgtable_t pgtable)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	pgtable_t *pgtable_slot;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	assert_spin_locked(pmd_lockptr(mm, pmdp));
27362306a36Sopenharmony_ci	/*
27462306a36Sopenharmony_ci	 * we store the pgtable in the second half of PMD
27562306a36Sopenharmony_ci	 */
27662306a36Sopenharmony_ci	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
27762306a36Sopenharmony_ci	*pgtable_slot = pgtable;
27862306a36Sopenharmony_ci	/*
27962306a36Sopenharmony_ci	 * expose the deposited pgtable to other cpus.
28062306a36Sopenharmony_ci	 * before we set the hugepage PTE at pmd level
28162306a36Sopenharmony_ci	 * hash fault code looks at the deposted pgtable
28262306a36Sopenharmony_ci	 * to store hash index values.
28362306a36Sopenharmony_ci	 */
28462306a36Sopenharmony_ci	smp_wmb();
28562306a36Sopenharmony_ci}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_cipgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
28862306a36Sopenharmony_ci{
28962306a36Sopenharmony_ci	pgtable_t pgtable;
29062306a36Sopenharmony_ci	pgtable_t *pgtable_slot;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	assert_spin_locked(pmd_lockptr(mm, pmdp));
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
29562306a36Sopenharmony_ci	pgtable = *pgtable_slot;
29662306a36Sopenharmony_ci	/*
29762306a36Sopenharmony_ci	 * Once we withdraw, mark the entry NULL.
29862306a36Sopenharmony_ci	 */
29962306a36Sopenharmony_ci	*pgtable_slot = NULL;
30062306a36Sopenharmony_ci	/*
30162306a36Sopenharmony_ci	 * We store HPTE information in the deposited PTE fragment.
30262306a36Sopenharmony_ci	 * zero out the content on withdraw.
30362306a36Sopenharmony_ci	 */
30462306a36Sopenharmony_ci	memset(pgtable, 0, PTE_FRAG_SIZE);
30562306a36Sopenharmony_ci	return pgtable;
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci/*
30962306a36Sopenharmony_ci * A linux hugepage PMD was changed and the corresponding hash table entries
31062306a36Sopenharmony_ci * neesd to be flushed.
31162306a36Sopenharmony_ci */
31262306a36Sopenharmony_civoid hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
31362306a36Sopenharmony_ci			    pmd_t *pmdp, unsigned long old_pmd)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	int ssize;
31662306a36Sopenharmony_ci	unsigned int psize;
31762306a36Sopenharmony_ci	unsigned long vsid;
31862306a36Sopenharmony_ci	unsigned long flags = 0;
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	/* get the base page size,vsid and segment size */
32162306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_VM
32262306a36Sopenharmony_ci	psize = get_slice_psize(mm, addr);
32362306a36Sopenharmony_ci	BUG_ON(psize == MMU_PAGE_16M);
32462306a36Sopenharmony_ci#endif
32562306a36Sopenharmony_ci	if (old_pmd & H_PAGE_COMBO)
32662306a36Sopenharmony_ci		psize = MMU_PAGE_4K;
32762306a36Sopenharmony_ci	else
32862306a36Sopenharmony_ci		psize = MMU_PAGE_64K;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci	if (!is_kernel_addr(addr)) {
33162306a36Sopenharmony_ci		ssize = user_segment_size(addr);
33262306a36Sopenharmony_ci		vsid = get_user_vsid(&mm->context, addr, ssize);
33362306a36Sopenharmony_ci		WARN_ON(vsid == 0);
33462306a36Sopenharmony_ci	} else {
33562306a36Sopenharmony_ci		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
33662306a36Sopenharmony_ci		ssize = mmu_kernel_ssize;
33762306a36Sopenharmony_ci	}
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	if (mm_is_thread_local(mm))
34062306a36Sopenharmony_ci		flags |= HPTE_LOCAL_UPDATE;
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
34362306a36Sopenharmony_ci}
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_cipmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
34662306a36Sopenharmony_ci				unsigned long addr, pmd_t *pmdp)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	pmd_t old_pmd;
34962306a36Sopenharmony_ci	pgtable_t pgtable;
35062306a36Sopenharmony_ci	unsigned long old;
35162306a36Sopenharmony_ci	pgtable_t *pgtable_slot;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
35462306a36Sopenharmony_ci	old_pmd = __pmd(old);
35562306a36Sopenharmony_ci	/*
35662306a36Sopenharmony_ci	 * We have pmd == none and we are holding page_table_lock.
35762306a36Sopenharmony_ci	 * So we can safely go and clear the pgtable hash
35862306a36Sopenharmony_ci	 * index info.
35962306a36Sopenharmony_ci	 */
36062306a36Sopenharmony_ci	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
36162306a36Sopenharmony_ci	pgtable = *pgtable_slot;
36262306a36Sopenharmony_ci	/*
36362306a36Sopenharmony_ci	 * Let's zero out old valid and hash index details
36462306a36Sopenharmony_ci	 * hash fault look at them.
36562306a36Sopenharmony_ci	 */
36662306a36Sopenharmony_ci	memset(pgtable, 0, PTE_FRAG_SIZE);
36762306a36Sopenharmony_ci	return old_pmd;
36862306a36Sopenharmony_ci}
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ciint hash__has_transparent_hugepage(void)
37162306a36Sopenharmony_ci{
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
37462306a36Sopenharmony_ci		return 0;
37562306a36Sopenharmony_ci	/*
37662306a36Sopenharmony_ci	 * We support THP only if PMD_SIZE is 16MB.
37762306a36Sopenharmony_ci	 */
37862306a36Sopenharmony_ci	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
37962306a36Sopenharmony_ci		return 0;
38062306a36Sopenharmony_ci	/*
38162306a36Sopenharmony_ci	 * We need to make sure that we support 16MB hugepage in a segment
38262306a36Sopenharmony_ci	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
38362306a36Sopenharmony_ci	 * of 64K.
38462306a36Sopenharmony_ci	 */
38562306a36Sopenharmony_ci	/*
38662306a36Sopenharmony_ci	 * If we have 64K HPTE, we will be using that by default
38762306a36Sopenharmony_ci	 */
38862306a36Sopenharmony_ci	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
38962306a36Sopenharmony_ci	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
39062306a36Sopenharmony_ci		return 0;
39162306a36Sopenharmony_ci	/*
39262306a36Sopenharmony_ci	 * Ok we only have 4K HPTE
39362306a36Sopenharmony_ci	 */
39462306a36Sopenharmony_ci	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
39562306a36Sopenharmony_ci		return 0;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	return 1;
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci#ifdef CONFIG_STRICT_KERNEL_RWX
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_cistruct change_memory_parms {
40662306a36Sopenharmony_ci	unsigned long start, end, newpp;
40762306a36Sopenharmony_ci	unsigned int step, nr_cpus;
40862306a36Sopenharmony_ci	atomic_t master_cpu;
40962306a36Sopenharmony_ci	atomic_t cpu_counter;
41062306a36Sopenharmony_ci};
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci// We'd rather this was on the stack but it has to be in the RMO
41362306a36Sopenharmony_cistatic struct change_memory_parms chmem_parms;
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci// And therefore we need a lock to protect it from concurrent use
41662306a36Sopenharmony_cistatic DEFINE_MUTEX(chmem_lock);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic void change_memory_range(unsigned long start, unsigned long end,
41962306a36Sopenharmony_ci				unsigned int step, unsigned long newpp)
42062306a36Sopenharmony_ci{
42162306a36Sopenharmony_ci	unsigned long idx;
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
42462306a36Sopenharmony_ci		 start, end, newpp, step);
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	for (idx = start; idx < end; idx += step)
42762306a36Sopenharmony_ci		/* Not sure if we can do much with the return value */
42862306a36Sopenharmony_ci		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
42962306a36Sopenharmony_ci							mmu_kernel_ssize);
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_cistatic int notrace chmem_secondary_loop(struct change_memory_parms *parms)
43362306a36Sopenharmony_ci{
43462306a36Sopenharmony_ci	unsigned long msr, tmp, flags;
43562306a36Sopenharmony_ci	int *p;
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	p = &parms->cpu_counter.counter;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	local_irq_save(flags);
44062306a36Sopenharmony_ci	hard_irq_disable();
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	asm volatile (
44362306a36Sopenharmony_ci	// Switch to real mode and leave interrupts off
44462306a36Sopenharmony_ci	"mfmsr	%[msr]			;"
44562306a36Sopenharmony_ci	"li	%[tmp], %[MSR_IR_DR]	;"
44662306a36Sopenharmony_ci	"andc	%[tmp], %[msr], %[tmp]	;"
44762306a36Sopenharmony_ci	"mtmsrd %[tmp]			;"
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci	// Tell the master we are in real mode
45062306a36Sopenharmony_ci	"1:				"
45162306a36Sopenharmony_ci	"lwarx	%[tmp], 0, %[p]		;"
45262306a36Sopenharmony_ci	"addic	%[tmp], %[tmp], -1	;"
45362306a36Sopenharmony_ci	"stwcx.	%[tmp], 0, %[p]		;"
45462306a36Sopenharmony_ci	"bne-	1b			;"
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	// Spin until the counter goes to zero
45762306a36Sopenharmony_ci	"2:				;"
45862306a36Sopenharmony_ci	"lwz	%[tmp], 0(%[p])		;"
45962306a36Sopenharmony_ci	"cmpwi	%[tmp], 0		;"
46062306a36Sopenharmony_ci	"bne-	2b			;"
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	// Switch back to virtual mode
46362306a36Sopenharmony_ci	"mtmsrd %[msr]			;"
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	: // outputs
46662306a36Sopenharmony_ci	  [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
46762306a36Sopenharmony_ci	: // inputs
46862306a36Sopenharmony_ci	  [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
46962306a36Sopenharmony_ci	: // clobbers
47062306a36Sopenharmony_ci	  "cc", "xer"
47162306a36Sopenharmony_ci	);
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	local_irq_restore(flags);
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	return 0;
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_cistatic int change_memory_range_fn(void *data)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	struct change_memory_parms *parms = data;
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	// First CPU goes through, all others wait.
48362306a36Sopenharmony_ci	if (atomic_xchg(&parms->master_cpu, 1) == 1)
48462306a36Sopenharmony_ci		return chmem_secondary_loop(parms);
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	// Wait for all but one CPU (this one) to call-in
48762306a36Sopenharmony_ci	while (atomic_read(&parms->cpu_counter) > 1)
48862306a36Sopenharmony_ci		barrier();
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	change_memory_range(parms->start, parms->end, parms->step, parms->newpp);
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	mb();
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	// Signal the other CPUs that we're done
49562306a36Sopenharmony_ci	atomic_dec(&parms->cpu_counter);
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	return 0;
49862306a36Sopenharmony_ci}
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_cistatic bool hash__change_memory_range(unsigned long start, unsigned long end,
50162306a36Sopenharmony_ci				      unsigned long newpp)
50262306a36Sopenharmony_ci{
50362306a36Sopenharmony_ci	unsigned int step, shift;
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	shift = mmu_psize_defs[mmu_linear_psize].shift;
50662306a36Sopenharmony_ci	step = 1 << shift;
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	start = ALIGN_DOWN(start, step);
50962306a36Sopenharmony_ci	end = ALIGN(end, step); // aligns up
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	if (start >= end)
51262306a36Sopenharmony_ci		return false;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_LPAR)) {
51562306a36Sopenharmony_ci		mutex_lock(&chmem_lock);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci		chmem_parms.start = start;
51862306a36Sopenharmony_ci		chmem_parms.end = end;
51962306a36Sopenharmony_ci		chmem_parms.step = step;
52062306a36Sopenharmony_ci		chmem_parms.newpp = newpp;
52162306a36Sopenharmony_ci		atomic_set(&chmem_parms.master_cpu, 0);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci		cpus_read_lock();
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci		atomic_set(&chmem_parms.cpu_counter, num_online_cpus());
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci		// Ensure state is consistent before we call the other CPUs
52862306a36Sopenharmony_ci		mb();
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci		stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
53162306a36Sopenharmony_ci					cpu_online_mask);
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci		cpus_read_unlock();
53462306a36Sopenharmony_ci		mutex_unlock(&chmem_lock);
53562306a36Sopenharmony_ci	} else
53662306a36Sopenharmony_ci		change_memory_range(start, end, step, newpp);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	return true;
53962306a36Sopenharmony_ci}
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_civoid hash__mark_rodata_ro(void)
54262306a36Sopenharmony_ci{
54362306a36Sopenharmony_ci	unsigned long start, end, pp;
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	start = (unsigned long)_stext;
54662306a36Sopenharmony_ci	end = (unsigned long)__end_rodata;
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	WARN_ON(!hash__change_memory_range(start, end, pp));
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_civoid hash__mark_initmem_nx(void)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	unsigned long start, end, pp;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	start = (unsigned long)__init_begin;
55862306a36Sopenharmony_ci	end = (unsigned long)__init_end;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	WARN_ON(!hash__change_memory_range(start, end, pp));
56362306a36Sopenharmony_ci}
56462306a36Sopenharmony_ci#endif
565