18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *    Copyright IBM Corp. 2006
48c2ecf20Sopenharmony_ci *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include <linux/memblock.h>
88c2ecf20Sopenharmony_ci#include <linux/pfn.h>
98c2ecf20Sopenharmony_ci#include <linux/mm.h>
108c2ecf20Sopenharmony_ci#include <linux/init.h>
118c2ecf20Sopenharmony_ci#include <linux/list.h>
128c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
138c2ecf20Sopenharmony_ci#include <linux/slab.h>
148c2ecf20Sopenharmony_ci#include <asm/cacheflush.h>
158c2ecf20Sopenharmony_ci#include <asm/pgalloc.h>
168c2ecf20Sopenharmony_ci#include <asm/setup.h>
178c2ecf20Sopenharmony_ci#include <asm/tlbflush.h>
188c2ecf20Sopenharmony_ci#include <asm/sections.h>
198c2ecf20Sopenharmony_ci#include <asm/set_memory.h>
208c2ecf20Sopenharmony_ci
/*
 * Taken by vmemmap_populate(), vmemmap_free(), vmem_add_mapping() and
 * vmem_remove_mapping() around their page table modifications.
 */
static DEFINE_MUTEX(vmem_mutex);
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_cistatic void __ref *vmem_alloc_pages(unsigned int order)
248c2ecf20Sopenharmony_ci{
258c2ecf20Sopenharmony_ci	unsigned long size = PAGE_SIZE << order;
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci	if (slab_is_available())
288c2ecf20Sopenharmony_ci		return (void *)__get_free_pages(GFP_KERNEL, order);
298c2ecf20Sopenharmony_ci	return (void *) memblock_phys_alloc(size, size);
308c2ecf20Sopenharmony_ci}
318c2ecf20Sopenharmony_ci
/*
 * Free 2^order pages starting at @addr, unless slab is not available yet or
 * the first page is marked reserved.
 */
static void vmem_free_pages(unsigned long addr, int order)
{
	if (!slab_is_available())
		return;
	/* We don't expect boot memory to be removed ever: warn and keep it. */
	if (WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
		return;
	free_pages(addr, order);
}
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_civoid *vmem_crst_alloc(unsigned long val)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	unsigned long *table;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
468c2ecf20Sopenharmony_ci	if (table)
478c2ecf20Sopenharmony_ci		crst_table_init(table, val);
488c2ecf20Sopenharmony_ci	return table;
498c2ecf20Sopenharmony_ci}
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cipte_t __ref *vmem_pte_alloc(void)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
548c2ecf20Sopenharmony_ci	pte_t *pte;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	if (slab_is_available())
578c2ecf20Sopenharmony_ci		pte = (pte_t *) page_table_alloc(&init_mm);
588c2ecf20Sopenharmony_ci	else
598c2ecf20Sopenharmony_ci		pte = (pte_t *) memblock_phys_alloc(size, size);
608c2ecf20Sopenharmony_ci	if (!pte)
618c2ecf20Sopenharmony_ci		return NULL;
628c2ecf20Sopenharmony_ci	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
638c2ecf20Sopenharmony_ci	return pte;
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_cistatic void vmem_pte_free(unsigned long *table)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	/* We don't expect boot memory to be removed ever. */
698c2ecf20Sopenharmony_ci	if (!slab_is_available() ||
708c2ecf20Sopenharmony_ci	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
718c2ecf20Sopenharmony_ci		return;
728c2ecf20Sopenharmony_ci	page_table_free(&init_mm, table);
738c2ecf20Sopenharmony_ci}
748c2ecf20Sopenharmony_ci
/* Fill byte used to mark parts of a PMD-sized vmemmap frame as unused. */
#define PAGE_UNUSED 0xFD

/*
 * Start of the unused vmemmap range that has not been memset(PAGE_UNUSED)
 * yet. The range extends from unused_pmd_start to the next PMD_SIZE
 * boundary; 0 means that no such range is pending.
 */
static unsigned long unused_pmd_start;
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_cistatic void vmemmap_flush_unused_pmd(void)
848c2ecf20Sopenharmony_ci{
858c2ecf20Sopenharmony_ci	if (!unused_pmd_start)
868c2ecf20Sopenharmony_ci		return;
878c2ecf20Sopenharmony_ci	memset(__va(unused_pmd_start), PAGE_UNUSED,
888c2ecf20Sopenharmony_ci	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
898c2ecf20Sopenharmony_ci	unused_pmd_start = 0;
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_cistatic void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	/*
958c2ecf20Sopenharmony_ci	 * As we expect to add in the same granularity as we remove, it's
968c2ecf20Sopenharmony_ci	 * sufficient to mark only some piece used to block the memmap page from
978c2ecf20Sopenharmony_ci	 * getting removed (just in case the memmap never gets initialized,
988c2ecf20Sopenharmony_ci	 * e.g., because the memory block never gets onlined).
998c2ecf20Sopenharmony_ci	 */
1008c2ecf20Sopenharmony_ci	memset(__va(start), 0, sizeof(struct page));
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_cistatic void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
1048c2ecf20Sopenharmony_ci{
1058c2ecf20Sopenharmony_ci	/*
1068c2ecf20Sopenharmony_ci	 * We only optimize if the new used range directly follows the
1078c2ecf20Sopenharmony_ci	 * previously unused range (esp., when populating consecutive sections).
1088c2ecf20Sopenharmony_ci	 */
1098c2ecf20Sopenharmony_ci	if (unused_pmd_start == start) {
1108c2ecf20Sopenharmony_ci		unused_pmd_start = end;
1118c2ecf20Sopenharmony_ci		if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
1128c2ecf20Sopenharmony_ci			unused_pmd_start = 0;
1138c2ecf20Sopenharmony_ci		return;
1148c2ecf20Sopenharmony_ci	}
1158c2ecf20Sopenharmony_ci	vmemmap_flush_unused_pmd();
1168c2ecf20Sopenharmony_ci	__vmemmap_use_sub_pmd(start, end);
1178c2ecf20Sopenharmony_ci}
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_cistatic void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	vmemmap_flush_unused_pmd();
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
1268c2ecf20Sopenharmony_ci	__vmemmap_use_sub_pmd(start, end);
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
1298c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(start, PMD_SIZE))
1308c2ecf20Sopenharmony_ci		memset(page, PAGE_UNUSED, start - __pa(page));
1318c2ecf20Sopenharmony_ci	/*
1328c2ecf20Sopenharmony_ci	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
1338c2ecf20Sopenharmony_ci	 * consecutive sections. Remember for the last added PMD the last
1348c2ecf20Sopenharmony_ci	 * unused range in the populated PMD.
1358c2ecf20Sopenharmony_ci	 */
1368c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(end, PMD_SIZE))
1378c2ecf20Sopenharmony_ci		unused_pmd_start = end;
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci/* Returns true if the PMD is completely unused and can be freed. */
1418c2ecf20Sopenharmony_cistatic bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	vmemmap_flush_unused_pmd();
1468c2ecf20Sopenharmony_ci	memset(__va(start), PAGE_UNUSED, end - start);
1478c2ecf20Sopenharmony_ci	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
1518c2ecf20Sopenharmony_cistatic int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
1528c2ecf20Sopenharmony_ci				  unsigned long end, bool add, bool direct)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	unsigned long prot, pages = 0;
1558c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
1568c2ecf20Sopenharmony_ci	pte_t *pte;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	prot = pgprot_val(PAGE_KERNEL);
1598c2ecf20Sopenharmony_ci	if (!MACHINE_HAS_NX)
1608c2ecf20Sopenharmony_ci		prot &= ~_PAGE_NOEXEC;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	pte = pte_offset_kernel(pmd, addr);
1638c2ecf20Sopenharmony_ci	for (; addr < end; addr += PAGE_SIZE, pte++) {
1648c2ecf20Sopenharmony_ci		if (!add) {
1658c2ecf20Sopenharmony_ci			if (pte_none(*pte))
1668c2ecf20Sopenharmony_ci				continue;
1678c2ecf20Sopenharmony_ci			if (!direct)
1688c2ecf20Sopenharmony_ci				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
1698c2ecf20Sopenharmony_ci			pte_clear(&init_mm, addr, pte);
1708c2ecf20Sopenharmony_ci		} else if (pte_none(*pte)) {
1718c2ecf20Sopenharmony_ci			if (!direct) {
1728c2ecf20Sopenharmony_ci				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci				if (!new_page)
1758c2ecf20Sopenharmony_ci					goto out;
1768c2ecf20Sopenharmony_ci				pte_val(*pte) = __pa(new_page) | prot;
1778c2ecf20Sopenharmony_ci			} else {
1788c2ecf20Sopenharmony_ci				pte_val(*pte) = addr | prot;
1798c2ecf20Sopenharmony_ci			}
1808c2ecf20Sopenharmony_ci		} else {
1818c2ecf20Sopenharmony_ci			continue;
1828c2ecf20Sopenharmony_ci		}
1838c2ecf20Sopenharmony_ci		pages++;
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci	ret = 0;
1868c2ecf20Sopenharmony_ciout:
1878c2ecf20Sopenharmony_ci	if (direct)
1888c2ecf20Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
1898c2ecf20Sopenharmony_ci	return ret;
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_cistatic void try_free_pte_table(pmd_t *pmd, unsigned long start)
1938c2ecf20Sopenharmony_ci{
1948c2ecf20Sopenharmony_ci	pte_t *pte;
1958c2ecf20Sopenharmony_ci	int i;
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
1988c2ecf20Sopenharmony_ci	pte = pte_offset_kernel(pmd, start);
1998c2ecf20Sopenharmony_ci	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
2008c2ecf20Sopenharmony_ci		if (!pte_none(*pte))
2018c2ecf20Sopenharmony_ci			return;
2028c2ecf20Sopenharmony_ci	}
2038c2ecf20Sopenharmony_ci	vmem_pte_free(__va(pmd_deref(*pmd)));
2048c2ecf20Sopenharmony_ci	pmd_clear(pmd);
2058c2ecf20Sopenharmony_ci}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
2088c2ecf20Sopenharmony_cistatic int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
2098c2ecf20Sopenharmony_ci				  unsigned long end, bool add, bool direct)
2108c2ecf20Sopenharmony_ci{
2118c2ecf20Sopenharmony_ci	unsigned long next, prot, pages = 0;
2128c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
2138c2ecf20Sopenharmony_ci	pmd_t *pmd;
2148c2ecf20Sopenharmony_ci	pte_t *pte;
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	prot = pgprot_val(SEGMENT_KERNEL);
2178c2ecf20Sopenharmony_ci	if (!MACHINE_HAS_NX)
2188c2ecf20Sopenharmony_ci		prot &= ~_SEGMENT_ENTRY_NOEXEC;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	pmd = pmd_offset(pud, addr);
2218c2ecf20Sopenharmony_ci	for (; addr < end; addr = next, pmd++) {
2228c2ecf20Sopenharmony_ci		next = pmd_addr_end(addr, end);
2238c2ecf20Sopenharmony_ci		if (!add) {
2248c2ecf20Sopenharmony_ci			if (pmd_none(*pmd))
2258c2ecf20Sopenharmony_ci				continue;
2268c2ecf20Sopenharmony_ci			if (pmd_large(*pmd) && !add) {
2278c2ecf20Sopenharmony_ci				if (IS_ALIGNED(addr, PMD_SIZE) &&
2288c2ecf20Sopenharmony_ci				    IS_ALIGNED(next, PMD_SIZE)) {
2298c2ecf20Sopenharmony_ci					if (!direct)
2308c2ecf20Sopenharmony_ci						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
2318c2ecf20Sopenharmony_ci					pmd_clear(pmd);
2328c2ecf20Sopenharmony_ci					pages++;
2338c2ecf20Sopenharmony_ci				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
2348c2ecf20Sopenharmony_ci					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
2358c2ecf20Sopenharmony_ci					pmd_clear(pmd);
2368c2ecf20Sopenharmony_ci				}
2378c2ecf20Sopenharmony_ci				continue;
2388c2ecf20Sopenharmony_ci			}
2398c2ecf20Sopenharmony_ci		} else if (pmd_none(*pmd)) {
2408c2ecf20Sopenharmony_ci			if (IS_ALIGNED(addr, PMD_SIZE) &&
2418c2ecf20Sopenharmony_ci			    IS_ALIGNED(next, PMD_SIZE) &&
2428c2ecf20Sopenharmony_ci			    MACHINE_HAS_EDAT1 && addr && direct &&
2438c2ecf20Sopenharmony_ci			    !debug_pagealloc_enabled()) {
2448c2ecf20Sopenharmony_ci				pmd_val(*pmd) = addr | prot;
2458c2ecf20Sopenharmony_ci				pages++;
2468c2ecf20Sopenharmony_ci				continue;
2478c2ecf20Sopenharmony_ci			} else if (!direct && MACHINE_HAS_EDAT1) {
2488c2ecf20Sopenharmony_ci				void *new_page;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci				/*
2518c2ecf20Sopenharmony_ci				 * Use 1MB frames for vmemmap if available. We
2528c2ecf20Sopenharmony_ci				 * always use large frames even if they are only
2538c2ecf20Sopenharmony_ci				 * partially used. Otherwise we would have also
2548c2ecf20Sopenharmony_ci				 * page tables since vmemmap_populate gets
2558c2ecf20Sopenharmony_ci				 * called for each section separately.
2568c2ecf20Sopenharmony_ci				 */
2578c2ecf20Sopenharmony_ci				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
2588c2ecf20Sopenharmony_ci				if (new_page) {
2598c2ecf20Sopenharmony_ci					pmd_val(*pmd) = __pa(new_page) | prot;
2608c2ecf20Sopenharmony_ci					if (!IS_ALIGNED(addr, PMD_SIZE) ||
2618c2ecf20Sopenharmony_ci					    !IS_ALIGNED(next, PMD_SIZE)) {
2628c2ecf20Sopenharmony_ci						vmemmap_use_new_sub_pmd(addr, next);
2638c2ecf20Sopenharmony_ci					}
2648c2ecf20Sopenharmony_ci					continue;
2658c2ecf20Sopenharmony_ci				}
2668c2ecf20Sopenharmony_ci			}
2678c2ecf20Sopenharmony_ci			pte = vmem_pte_alloc();
2688c2ecf20Sopenharmony_ci			if (!pte)
2698c2ecf20Sopenharmony_ci				goto out;
2708c2ecf20Sopenharmony_ci			pmd_populate(&init_mm, pmd, pte);
2718c2ecf20Sopenharmony_ci		} else if (pmd_large(*pmd)) {
2728c2ecf20Sopenharmony_ci			if (!direct)
2738c2ecf20Sopenharmony_ci				vmemmap_use_sub_pmd(addr, next);
2748c2ecf20Sopenharmony_ci			continue;
2758c2ecf20Sopenharmony_ci		}
2768c2ecf20Sopenharmony_ci		ret = modify_pte_table(pmd, addr, next, add, direct);
2778c2ecf20Sopenharmony_ci		if (ret)
2788c2ecf20Sopenharmony_ci			goto out;
2798c2ecf20Sopenharmony_ci		if (!add)
2808c2ecf20Sopenharmony_ci			try_free_pte_table(pmd, addr & PMD_MASK);
2818c2ecf20Sopenharmony_ci	}
2828c2ecf20Sopenharmony_ci	ret = 0;
2838c2ecf20Sopenharmony_ciout:
2848c2ecf20Sopenharmony_ci	if (direct)
2858c2ecf20Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
2868c2ecf20Sopenharmony_ci	return ret;
2878c2ecf20Sopenharmony_ci}
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_cistatic void try_free_pmd_table(pud_t *pud, unsigned long start)
2908c2ecf20Sopenharmony_ci{
2918c2ecf20Sopenharmony_ci	const unsigned long end = start + PUD_SIZE;
2928c2ecf20Sopenharmony_ci	pmd_t *pmd;
2938c2ecf20Sopenharmony_ci	int i;
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
2968c2ecf20Sopenharmony_ci	if (end > VMALLOC_START)
2978c2ecf20Sopenharmony_ci		return;
2988c2ecf20Sopenharmony_ci#ifdef CONFIG_KASAN
2998c2ecf20Sopenharmony_ci	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
3008c2ecf20Sopenharmony_ci		return;
3018c2ecf20Sopenharmony_ci#endif
3028c2ecf20Sopenharmony_ci	pmd = pmd_offset(pud, start);
3038c2ecf20Sopenharmony_ci	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
3048c2ecf20Sopenharmony_ci		if (!pmd_none(*pmd))
3058c2ecf20Sopenharmony_ci			return;
3068c2ecf20Sopenharmony_ci	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
3078c2ecf20Sopenharmony_ci	pud_clear(pud);
3088c2ecf20Sopenharmony_ci}
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_cistatic int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
3118c2ecf20Sopenharmony_ci			    bool add, bool direct)
3128c2ecf20Sopenharmony_ci{
3138c2ecf20Sopenharmony_ci	unsigned long next, prot, pages = 0;
3148c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
3158c2ecf20Sopenharmony_ci	pud_t *pud;
3168c2ecf20Sopenharmony_ci	pmd_t *pmd;
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	prot = pgprot_val(REGION3_KERNEL);
3198c2ecf20Sopenharmony_ci	if (!MACHINE_HAS_NX)
3208c2ecf20Sopenharmony_ci		prot &= ~_REGION_ENTRY_NOEXEC;
3218c2ecf20Sopenharmony_ci	pud = pud_offset(p4d, addr);
3228c2ecf20Sopenharmony_ci	for (; addr < end; addr = next, pud++) {
3238c2ecf20Sopenharmony_ci		next = pud_addr_end(addr, end);
3248c2ecf20Sopenharmony_ci		if (!add) {
3258c2ecf20Sopenharmony_ci			if (pud_none(*pud))
3268c2ecf20Sopenharmony_ci				continue;
3278c2ecf20Sopenharmony_ci			if (pud_large(*pud)) {
3288c2ecf20Sopenharmony_ci				if (IS_ALIGNED(addr, PUD_SIZE) &&
3298c2ecf20Sopenharmony_ci				    IS_ALIGNED(next, PUD_SIZE)) {
3308c2ecf20Sopenharmony_ci					pud_clear(pud);
3318c2ecf20Sopenharmony_ci					pages++;
3328c2ecf20Sopenharmony_ci				}
3338c2ecf20Sopenharmony_ci				continue;
3348c2ecf20Sopenharmony_ci			}
3358c2ecf20Sopenharmony_ci		} else if (pud_none(*pud)) {
3368c2ecf20Sopenharmony_ci			if (IS_ALIGNED(addr, PUD_SIZE) &&
3378c2ecf20Sopenharmony_ci			    IS_ALIGNED(next, PUD_SIZE) &&
3388c2ecf20Sopenharmony_ci			    MACHINE_HAS_EDAT2 && addr && direct &&
3398c2ecf20Sopenharmony_ci			    !debug_pagealloc_enabled()) {
3408c2ecf20Sopenharmony_ci				pud_val(*pud) = addr | prot;
3418c2ecf20Sopenharmony_ci				pages++;
3428c2ecf20Sopenharmony_ci				continue;
3438c2ecf20Sopenharmony_ci			}
3448c2ecf20Sopenharmony_ci			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
3458c2ecf20Sopenharmony_ci			if (!pmd)
3468c2ecf20Sopenharmony_ci				goto out;
3478c2ecf20Sopenharmony_ci			pud_populate(&init_mm, pud, pmd);
3488c2ecf20Sopenharmony_ci		} else if (pud_large(*pud)) {
3498c2ecf20Sopenharmony_ci			continue;
3508c2ecf20Sopenharmony_ci		}
3518c2ecf20Sopenharmony_ci		ret = modify_pmd_table(pud, addr, next, add, direct);
3528c2ecf20Sopenharmony_ci		if (ret)
3538c2ecf20Sopenharmony_ci			goto out;
3548c2ecf20Sopenharmony_ci		if (!add)
3558c2ecf20Sopenharmony_ci			try_free_pmd_table(pud, addr & PUD_MASK);
3568c2ecf20Sopenharmony_ci	}
3578c2ecf20Sopenharmony_ci	ret = 0;
3588c2ecf20Sopenharmony_ciout:
3598c2ecf20Sopenharmony_ci	if (direct)
3608c2ecf20Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
3618c2ecf20Sopenharmony_ci	return ret;
3628c2ecf20Sopenharmony_ci}
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_cistatic void try_free_pud_table(p4d_t *p4d, unsigned long start)
3658c2ecf20Sopenharmony_ci{
3668c2ecf20Sopenharmony_ci	const unsigned long end = start + P4D_SIZE;
3678c2ecf20Sopenharmony_ci	pud_t *pud;
3688c2ecf20Sopenharmony_ci	int i;
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
3718c2ecf20Sopenharmony_ci	if (end > VMALLOC_START)
3728c2ecf20Sopenharmony_ci		return;
3738c2ecf20Sopenharmony_ci#ifdef CONFIG_KASAN
3748c2ecf20Sopenharmony_ci	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
3758c2ecf20Sopenharmony_ci		return;
3768c2ecf20Sopenharmony_ci#endif
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci	pud = pud_offset(p4d, start);
3798c2ecf20Sopenharmony_ci	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
3808c2ecf20Sopenharmony_ci		if (!pud_none(*pud))
3818c2ecf20Sopenharmony_ci			return;
3828c2ecf20Sopenharmony_ci	}
3838c2ecf20Sopenharmony_ci	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
3848c2ecf20Sopenharmony_ci	p4d_clear(p4d);
3858c2ecf20Sopenharmony_ci}
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_cistatic int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
3888c2ecf20Sopenharmony_ci			    bool add, bool direct)
3898c2ecf20Sopenharmony_ci{
3908c2ecf20Sopenharmony_ci	unsigned long next;
3918c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
3928c2ecf20Sopenharmony_ci	p4d_t *p4d;
3938c2ecf20Sopenharmony_ci	pud_t *pud;
3948c2ecf20Sopenharmony_ci
3958c2ecf20Sopenharmony_ci	p4d = p4d_offset(pgd, addr);
3968c2ecf20Sopenharmony_ci	for (; addr < end; addr = next, p4d++) {
3978c2ecf20Sopenharmony_ci		next = p4d_addr_end(addr, end);
3988c2ecf20Sopenharmony_ci		if (!add) {
3998c2ecf20Sopenharmony_ci			if (p4d_none(*p4d))
4008c2ecf20Sopenharmony_ci				continue;
4018c2ecf20Sopenharmony_ci		} else if (p4d_none(*p4d)) {
4028c2ecf20Sopenharmony_ci			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
4038c2ecf20Sopenharmony_ci			if (!pud)
4048c2ecf20Sopenharmony_ci				goto out;
4058c2ecf20Sopenharmony_ci			p4d_populate(&init_mm, p4d, pud);
4068c2ecf20Sopenharmony_ci		}
4078c2ecf20Sopenharmony_ci		ret = modify_pud_table(p4d, addr, next, add, direct);
4088c2ecf20Sopenharmony_ci		if (ret)
4098c2ecf20Sopenharmony_ci			goto out;
4108c2ecf20Sopenharmony_ci		if (!add)
4118c2ecf20Sopenharmony_ci			try_free_pud_table(p4d, addr & P4D_MASK);
4128c2ecf20Sopenharmony_ci	}
4138c2ecf20Sopenharmony_ci	ret = 0;
4148c2ecf20Sopenharmony_ciout:
4158c2ecf20Sopenharmony_ci	return ret;
4168c2ecf20Sopenharmony_ci}
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_cistatic void try_free_p4d_table(pgd_t *pgd, unsigned long start)
4198c2ecf20Sopenharmony_ci{
4208c2ecf20Sopenharmony_ci	const unsigned long end = start + PGDIR_SIZE;
4218c2ecf20Sopenharmony_ci	p4d_t *p4d;
4228c2ecf20Sopenharmony_ci	int i;
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
4258c2ecf20Sopenharmony_ci	if (end > VMALLOC_START)
4268c2ecf20Sopenharmony_ci		return;
4278c2ecf20Sopenharmony_ci#ifdef CONFIG_KASAN
4288c2ecf20Sopenharmony_ci	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
4298c2ecf20Sopenharmony_ci		return;
4308c2ecf20Sopenharmony_ci#endif
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	p4d = p4d_offset(pgd, start);
4338c2ecf20Sopenharmony_ci	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
4348c2ecf20Sopenharmony_ci		if (!p4d_none(*p4d))
4358c2ecf20Sopenharmony_ci			return;
4368c2ecf20Sopenharmony_ci	}
4378c2ecf20Sopenharmony_ci	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
4388c2ecf20Sopenharmony_ci	pgd_clear(pgd);
4398c2ecf20Sopenharmony_ci}
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_cistatic int modify_pagetable(unsigned long start, unsigned long end, bool add,
4428c2ecf20Sopenharmony_ci			    bool direct)
4438c2ecf20Sopenharmony_ci{
4448c2ecf20Sopenharmony_ci	unsigned long addr, next;
4458c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
4468c2ecf20Sopenharmony_ci	pgd_t *pgd;
4478c2ecf20Sopenharmony_ci	p4d_t *p4d;
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
4508c2ecf20Sopenharmony_ci		return -EINVAL;
4518c2ecf20Sopenharmony_ci	for (addr = start; addr < end; addr = next) {
4528c2ecf20Sopenharmony_ci		next = pgd_addr_end(addr, end);
4538c2ecf20Sopenharmony_ci		pgd = pgd_offset_k(addr);
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci		if (!add) {
4568c2ecf20Sopenharmony_ci			if (pgd_none(*pgd))
4578c2ecf20Sopenharmony_ci				continue;
4588c2ecf20Sopenharmony_ci		} else if (pgd_none(*pgd)) {
4598c2ecf20Sopenharmony_ci			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
4608c2ecf20Sopenharmony_ci			if (!p4d)
4618c2ecf20Sopenharmony_ci				goto out;
4628c2ecf20Sopenharmony_ci			pgd_populate(&init_mm, pgd, p4d);
4638c2ecf20Sopenharmony_ci		}
4648c2ecf20Sopenharmony_ci		ret = modify_p4d_table(pgd, addr, next, add, direct);
4658c2ecf20Sopenharmony_ci		if (ret)
4668c2ecf20Sopenharmony_ci			goto out;
4678c2ecf20Sopenharmony_ci		if (!add)
4688c2ecf20Sopenharmony_ci			try_free_p4d_table(pgd, addr & PGDIR_MASK);
4698c2ecf20Sopenharmony_ci	}
4708c2ecf20Sopenharmony_ci	ret = 0;
4718c2ecf20Sopenharmony_ciout:
4728c2ecf20Sopenharmony_ci	if (!add)
4738c2ecf20Sopenharmony_ci		flush_tlb_kernel_range(start, end);
4748c2ecf20Sopenharmony_ci	return ret;
4758c2ecf20Sopenharmony_ci}
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_cistatic int add_pagetable(unsigned long start, unsigned long end, bool direct)
4788c2ecf20Sopenharmony_ci{
4798c2ecf20Sopenharmony_ci	return modify_pagetable(start, end, true, direct);
4808c2ecf20Sopenharmony_ci}
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_cistatic int remove_pagetable(unsigned long start, unsigned long end, bool direct)
4838c2ecf20Sopenharmony_ci{
4848c2ecf20Sopenharmony_ci	return modify_pagetable(start, end, false, direct);
4858c2ecf20Sopenharmony_ci}
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci/*
4888c2ecf20Sopenharmony_ci * Add a physical memory range to the 1:1 mapping.
4898c2ecf20Sopenharmony_ci */
4908c2ecf20Sopenharmony_cistatic int vmem_add_range(unsigned long start, unsigned long size)
4918c2ecf20Sopenharmony_ci{
4928c2ecf20Sopenharmony_ci	return add_pagetable(start, start + size, true);
4938c2ecf20Sopenharmony_ci}
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci/*
4968c2ecf20Sopenharmony_ci * Remove a physical memory range from the 1:1 mapping.
4978c2ecf20Sopenharmony_ci */
4988c2ecf20Sopenharmony_cistatic void vmem_remove_range(unsigned long start, unsigned long size)
4998c2ecf20Sopenharmony_ci{
5008c2ecf20Sopenharmony_ci	remove_pagetable(start, start + size, true);
5018c2ecf20Sopenharmony_ci}
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci/*
5048c2ecf20Sopenharmony_ci * Add a backed mem_map array to the virtual mem_map array.
5058c2ecf20Sopenharmony_ci */
5068c2ecf20Sopenharmony_ciint __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
5078c2ecf20Sopenharmony_ci			       struct vmem_altmap *altmap)
5088c2ecf20Sopenharmony_ci{
5098c2ecf20Sopenharmony_ci	int ret;
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci	mutex_lock(&vmem_mutex);
5128c2ecf20Sopenharmony_ci	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
5138c2ecf20Sopenharmony_ci	ret = add_pagetable(start, end, false);
5148c2ecf20Sopenharmony_ci	if (ret)
5158c2ecf20Sopenharmony_ci		remove_pagetable(start, end, false);
5168c2ecf20Sopenharmony_ci	mutex_unlock(&vmem_mutex);
5178c2ecf20Sopenharmony_ci	return ret;
5188c2ecf20Sopenharmony_ci}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_civoid vmemmap_free(unsigned long start, unsigned long end,
5218c2ecf20Sopenharmony_ci		  struct vmem_altmap *altmap)
5228c2ecf20Sopenharmony_ci{
5238c2ecf20Sopenharmony_ci	mutex_lock(&vmem_mutex);
5248c2ecf20Sopenharmony_ci	remove_pagetable(start, end, false);
5258c2ecf20Sopenharmony_ci	mutex_unlock(&vmem_mutex);
5268c2ecf20Sopenharmony_ci}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_civoid vmem_remove_mapping(unsigned long start, unsigned long size)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	mutex_lock(&vmem_mutex);
5318c2ecf20Sopenharmony_ci	vmem_remove_range(start, size);
5328c2ecf20Sopenharmony_ci	mutex_unlock(&vmem_mutex);
5338c2ecf20Sopenharmony_ci}
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ciint vmem_add_mapping(unsigned long start, unsigned long size)
5368c2ecf20Sopenharmony_ci{
5378c2ecf20Sopenharmony_ci	int ret;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	if (start + size > VMEM_MAX_PHYS ||
5408c2ecf20Sopenharmony_ci	    start + size < start)
5418c2ecf20Sopenharmony_ci		return -ERANGE;
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	mutex_lock(&vmem_mutex);
5448c2ecf20Sopenharmony_ci	ret = vmem_add_range(start, size);
5458c2ecf20Sopenharmony_ci	if (ret)
5468c2ecf20Sopenharmony_ci		vmem_remove_range(start, size);
5478c2ecf20Sopenharmony_ci	mutex_unlock(&vmem_mutex);
5488c2ecf20Sopenharmony_ci	return ret;
5498c2ecf20Sopenharmony_ci}
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_ci/*
5528c2ecf20Sopenharmony_ci * map whole physical memory to virtual memory (identity mapping)
5538c2ecf20Sopenharmony_ci * we reserve enough space in the vmalloc area for vmemmap to hotplug
5548c2ecf20Sopenharmony_ci * additional memory segments.
5558c2ecf20Sopenharmony_ci */
5568c2ecf20Sopenharmony_civoid __init vmem_map_init(void)
5578c2ecf20Sopenharmony_ci{
5588c2ecf20Sopenharmony_ci	phys_addr_t base, end;
5598c2ecf20Sopenharmony_ci	u64 i;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	for_each_mem_range(i, &base, &end)
5628c2ecf20Sopenharmony_ci		vmem_add_range(base, end - base);
5638c2ecf20Sopenharmony_ci	__set_memory((unsigned long)_stext,
5648c2ecf20Sopenharmony_ci		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
5658c2ecf20Sopenharmony_ci		     SET_MEMORY_RO | SET_MEMORY_X);
5668c2ecf20Sopenharmony_ci	__set_memory((unsigned long)_etext,
5678c2ecf20Sopenharmony_ci		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
5688c2ecf20Sopenharmony_ci		     SET_MEMORY_RO);
5698c2ecf20Sopenharmony_ci	__set_memory((unsigned long)_sinittext,
5708c2ecf20Sopenharmony_ci		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
5718c2ecf20Sopenharmony_ci		     SET_MEMORY_RO | SET_MEMORY_X);
5728c2ecf20Sopenharmony_ci	__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
5738c2ecf20Sopenharmony_ci		     SET_MEMORY_RO | SET_MEMORY_X);
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci	/* we need lowcore executable for our LPSWE instructions */
5768c2ecf20Sopenharmony_ci	set_memory_x(0, 1);
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci	pr_info("Write protected kernel read-only data: %luk\n",
5798c2ecf20Sopenharmony_ci		(unsigned long)(__end_rodata - _stext) >> 10);
5808c2ecf20Sopenharmony_ci}
581