// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/memory_hotplug.h>
762306a36Sopenharmony_ci#include <linux/memblock.h>
862306a36Sopenharmony_ci#include <linux/pfn.h>
962306a36Sopenharmony_ci#include <linux/mm.h>
1062306a36Sopenharmony_ci#include <linux/init.h>
1162306a36Sopenharmony_ci#include <linux/list.h>
1262306a36Sopenharmony_ci#include <linux/hugetlb.h>
1362306a36Sopenharmony_ci#include <linux/slab.h>
1462306a36Sopenharmony_ci#include <linux/sort.h>
1562306a36Sopenharmony_ci#include <asm/page-states.h>
1662306a36Sopenharmony_ci#include <asm/cacheflush.h>
1762306a36Sopenharmony_ci#include <asm/nospec-branch.h>
1862306a36Sopenharmony_ci#include <asm/pgalloc.h>
1962306a36Sopenharmony_ci#include <asm/setup.h>
2062306a36Sopenharmony_ci#include <asm/tlbflush.h>
2162306a36Sopenharmony_ci#include <asm/sections.h>
2262306a36Sopenharmony_ci#include <asm/set_memory.h>
2362306a36Sopenharmony_ci
/*
 * Held by the non-static add/remove/map/unmap entry points of this file
 * while they modify the kernel page tables.
 */
static DEFINE_MUTEX(vmem_mutex);
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic void __ref *vmem_alloc_pages(unsigned int order)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	unsigned long size = PAGE_SIZE << order;
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci	if (slab_is_available())
3162306a36Sopenharmony_ci		return (void *)__get_free_pages(GFP_KERNEL, order);
3262306a36Sopenharmony_ci	return memblock_alloc(size, size);
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
/*
 * Give back 2^order pages starting at virtual address @addr.
 *
 * Nothing is freed while slab is unavailable, and reserved (boot) pages are
 * never freed either: boot memory is not expected to be removed, so hitting
 * one triggers a one-time warning.
 */
static void vmem_free_pages(unsigned long addr, int order)
{
	if (!slab_is_available())
		return;
	if (WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
		return;
	free_pages(addr, order);
}
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_civoid *vmem_crst_alloc(unsigned long val)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	unsigned long *table;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
4962306a36Sopenharmony_ci	if (!table)
5062306a36Sopenharmony_ci		return NULL;
5162306a36Sopenharmony_ci	crst_table_init(table, val);
5262306a36Sopenharmony_ci	if (slab_is_available())
5362306a36Sopenharmony_ci		arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
5462306a36Sopenharmony_ci	return table;
5562306a36Sopenharmony_ci}
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cipte_t __ref *vmem_pte_alloc(void)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
6062306a36Sopenharmony_ci	pte_t *pte;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	if (slab_is_available())
6362306a36Sopenharmony_ci		pte = (pte_t *) page_table_alloc(&init_mm);
6462306a36Sopenharmony_ci	else
6562306a36Sopenharmony_ci		pte = (pte_t *) memblock_alloc(size, size);
6662306a36Sopenharmony_ci	if (!pte)
6762306a36Sopenharmony_ci		return NULL;
6862306a36Sopenharmony_ci	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
6962306a36Sopenharmony_ci	return pte;
7062306a36Sopenharmony_ci}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_cistatic void vmem_pte_free(unsigned long *table)
7362306a36Sopenharmony_ci{
7462306a36Sopenharmony_ci	/* We don't expect boot memory to be removed ever. */
7562306a36Sopenharmony_ci	if (!slab_is_available() ||
7662306a36Sopenharmony_ci	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
7762306a36Sopenharmony_ci		return;
7862306a36Sopenharmony_ci	page_table_free(&init_mm, table);
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
/* Fill byte used to mark the parts of a vmemmap PMD that are not in use. */
#define PAGE_UNUSED 0xFD

/*
 * Start of a vmemmap range that is unused but has not been filled with
 * PAGE_UNUSED yet. The pending range extends from this address up to the
 * next PMD_SIZE boundary; zero means that nothing is pending.
 */
static unsigned long unused_sub_pmd_start;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_cistatic void vmemmap_flush_unused_sub_pmd(void)
9062306a36Sopenharmony_ci{
9162306a36Sopenharmony_ci	if (!unused_sub_pmd_start)
9262306a36Sopenharmony_ci		return;
9362306a36Sopenharmony_ci	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
9462306a36Sopenharmony_ci	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
9562306a36Sopenharmony_ci	unused_sub_pmd_start = 0;
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
9962306a36Sopenharmony_ci{
10062306a36Sopenharmony_ci	/*
10162306a36Sopenharmony_ci	 * As we expect to add in the same granularity as we remove, it's
10262306a36Sopenharmony_ci	 * sufficient to mark only some piece used to block the memmap page from
10362306a36Sopenharmony_ci	 * getting removed (just in case the memmap never gets initialized,
10462306a36Sopenharmony_ci	 * e.g., because the memory block never gets onlined).
10562306a36Sopenharmony_ci	 */
10662306a36Sopenharmony_ci	memset((void *)start, 0, sizeof(struct page));
10762306a36Sopenharmony_ci}
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistatic void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	/*
11262306a36Sopenharmony_ci	 * We only optimize if the new used range directly follows the
11362306a36Sopenharmony_ci	 * previously unused range (esp., when populating consecutive sections).
11462306a36Sopenharmony_ci	 */
11562306a36Sopenharmony_ci	if (unused_sub_pmd_start == start) {
11662306a36Sopenharmony_ci		unused_sub_pmd_start = end;
11762306a36Sopenharmony_ci		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
11862306a36Sopenharmony_ci			unused_sub_pmd_start = 0;
11962306a36Sopenharmony_ci		return;
12062306a36Sopenharmony_ci	}
12162306a36Sopenharmony_ci	vmemmap_flush_unused_sub_pmd();
12262306a36Sopenharmony_ci	vmemmap_mark_sub_pmd_used(start, end);
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_cistatic void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	vmemmap_flush_unused_sub_pmd();
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
13262306a36Sopenharmony_ci	vmemmap_mark_sub_pmd_used(start, end);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
13562306a36Sopenharmony_ci	if (!IS_ALIGNED(start, PMD_SIZE))
13662306a36Sopenharmony_ci		memset((void *)page, PAGE_UNUSED, start - page);
13762306a36Sopenharmony_ci	/*
13862306a36Sopenharmony_ci	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
13962306a36Sopenharmony_ci	 * consecutive sections. Remember for the last added PMD the last
14062306a36Sopenharmony_ci	 * unused range in the populated PMD.
14162306a36Sopenharmony_ci	 */
14262306a36Sopenharmony_ci	if (!IS_ALIGNED(end, PMD_SIZE))
14362306a36Sopenharmony_ci		unused_sub_pmd_start = end;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci/* Returns true if the PMD is completely unused and can be freed. */
14762306a36Sopenharmony_cistatic bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	vmemmap_flush_unused_sub_pmd();
15262306a36Sopenharmony_ci	memset((void *)start, PAGE_UNUSED, end - start);
15362306a36Sopenharmony_ci	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
15762306a36Sopenharmony_cistatic int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
15862306a36Sopenharmony_ci				  unsigned long end, bool add, bool direct)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	unsigned long prot, pages = 0;
16162306a36Sopenharmony_ci	int ret = -ENOMEM;
16262306a36Sopenharmony_ci	pte_t *pte;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	prot = pgprot_val(PAGE_KERNEL);
16562306a36Sopenharmony_ci	if (!MACHINE_HAS_NX)
16662306a36Sopenharmony_ci		prot &= ~_PAGE_NOEXEC;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	pte = pte_offset_kernel(pmd, addr);
16962306a36Sopenharmony_ci	for (; addr < end; addr += PAGE_SIZE, pte++) {
17062306a36Sopenharmony_ci		if (!add) {
17162306a36Sopenharmony_ci			if (pte_none(*pte))
17262306a36Sopenharmony_ci				continue;
17362306a36Sopenharmony_ci			if (!direct)
17462306a36Sopenharmony_ci				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
17562306a36Sopenharmony_ci			pte_clear(&init_mm, addr, pte);
17662306a36Sopenharmony_ci		} else if (pte_none(*pte)) {
17762306a36Sopenharmony_ci			if (!direct) {
17862306a36Sopenharmony_ci				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci				if (!new_page)
18162306a36Sopenharmony_ci					goto out;
18262306a36Sopenharmony_ci				set_pte(pte, __pte(__pa(new_page) | prot));
18362306a36Sopenharmony_ci			} else {
18462306a36Sopenharmony_ci				set_pte(pte, __pte(__pa(addr) | prot));
18562306a36Sopenharmony_ci			}
18662306a36Sopenharmony_ci		} else {
18762306a36Sopenharmony_ci			continue;
18862306a36Sopenharmony_ci		}
18962306a36Sopenharmony_ci		pages++;
19062306a36Sopenharmony_ci	}
19162306a36Sopenharmony_ci	ret = 0;
19262306a36Sopenharmony_ciout:
19362306a36Sopenharmony_ci	if (direct)
19462306a36Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
19562306a36Sopenharmony_ci	return ret;
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_cistatic void try_free_pte_table(pmd_t *pmd, unsigned long start)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	pte_t *pte;
20162306a36Sopenharmony_ci	int i;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
20462306a36Sopenharmony_ci	pte = pte_offset_kernel(pmd, start);
20562306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
20662306a36Sopenharmony_ci		if (!pte_none(*pte))
20762306a36Sopenharmony_ci			return;
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
21062306a36Sopenharmony_ci	pmd_clear(pmd);
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
21462306a36Sopenharmony_cistatic int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
21562306a36Sopenharmony_ci				  unsigned long end, bool add, bool direct)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	unsigned long next, prot, pages = 0;
21862306a36Sopenharmony_ci	int ret = -ENOMEM;
21962306a36Sopenharmony_ci	pmd_t *pmd;
22062306a36Sopenharmony_ci	pte_t *pte;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	prot = pgprot_val(SEGMENT_KERNEL);
22362306a36Sopenharmony_ci	if (!MACHINE_HAS_NX)
22462306a36Sopenharmony_ci		prot &= ~_SEGMENT_ENTRY_NOEXEC;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	pmd = pmd_offset(pud, addr);
22762306a36Sopenharmony_ci	for (; addr < end; addr = next, pmd++) {
22862306a36Sopenharmony_ci		next = pmd_addr_end(addr, end);
22962306a36Sopenharmony_ci		if (!add) {
23062306a36Sopenharmony_ci			if (pmd_none(*pmd))
23162306a36Sopenharmony_ci				continue;
23262306a36Sopenharmony_ci			if (pmd_large(*pmd)) {
23362306a36Sopenharmony_ci				if (IS_ALIGNED(addr, PMD_SIZE) &&
23462306a36Sopenharmony_ci				    IS_ALIGNED(next, PMD_SIZE)) {
23562306a36Sopenharmony_ci					if (!direct)
23662306a36Sopenharmony_ci						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
23762306a36Sopenharmony_ci					pmd_clear(pmd);
23862306a36Sopenharmony_ci					pages++;
23962306a36Sopenharmony_ci				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
24062306a36Sopenharmony_ci					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
24162306a36Sopenharmony_ci					pmd_clear(pmd);
24262306a36Sopenharmony_ci				}
24362306a36Sopenharmony_ci				continue;
24462306a36Sopenharmony_ci			}
24562306a36Sopenharmony_ci		} else if (pmd_none(*pmd)) {
24662306a36Sopenharmony_ci			if (IS_ALIGNED(addr, PMD_SIZE) &&
24762306a36Sopenharmony_ci			    IS_ALIGNED(next, PMD_SIZE) &&
24862306a36Sopenharmony_ci			    MACHINE_HAS_EDAT1 && direct &&
24962306a36Sopenharmony_ci			    !debug_pagealloc_enabled()) {
25062306a36Sopenharmony_ci				set_pmd(pmd, __pmd(__pa(addr) | prot));
25162306a36Sopenharmony_ci				pages++;
25262306a36Sopenharmony_ci				continue;
25362306a36Sopenharmony_ci			} else if (!direct && MACHINE_HAS_EDAT1) {
25462306a36Sopenharmony_ci				void *new_page;
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci				/*
25762306a36Sopenharmony_ci				 * Use 1MB frames for vmemmap if available. We
25862306a36Sopenharmony_ci				 * always use large frames even if they are only
25962306a36Sopenharmony_ci				 * partially used. Otherwise we would have also
26062306a36Sopenharmony_ci				 * page tables since vmemmap_populate gets
26162306a36Sopenharmony_ci				 * called for each section separately.
26262306a36Sopenharmony_ci				 */
26362306a36Sopenharmony_ci				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
26462306a36Sopenharmony_ci				if (new_page) {
26562306a36Sopenharmony_ci					set_pmd(pmd, __pmd(__pa(new_page) | prot));
26662306a36Sopenharmony_ci					if (!IS_ALIGNED(addr, PMD_SIZE) ||
26762306a36Sopenharmony_ci					    !IS_ALIGNED(next, PMD_SIZE)) {
26862306a36Sopenharmony_ci						vmemmap_use_new_sub_pmd(addr, next);
26962306a36Sopenharmony_ci					}
27062306a36Sopenharmony_ci					continue;
27162306a36Sopenharmony_ci				}
27262306a36Sopenharmony_ci			}
27362306a36Sopenharmony_ci			pte = vmem_pte_alloc();
27462306a36Sopenharmony_ci			if (!pte)
27562306a36Sopenharmony_ci				goto out;
27662306a36Sopenharmony_ci			pmd_populate(&init_mm, pmd, pte);
27762306a36Sopenharmony_ci		} else if (pmd_large(*pmd)) {
27862306a36Sopenharmony_ci			if (!direct)
27962306a36Sopenharmony_ci				vmemmap_use_sub_pmd(addr, next);
28062306a36Sopenharmony_ci			continue;
28162306a36Sopenharmony_ci		}
28262306a36Sopenharmony_ci		ret = modify_pte_table(pmd, addr, next, add, direct);
28362306a36Sopenharmony_ci		if (ret)
28462306a36Sopenharmony_ci			goto out;
28562306a36Sopenharmony_ci		if (!add)
28662306a36Sopenharmony_ci			try_free_pte_table(pmd, addr & PMD_MASK);
28762306a36Sopenharmony_ci	}
28862306a36Sopenharmony_ci	ret = 0;
28962306a36Sopenharmony_ciout:
29062306a36Sopenharmony_ci	if (direct)
29162306a36Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
29262306a36Sopenharmony_ci	return ret;
29362306a36Sopenharmony_ci}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic void try_free_pmd_table(pud_t *pud, unsigned long start)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	pmd_t *pmd;
29862306a36Sopenharmony_ci	int i;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	pmd = pmd_offset(pud, start);
30162306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
30262306a36Sopenharmony_ci		if (!pmd_none(*pmd))
30362306a36Sopenharmony_ci			return;
30462306a36Sopenharmony_ci	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
30562306a36Sopenharmony_ci	pud_clear(pud);
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistatic int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
30962306a36Sopenharmony_ci			    bool add, bool direct)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	unsigned long next, prot, pages = 0;
31262306a36Sopenharmony_ci	int ret = -ENOMEM;
31362306a36Sopenharmony_ci	pud_t *pud;
31462306a36Sopenharmony_ci	pmd_t *pmd;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	prot = pgprot_val(REGION3_KERNEL);
31762306a36Sopenharmony_ci	if (!MACHINE_HAS_NX)
31862306a36Sopenharmony_ci		prot &= ~_REGION_ENTRY_NOEXEC;
31962306a36Sopenharmony_ci	pud = pud_offset(p4d, addr);
32062306a36Sopenharmony_ci	for (; addr < end; addr = next, pud++) {
32162306a36Sopenharmony_ci		next = pud_addr_end(addr, end);
32262306a36Sopenharmony_ci		if (!add) {
32362306a36Sopenharmony_ci			if (pud_none(*pud))
32462306a36Sopenharmony_ci				continue;
32562306a36Sopenharmony_ci			if (pud_large(*pud)) {
32662306a36Sopenharmony_ci				if (IS_ALIGNED(addr, PUD_SIZE) &&
32762306a36Sopenharmony_ci				    IS_ALIGNED(next, PUD_SIZE)) {
32862306a36Sopenharmony_ci					pud_clear(pud);
32962306a36Sopenharmony_ci					pages++;
33062306a36Sopenharmony_ci				}
33162306a36Sopenharmony_ci				continue;
33262306a36Sopenharmony_ci			}
33362306a36Sopenharmony_ci		} else if (pud_none(*pud)) {
33462306a36Sopenharmony_ci			if (IS_ALIGNED(addr, PUD_SIZE) &&
33562306a36Sopenharmony_ci			    IS_ALIGNED(next, PUD_SIZE) &&
33662306a36Sopenharmony_ci			    MACHINE_HAS_EDAT2 && direct &&
33762306a36Sopenharmony_ci			    !debug_pagealloc_enabled()) {
33862306a36Sopenharmony_ci				set_pud(pud, __pud(__pa(addr) | prot));
33962306a36Sopenharmony_ci				pages++;
34062306a36Sopenharmony_ci				continue;
34162306a36Sopenharmony_ci			}
34262306a36Sopenharmony_ci			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
34362306a36Sopenharmony_ci			if (!pmd)
34462306a36Sopenharmony_ci				goto out;
34562306a36Sopenharmony_ci			pud_populate(&init_mm, pud, pmd);
34662306a36Sopenharmony_ci		} else if (pud_large(*pud)) {
34762306a36Sopenharmony_ci			continue;
34862306a36Sopenharmony_ci		}
34962306a36Sopenharmony_ci		ret = modify_pmd_table(pud, addr, next, add, direct);
35062306a36Sopenharmony_ci		if (ret)
35162306a36Sopenharmony_ci			goto out;
35262306a36Sopenharmony_ci		if (!add)
35362306a36Sopenharmony_ci			try_free_pmd_table(pud, addr & PUD_MASK);
35462306a36Sopenharmony_ci	}
35562306a36Sopenharmony_ci	ret = 0;
35662306a36Sopenharmony_ciout:
35762306a36Sopenharmony_ci	if (direct)
35862306a36Sopenharmony_ci		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
35962306a36Sopenharmony_ci	return ret;
36062306a36Sopenharmony_ci}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_cistatic void try_free_pud_table(p4d_t *p4d, unsigned long start)
36362306a36Sopenharmony_ci{
36462306a36Sopenharmony_ci	pud_t *pud;
36562306a36Sopenharmony_ci	int i;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	pud = pud_offset(p4d, start);
36862306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
36962306a36Sopenharmony_ci		if (!pud_none(*pud))
37062306a36Sopenharmony_ci			return;
37162306a36Sopenharmony_ci	}
37262306a36Sopenharmony_ci	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
37362306a36Sopenharmony_ci	p4d_clear(p4d);
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_cistatic int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
37762306a36Sopenharmony_ci			    bool add, bool direct)
37862306a36Sopenharmony_ci{
37962306a36Sopenharmony_ci	unsigned long next;
38062306a36Sopenharmony_ci	int ret = -ENOMEM;
38162306a36Sopenharmony_ci	p4d_t *p4d;
38262306a36Sopenharmony_ci	pud_t *pud;
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	p4d = p4d_offset(pgd, addr);
38562306a36Sopenharmony_ci	for (; addr < end; addr = next, p4d++) {
38662306a36Sopenharmony_ci		next = p4d_addr_end(addr, end);
38762306a36Sopenharmony_ci		if (!add) {
38862306a36Sopenharmony_ci			if (p4d_none(*p4d))
38962306a36Sopenharmony_ci				continue;
39062306a36Sopenharmony_ci		} else if (p4d_none(*p4d)) {
39162306a36Sopenharmony_ci			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
39262306a36Sopenharmony_ci			if (!pud)
39362306a36Sopenharmony_ci				goto out;
39462306a36Sopenharmony_ci			p4d_populate(&init_mm, p4d, pud);
39562306a36Sopenharmony_ci		}
39662306a36Sopenharmony_ci		ret = modify_pud_table(p4d, addr, next, add, direct);
39762306a36Sopenharmony_ci		if (ret)
39862306a36Sopenharmony_ci			goto out;
39962306a36Sopenharmony_ci		if (!add)
40062306a36Sopenharmony_ci			try_free_pud_table(p4d, addr & P4D_MASK);
40162306a36Sopenharmony_ci	}
40262306a36Sopenharmony_ci	ret = 0;
40362306a36Sopenharmony_ciout:
40462306a36Sopenharmony_ci	return ret;
40562306a36Sopenharmony_ci}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_cistatic void try_free_p4d_table(pgd_t *pgd, unsigned long start)
40862306a36Sopenharmony_ci{
40962306a36Sopenharmony_ci	p4d_t *p4d;
41062306a36Sopenharmony_ci	int i;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	p4d = p4d_offset(pgd, start);
41362306a36Sopenharmony_ci	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
41462306a36Sopenharmony_ci		if (!p4d_none(*p4d))
41562306a36Sopenharmony_ci			return;
41662306a36Sopenharmony_ci	}
41762306a36Sopenharmony_ci	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
41862306a36Sopenharmony_ci	pgd_clear(pgd);
41962306a36Sopenharmony_ci}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_cistatic int modify_pagetable(unsigned long start, unsigned long end, bool add,
42262306a36Sopenharmony_ci			    bool direct)
42362306a36Sopenharmony_ci{
42462306a36Sopenharmony_ci	unsigned long addr, next;
42562306a36Sopenharmony_ci	int ret = -ENOMEM;
42662306a36Sopenharmony_ci	pgd_t *pgd;
42762306a36Sopenharmony_ci	p4d_t *p4d;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
43062306a36Sopenharmony_ci		return -EINVAL;
43162306a36Sopenharmony_ci	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
43262306a36Sopenharmony_ci	if (WARN_ON_ONCE(end > VMALLOC_START))
43362306a36Sopenharmony_ci		return -EINVAL;
43462306a36Sopenharmony_ci	for (addr = start; addr < end; addr = next) {
43562306a36Sopenharmony_ci		next = pgd_addr_end(addr, end);
43662306a36Sopenharmony_ci		pgd = pgd_offset_k(addr);
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci		if (!add) {
43962306a36Sopenharmony_ci			if (pgd_none(*pgd))
44062306a36Sopenharmony_ci				continue;
44162306a36Sopenharmony_ci		} else if (pgd_none(*pgd)) {
44262306a36Sopenharmony_ci			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
44362306a36Sopenharmony_ci			if (!p4d)
44462306a36Sopenharmony_ci				goto out;
44562306a36Sopenharmony_ci			pgd_populate(&init_mm, pgd, p4d);
44662306a36Sopenharmony_ci		}
44762306a36Sopenharmony_ci		ret = modify_p4d_table(pgd, addr, next, add, direct);
44862306a36Sopenharmony_ci		if (ret)
44962306a36Sopenharmony_ci			goto out;
45062306a36Sopenharmony_ci		if (!add)
45162306a36Sopenharmony_ci			try_free_p4d_table(pgd, addr & PGDIR_MASK);
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci	ret = 0;
45462306a36Sopenharmony_ciout:
45562306a36Sopenharmony_ci	if (!add)
45662306a36Sopenharmony_ci		flush_tlb_kernel_range(start, end);
45762306a36Sopenharmony_ci	return ret;
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_cistatic int add_pagetable(unsigned long start, unsigned long end, bool direct)
46162306a36Sopenharmony_ci{
46262306a36Sopenharmony_ci	return modify_pagetable(start, end, true, direct);
46362306a36Sopenharmony_ci}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_cistatic int remove_pagetable(unsigned long start, unsigned long end, bool direct)
46662306a36Sopenharmony_ci{
46762306a36Sopenharmony_ci	return modify_pagetable(start, end, false, direct);
46862306a36Sopenharmony_ci}
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci/*
47162306a36Sopenharmony_ci * Add a physical memory range to the 1:1 mapping.
47262306a36Sopenharmony_ci */
47362306a36Sopenharmony_cistatic int vmem_add_range(unsigned long start, unsigned long size)
47462306a36Sopenharmony_ci{
47562306a36Sopenharmony_ci	start = (unsigned long)__va(start);
47662306a36Sopenharmony_ci	return add_pagetable(start, start + size, true);
47762306a36Sopenharmony_ci}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci/*
48062306a36Sopenharmony_ci * Remove a physical memory range from the 1:1 mapping.
48162306a36Sopenharmony_ci */
48262306a36Sopenharmony_cistatic void vmem_remove_range(unsigned long start, unsigned long size)
48362306a36Sopenharmony_ci{
48462306a36Sopenharmony_ci	start = (unsigned long)__va(start);
48562306a36Sopenharmony_ci	remove_pagetable(start, start + size, true);
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci/*
48962306a36Sopenharmony_ci * Add a backed mem_map array to the virtual mem_map array.
49062306a36Sopenharmony_ci */
49162306a36Sopenharmony_ciint __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
49262306a36Sopenharmony_ci			       struct vmem_altmap *altmap)
49362306a36Sopenharmony_ci{
49462306a36Sopenharmony_ci	int ret;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
49762306a36Sopenharmony_ci	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
49862306a36Sopenharmony_ci	ret = add_pagetable(start, end, false);
49962306a36Sopenharmony_ci	if (ret)
50062306a36Sopenharmony_ci		remove_pagetable(start, end, false);
50162306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
50262306a36Sopenharmony_ci	return ret;
50362306a36Sopenharmony_ci}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_civoid vmemmap_free(unsigned long start, unsigned long end,
50662306a36Sopenharmony_ci		  struct vmem_altmap *altmap)
50762306a36Sopenharmony_ci{
50862306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
50962306a36Sopenharmony_ci	remove_pagetable(start, end, false);
51062306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
51162306a36Sopenharmony_ci}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_civoid vmem_remove_mapping(unsigned long start, unsigned long size)
51462306a36Sopenharmony_ci{
51562306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
51662306a36Sopenharmony_ci	vmem_remove_range(start, size);
51762306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_cistruct range arch_get_mappable_range(void)
52162306a36Sopenharmony_ci{
52262306a36Sopenharmony_ci	struct range mhp_range;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	mhp_range.start = 0;
52562306a36Sopenharmony_ci	mhp_range.end = max_mappable - 1;
52662306a36Sopenharmony_ci	return mhp_range;
52762306a36Sopenharmony_ci}
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ciint vmem_add_mapping(unsigned long start, unsigned long size)
53062306a36Sopenharmony_ci{
53162306a36Sopenharmony_ci	struct range range = arch_get_mappable_range();
53262306a36Sopenharmony_ci	int ret;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	if (start < range.start ||
53562306a36Sopenharmony_ci	    start + size > range.end + 1 ||
53662306a36Sopenharmony_ci	    start + size < start)
53762306a36Sopenharmony_ci		return -ERANGE;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
54062306a36Sopenharmony_ci	ret = vmem_add_range(start, size);
54162306a36Sopenharmony_ci	if (ret)
54262306a36Sopenharmony_ci		vmem_remove_range(start, size);
54362306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
54462306a36Sopenharmony_ci	return ret;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci/*
54862306a36Sopenharmony_ci * Allocate new or return existing page-table entry, but do not map it
54962306a36Sopenharmony_ci * to any physical address. If missing, allocate segment- and region-
55062306a36Sopenharmony_ci * table entries along. Meeting a large segment- or region-table entry
55162306a36Sopenharmony_ci * while traversing is an error, since the function is expected to be
55262306a36Sopenharmony_ci * called against virtual regions reserved for 4KB mappings only.
55362306a36Sopenharmony_ci */
55462306a36Sopenharmony_cipte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	pte_t *ptep = NULL;
55762306a36Sopenharmony_ci	pgd_t *pgd;
55862306a36Sopenharmony_ci	p4d_t *p4d;
55962306a36Sopenharmony_ci	pud_t *pud;
56062306a36Sopenharmony_ci	pmd_t *pmd;
56162306a36Sopenharmony_ci	pte_t *pte;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	pgd = pgd_offset_k(addr);
56462306a36Sopenharmony_ci	if (pgd_none(*pgd)) {
56562306a36Sopenharmony_ci		if (!alloc)
56662306a36Sopenharmony_ci			goto out;
56762306a36Sopenharmony_ci		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
56862306a36Sopenharmony_ci		if (!p4d)
56962306a36Sopenharmony_ci			goto out;
57062306a36Sopenharmony_ci		pgd_populate(&init_mm, pgd, p4d);
57162306a36Sopenharmony_ci	}
57262306a36Sopenharmony_ci	p4d = p4d_offset(pgd, addr);
57362306a36Sopenharmony_ci	if (p4d_none(*p4d)) {
57462306a36Sopenharmony_ci		if (!alloc)
57562306a36Sopenharmony_ci			goto out;
57662306a36Sopenharmony_ci		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
57762306a36Sopenharmony_ci		if (!pud)
57862306a36Sopenharmony_ci			goto out;
57962306a36Sopenharmony_ci		p4d_populate(&init_mm, p4d, pud);
58062306a36Sopenharmony_ci	}
58162306a36Sopenharmony_ci	pud = pud_offset(p4d, addr);
58262306a36Sopenharmony_ci	if (pud_none(*pud)) {
58362306a36Sopenharmony_ci		if (!alloc)
58462306a36Sopenharmony_ci			goto out;
58562306a36Sopenharmony_ci		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
58662306a36Sopenharmony_ci		if (!pmd)
58762306a36Sopenharmony_ci			goto out;
58862306a36Sopenharmony_ci		pud_populate(&init_mm, pud, pmd);
58962306a36Sopenharmony_ci	} else if (WARN_ON_ONCE(pud_large(*pud))) {
59062306a36Sopenharmony_ci		goto out;
59162306a36Sopenharmony_ci	}
59262306a36Sopenharmony_ci	pmd = pmd_offset(pud, addr);
59362306a36Sopenharmony_ci	if (pmd_none(*pmd)) {
59462306a36Sopenharmony_ci		if (!alloc)
59562306a36Sopenharmony_ci			goto out;
59662306a36Sopenharmony_ci		pte = vmem_pte_alloc();
59762306a36Sopenharmony_ci		if (!pte)
59862306a36Sopenharmony_ci			goto out;
59962306a36Sopenharmony_ci		pmd_populate(&init_mm, pmd, pte);
60062306a36Sopenharmony_ci	} else if (WARN_ON_ONCE(pmd_large(*pmd))) {
60162306a36Sopenharmony_ci		goto out;
60262306a36Sopenharmony_ci	}
60362306a36Sopenharmony_ci	ptep = pte_offset_kernel(pmd, addr);
60462306a36Sopenharmony_ciout:
60562306a36Sopenharmony_ci	return ptep;
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ciint __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	pte_t *ptep, pte;
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	if (!IS_ALIGNED(addr, PAGE_SIZE))
61362306a36Sopenharmony_ci		return -EINVAL;
61462306a36Sopenharmony_ci	ptep = vmem_get_alloc_pte(addr, alloc);
61562306a36Sopenharmony_ci	if (!ptep)
61662306a36Sopenharmony_ci		return -ENOMEM;
61762306a36Sopenharmony_ci	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
61862306a36Sopenharmony_ci	pte = mk_pte_phys(phys, prot);
61962306a36Sopenharmony_ci	set_pte(ptep, pte);
62062306a36Sopenharmony_ci	return 0;
62162306a36Sopenharmony_ci}
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ciint vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
62462306a36Sopenharmony_ci{
62562306a36Sopenharmony_ci	int rc;
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
62862306a36Sopenharmony_ci	rc = __vmem_map_4k_page(addr, phys, prot, true);
62962306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
63062306a36Sopenharmony_ci	return rc;
63162306a36Sopenharmony_ci}
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_civoid vmem_unmap_4k_page(unsigned long addr)
63462306a36Sopenharmony_ci{
63562306a36Sopenharmony_ci	pte_t *ptep;
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	mutex_lock(&vmem_mutex);
63862306a36Sopenharmony_ci	ptep = virt_to_kpte(addr);
63962306a36Sopenharmony_ci	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
64062306a36Sopenharmony_ci	pte_clear(&init_mm, addr, ptep);
64162306a36Sopenharmony_ci	mutex_unlock(&vmem_mutex);
64262306a36Sopenharmony_ci}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_civoid __init vmem_map_init(void)
64562306a36Sopenharmony_ci{
64662306a36Sopenharmony_ci	__set_memory_rox(_stext, _etext);
64762306a36Sopenharmony_ci	__set_memory_ro(_etext, __end_rodata);
64862306a36Sopenharmony_ci	__set_memory_rox(_sinittext, _einittext);
64962306a36Sopenharmony_ci	__set_memory_rox(__stext_amode31, __etext_amode31);
65062306a36Sopenharmony_ci	/*
65162306a36Sopenharmony_ci	 * If the BEAR-enhancement facility is not installed the first
65262306a36Sopenharmony_ci	 * prefix page is used to return to the previous context with
65362306a36Sopenharmony_ci	 * an LPSWE instruction and therefore must be executable.
65462306a36Sopenharmony_ci	 */
65562306a36Sopenharmony_ci	if (!static_key_enabled(&cpu_has_bear))
65662306a36Sopenharmony_ci		set_memory_x(0, 1);
65762306a36Sopenharmony_ci	if (debug_pagealloc_enabled()) {
65862306a36Sopenharmony_ci		/*
65962306a36Sopenharmony_ci		 * Use RELOC_HIDE() as long as __va(0) translates to NULL,
66062306a36Sopenharmony_ci		 * since performing pointer arithmetic on a NULL pointer
66162306a36Sopenharmony_ci		 * has undefined behavior and generates compiler warnings.
66262306a36Sopenharmony_ci		 */
66362306a36Sopenharmony_ci		__set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
66462306a36Sopenharmony_ci	}
66562306a36Sopenharmony_ci	if (MACHINE_HAS_NX)
66662306a36Sopenharmony_ci		ctl_set_bit(0, 20);
66762306a36Sopenharmony_ci	pr_info("Write protected kernel read-only data: %luk\n",
66862306a36Sopenharmony_ci		(unsigned long)(__end_rodata - _stext) >> 10);
66962306a36Sopenharmony_ci}
670