162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright IBM Corp. 2006 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/memory_hotplug.h> 762306a36Sopenharmony_ci#include <linux/memblock.h> 862306a36Sopenharmony_ci#include <linux/pfn.h> 962306a36Sopenharmony_ci#include <linux/mm.h> 1062306a36Sopenharmony_ci#include <linux/init.h> 1162306a36Sopenharmony_ci#include <linux/list.h> 1262306a36Sopenharmony_ci#include <linux/hugetlb.h> 1362306a36Sopenharmony_ci#include <linux/slab.h> 1462306a36Sopenharmony_ci#include <linux/sort.h> 1562306a36Sopenharmony_ci#include <asm/page-states.h> 1662306a36Sopenharmony_ci#include <asm/cacheflush.h> 1762306a36Sopenharmony_ci#include <asm/nospec-branch.h> 1862306a36Sopenharmony_ci#include <asm/pgalloc.h> 1962306a36Sopenharmony_ci#include <asm/setup.h> 2062306a36Sopenharmony_ci#include <asm/tlbflush.h> 2162306a36Sopenharmony_ci#include <asm/sections.h> 2262306a36Sopenharmony_ci#include <asm/set_memory.h> 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic DEFINE_MUTEX(vmem_mutex); 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_cistatic void __ref *vmem_alloc_pages(unsigned int order) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci unsigned long size = PAGE_SIZE << order; 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci if (slab_is_available()) 3162306a36Sopenharmony_ci return (void *)__get_free_pages(GFP_KERNEL, order); 3262306a36Sopenharmony_ci return memblock_alloc(size, size); 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistatic void vmem_free_pages(unsigned long addr, int order) 3662306a36Sopenharmony_ci{ 3762306a36Sopenharmony_ci /* We don't expect boot memory to be removed ever. */ 3862306a36Sopenharmony_ci if (!slab_is_available() || 3962306a36Sopenharmony_ci WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr)))) 4062306a36Sopenharmony_ci return; 4162306a36Sopenharmony_ci free_pages(addr, order); 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_civoid *vmem_crst_alloc(unsigned long val) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci unsigned long *table; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci table = vmem_alloc_pages(CRST_ALLOC_ORDER); 4962306a36Sopenharmony_ci if (!table) 5062306a36Sopenharmony_ci return NULL; 5162306a36Sopenharmony_ci crst_table_init(table, val); 5262306a36Sopenharmony_ci if (slab_is_available()) 5362306a36Sopenharmony_ci arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER); 5462306a36Sopenharmony_ci return table; 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cipte_t __ref *vmem_pte_alloc(void) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci unsigned long size = PTRS_PER_PTE * sizeof(pte_t); 6062306a36Sopenharmony_ci pte_t *pte; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (slab_is_available()) 6362306a36Sopenharmony_ci pte = (pte_t *) page_table_alloc(&init_mm); 6462306a36Sopenharmony_ci else 6562306a36Sopenharmony_ci pte = (pte_t *) memblock_alloc(size, size); 6662306a36Sopenharmony_ci if (!pte) 6762306a36Sopenharmony_ci return NULL; 6862306a36Sopenharmony_ci memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); 6962306a36Sopenharmony_ci return pte; 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistatic void vmem_pte_free(unsigned long *table) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci /* We don't expect boot memory to be removed ever. */ 7562306a36Sopenharmony_ci if (!slab_is_available() || 7662306a36Sopenharmony_ci WARN_ON_ONCE(PageReserved(virt_to_page(table)))) 7762306a36Sopenharmony_ci return; 7862306a36Sopenharmony_ci page_table_free(&init_mm, table); 7962306a36Sopenharmony_ci} 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci#define PAGE_UNUSED 0xFD 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci/* 8462306a36Sopenharmony_ci * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges 8562306a36Sopenharmony_ci * from unused_sub_pmd_start to next PMD_SIZE boundary. 8662306a36Sopenharmony_ci */ 8762306a36Sopenharmony_cistatic unsigned long unused_sub_pmd_start; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_cistatic void vmemmap_flush_unused_sub_pmd(void) 9062306a36Sopenharmony_ci{ 9162306a36Sopenharmony_ci if (!unused_sub_pmd_start) 9262306a36Sopenharmony_ci return; 9362306a36Sopenharmony_ci memset((void *)unused_sub_pmd_start, PAGE_UNUSED, 9462306a36Sopenharmony_ci ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); 9562306a36Sopenharmony_ci unused_sub_pmd_start = 0; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci /* 10162306a36Sopenharmony_ci * As we expect to add in the same granularity as we remove, it's 10262306a36Sopenharmony_ci * sufficient to mark only some piece used to block the memmap page from 10362306a36Sopenharmony_ci * getting removed (just in case the memmap never gets initialized, 10462306a36Sopenharmony_ci * e.g., because the memory block never gets onlined). 10562306a36Sopenharmony_ci */ 10662306a36Sopenharmony_ci memset((void *)start, 0, sizeof(struct page)); 10762306a36Sopenharmony_ci} 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cistatic void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci /* 11262306a36Sopenharmony_ci * We only optimize if the new used range directly follows the 11362306a36Sopenharmony_ci * previously unused range (esp., when populating consecutive sections). 11462306a36Sopenharmony_ci */ 11562306a36Sopenharmony_ci if (unused_sub_pmd_start == start) { 11662306a36Sopenharmony_ci unused_sub_pmd_start = end; 11762306a36Sopenharmony_ci if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE))) 11862306a36Sopenharmony_ci unused_sub_pmd_start = 0; 11962306a36Sopenharmony_ci return; 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci vmemmap_flush_unused_sub_pmd(); 12262306a36Sopenharmony_ci vmemmap_mark_sub_pmd_used(start, end); 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cistatic void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci unsigned long page = ALIGN_DOWN(start, PMD_SIZE); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci vmemmap_flush_unused_sub_pmd(); 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci /* Could be our memmap page is filled with PAGE_UNUSED already ... */ 13262306a36Sopenharmony_ci vmemmap_mark_sub_pmd_used(start, end); 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ 13562306a36Sopenharmony_ci if (!IS_ALIGNED(start, PMD_SIZE)) 13662306a36Sopenharmony_ci memset((void *)page, PAGE_UNUSED, start - page); 13762306a36Sopenharmony_ci /* 13862306a36Sopenharmony_ci * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of 13962306a36Sopenharmony_ci * consecutive sections. Remember for the last added PMD the last 14062306a36Sopenharmony_ci * unused range in the populated PMD. 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_ci if (!IS_ALIGNED(end, PMD_SIZE)) 14362306a36Sopenharmony_ci unused_sub_pmd_start = end; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci/* Returns true if the PMD is completely unused and can be freed. */ 14762306a36Sopenharmony_cistatic bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci unsigned long page = ALIGN_DOWN(start, PMD_SIZE); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci vmemmap_flush_unused_sub_pmd(); 15262306a36Sopenharmony_ci memset((void *)start, PAGE_UNUSED, end - start); 15362306a36Sopenharmony_ci return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE); 15462306a36Sopenharmony_ci} 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ 15762306a36Sopenharmony_cistatic int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, 15862306a36Sopenharmony_ci unsigned long end, bool add, bool direct) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci unsigned long prot, pages = 0; 16162306a36Sopenharmony_ci int ret = -ENOMEM; 16262306a36Sopenharmony_ci pte_t *pte; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci prot = pgprot_val(PAGE_KERNEL); 16562306a36Sopenharmony_ci if (!MACHINE_HAS_NX) 16662306a36Sopenharmony_ci prot &= ~_PAGE_NOEXEC; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci pte = pte_offset_kernel(pmd, addr); 16962306a36Sopenharmony_ci for (; addr < end; addr += PAGE_SIZE, pte++) { 17062306a36Sopenharmony_ci if (!add) { 17162306a36Sopenharmony_ci if (pte_none(*pte)) 17262306a36Sopenharmony_ci continue; 17362306a36Sopenharmony_ci if (!direct) 17462306a36Sopenharmony_ci vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0); 17562306a36Sopenharmony_ci pte_clear(&init_mm, addr, pte); 17662306a36Sopenharmony_ci } else if (pte_none(*pte)) { 17762306a36Sopenharmony_ci if (!direct) { 17862306a36Sopenharmony_ci void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci if (!new_page) 18162306a36Sopenharmony_ci goto out; 18262306a36Sopenharmony_ci set_pte(pte, __pte(__pa(new_page) | prot)); 18362306a36Sopenharmony_ci } else { 18462306a36Sopenharmony_ci set_pte(pte, __pte(__pa(addr) | prot)); 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci } else { 18762306a36Sopenharmony_ci continue; 18862306a36Sopenharmony_ci } 18962306a36Sopenharmony_ci pages++; 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci ret = 0; 19262306a36Sopenharmony_ciout: 19362306a36Sopenharmony_ci if (direct) 19462306a36Sopenharmony_ci update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); 19562306a36Sopenharmony_ci return ret; 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_cistatic void try_free_pte_table(pmd_t *pmd, unsigned long start) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci pte_t *pte; 20162306a36Sopenharmony_ci int i; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci /* We can safely assume this is fully in 1:1 mapping & vmemmap area */ 20462306a36Sopenharmony_ci pte = pte_offset_kernel(pmd, start); 20562306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 20662306a36Sopenharmony_ci if (!pte_none(*pte)) 20762306a36Sopenharmony_ci return; 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci vmem_pte_free((unsigned long *) pmd_deref(*pmd)); 21062306a36Sopenharmony_ci pmd_clear(pmd); 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ 21462306a36Sopenharmony_cistatic int __ref modify_pmd_table(pud_t *pud, unsigned long addr, 21562306a36Sopenharmony_ci unsigned long end, bool add, bool direct) 21662306a36Sopenharmony_ci{ 21762306a36Sopenharmony_ci unsigned long next, prot, pages = 0; 21862306a36Sopenharmony_ci int ret = -ENOMEM; 21962306a36Sopenharmony_ci pmd_t *pmd; 22062306a36Sopenharmony_ci pte_t *pte; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci prot = pgprot_val(SEGMENT_KERNEL); 22362306a36Sopenharmony_ci if (!MACHINE_HAS_NX) 22462306a36Sopenharmony_ci prot &= ~_SEGMENT_ENTRY_NOEXEC; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci pmd = pmd_offset(pud, addr); 22762306a36Sopenharmony_ci for (; addr < end; addr = next, pmd++) { 22862306a36Sopenharmony_ci next = pmd_addr_end(addr, end); 22962306a36Sopenharmony_ci if (!add) { 23062306a36Sopenharmony_ci if (pmd_none(*pmd)) 23162306a36Sopenharmony_ci continue; 23262306a36Sopenharmony_ci if (pmd_large(*pmd)) { 23362306a36Sopenharmony_ci if (IS_ALIGNED(addr, PMD_SIZE) && 23462306a36Sopenharmony_ci IS_ALIGNED(next, PMD_SIZE)) { 23562306a36Sopenharmony_ci if (!direct) 23662306a36Sopenharmony_ci vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); 23762306a36Sopenharmony_ci pmd_clear(pmd); 23862306a36Sopenharmony_ci pages++; 23962306a36Sopenharmony_ci } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) { 24062306a36Sopenharmony_ci vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); 24162306a36Sopenharmony_ci pmd_clear(pmd); 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci continue; 24462306a36Sopenharmony_ci } 24562306a36Sopenharmony_ci } else if (pmd_none(*pmd)) { 24662306a36Sopenharmony_ci if (IS_ALIGNED(addr, PMD_SIZE) && 24762306a36Sopenharmony_ci IS_ALIGNED(next, PMD_SIZE) && 24862306a36Sopenharmony_ci MACHINE_HAS_EDAT1 && direct && 24962306a36Sopenharmony_ci !debug_pagealloc_enabled()) { 25062306a36Sopenharmony_ci set_pmd(pmd, __pmd(__pa(addr) | prot)); 25162306a36Sopenharmony_ci pages++; 25262306a36Sopenharmony_ci continue; 25362306a36Sopenharmony_ci } else if (!direct && MACHINE_HAS_EDAT1) { 25462306a36Sopenharmony_ci void *new_page; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci /* 25762306a36Sopenharmony_ci * Use 1MB frames for vmemmap if available. We 25862306a36Sopenharmony_ci * always use large frames even if they are only 25962306a36Sopenharmony_ci * partially used. Otherwise we would have also 26062306a36Sopenharmony_ci * page tables since vmemmap_populate gets 26162306a36Sopenharmony_ci * called for each section separately. 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_ci new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); 26462306a36Sopenharmony_ci if (new_page) { 26562306a36Sopenharmony_ci set_pmd(pmd, __pmd(__pa(new_page) | prot)); 26662306a36Sopenharmony_ci if (!IS_ALIGNED(addr, PMD_SIZE) || 26762306a36Sopenharmony_ci !IS_ALIGNED(next, PMD_SIZE)) { 26862306a36Sopenharmony_ci vmemmap_use_new_sub_pmd(addr, next); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci continue; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci pte = vmem_pte_alloc(); 27462306a36Sopenharmony_ci if (!pte) 27562306a36Sopenharmony_ci goto out; 27662306a36Sopenharmony_ci pmd_populate(&init_mm, pmd, pte); 27762306a36Sopenharmony_ci } else if (pmd_large(*pmd)) { 27862306a36Sopenharmony_ci if (!direct) 27962306a36Sopenharmony_ci vmemmap_use_sub_pmd(addr, next); 28062306a36Sopenharmony_ci continue; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci ret = modify_pte_table(pmd, addr, next, add, direct); 28362306a36Sopenharmony_ci if (ret) 28462306a36Sopenharmony_ci goto out; 28562306a36Sopenharmony_ci if (!add) 28662306a36Sopenharmony_ci try_free_pte_table(pmd, addr & PMD_MASK); 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ci ret = 0; 28962306a36Sopenharmony_ciout: 29062306a36Sopenharmony_ci if (direct) 29162306a36Sopenharmony_ci update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages); 29262306a36Sopenharmony_ci return ret; 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_cistatic void try_free_pmd_table(pud_t *pud, unsigned long start) 29662306a36Sopenharmony_ci{ 29762306a36Sopenharmony_ci pmd_t *pmd; 29862306a36Sopenharmony_ci int i; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci pmd = pmd_offset(pud, start); 30162306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PMD; i++, pmd++) 30262306a36Sopenharmony_ci if (!pmd_none(*pmd)) 30362306a36Sopenharmony_ci return; 30462306a36Sopenharmony_ci vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); 30562306a36Sopenharmony_ci pud_clear(pud); 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, 30962306a36Sopenharmony_ci bool add, bool direct) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci unsigned long next, prot, pages = 0; 31262306a36Sopenharmony_ci int ret = -ENOMEM; 31362306a36Sopenharmony_ci pud_t *pud; 31462306a36Sopenharmony_ci pmd_t *pmd; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci prot = pgprot_val(REGION3_KERNEL); 31762306a36Sopenharmony_ci if (!MACHINE_HAS_NX) 31862306a36Sopenharmony_ci prot &= ~_REGION_ENTRY_NOEXEC; 31962306a36Sopenharmony_ci pud = pud_offset(p4d, addr); 32062306a36Sopenharmony_ci for (; addr < end; addr = next, pud++) { 32162306a36Sopenharmony_ci next = pud_addr_end(addr, end); 32262306a36Sopenharmony_ci if (!add) { 32362306a36Sopenharmony_ci if (pud_none(*pud)) 32462306a36Sopenharmony_ci continue; 32562306a36Sopenharmony_ci if (pud_large(*pud)) { 32662306a36Sopenharmony_ci if (IS_ALIGNED(addr, PUD_SIZE) && 32762306a36Sopenharmony_ci IS_ALIGNED(next, PUD_SIZE)) { 32862306a36Sopenharmony_ci pud_clear(pud); 32962306a36Sopenharmony_ci pages++; 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci continue; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci } else if (pud_none(*pud)) { 33462306a36Sopenharmony_ci if (IS_ALIGNED(addr, PUD_SIZE) && 33562306a36Sopenharmony_ci IS_ALIGNED(next, PUD_SIZE) && 33662306a36Sopenharmony_ci MACHINE_HAS_EDAT2 && direct && 33762306a36Sopenharmony_ci !debug_pagealloc_enabled()) { 33862306a36Sopenharmony_ci set_pud(pud, __pud(__pa(addr) | prot)); 33962306a36Sopenharmony_ci pages++; 34062306a36Sopenharmony_ci continue; 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 34362306a36Sopenharmony_ci if (!pmd) 34462306a36Sopenharmony_ci goto out; 34562306a36Sopenharmony_ci pud_populate(&init_mm, pud, pmd); 34662306a36Sopenharmony_ci } else if (pud_large(*pud)) { 34762306a36Sopenharmony_ci continue; 34862306a36Sopenharmony_ci } 34962306a36Sopenharmony_ci ret = modify_pmd_table(pud, addr, next, add, direct); 35062306a36Sopenharmony_ci if (ret) 35162306a36Sopenharmony_ci goto out; 35262306a36Sopenharmony_ci if (!add) 35362306a36Sopenharmony_ci try_free_pmd_table(pud, addr & PUD_MASK); 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci ret = 0; 35662306a36Sopenharmony_ciout: 35762306a36Sopenharmony_ci if (direct) 35862306a36Sopenharmony_ci update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); 35962306a36Sopenharmony_ci return ret; 36062306a36Sopenharmony_ci} 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_cistatic void try_free_pud_table(p4d_t *p4d, unsigned long start) 36362306a36Sopenharmony_ci{ 36462306a36Sopenharmony_ci pud_t *pud; 36562306a36Sopenharmony_ci int i; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci pud = pud_offset(p4d, start); 36862306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 36962306a36Sopenharmony_ci if (!pud_none(*pud)) 37062306a36Sopenharmony_ci return; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); 37362306a36Sopenharmony_ci p4d_clear(p4d); 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, 37762306a36Sopenharmony_ci bool add, bool direct) 37862306a36Sopenharmony_ci{ 37962306a36Sopenharmony_ci unsigned long next; 38062306a36Sopenharmony_ci int ret = -ENOMEM; 38162306a36Sopenharmony_ci p4d_t *p4d; 38262306a36Sopenharmony_ci pud_t *pud; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci p4d = p4d_offset(pgd, addr); 38562306a36Sopenharmony_ci for (; addr < end; addr = next, p4d++) { 38662306a36Sopenharmony_ci next = p4d_addr_end(addr, end); 38762306a36Sopenharmony_ci if (!add) { 38862306a36Sopenharmony_ci if (p4d_none(*p4d)) 38962306a36Sopenharmony_ci continue; 39062306a36Sopenharmony_ci } else if (p4d_none(*p4d)) { 39162306a36Sopenharmony_ci pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); 39262306a36Sopenharmony_ci if (!pud) 39362306a36Sopenharmony_ci goto out; 39462306a36Sopenharmony_ci p4d_populate(&init_mm, p4d, pud); 39562306a36Sopenharmony_ci } 39662306a36Sopenharmony_ci ret = modify_pud_table(p4d, addr, next, add, direct); 39762306a36Sopenharmony_ci if (ret) 39862306a36Sopenharmony_ci goto out; 39962306a36Sopenharmony_ci if (!add) 40062306a36Sopenharmony_ci try_free_pud_table(p4d, addr & P4D_MASK); 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci ret = 0; 40362306a36Sopenharmony_ciout: 40462306a36Sopenharmony_ci return ret; 40562306a36Sopenharmony_ci} 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_cistatic void try_free_p4d_table(pgd_t *pgd, unsigned long start) 40862306a36Sopenharmony_ci{ 40962306a36Sopenharmony_ci p4d_t *p4d; 41062306a36Sopenharmony_ci int i; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci p4d = p4d_offset(pgd, start); 41362306a36Sopenharmony_ci for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { 41462306a36Sopenharmony_ci if (!p4d_none(*p4d)) 41562306a36Sopenharmony_ci return; 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); 41862306a36Sopenharmony_ci pgd_clear(pgd); 41962306a36Sopenharmony_ci} 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_cistatic int modify_pagetable(unsigned long start, unsigned long end, bool add, 42262306a36Sopenharmony_ci bool direct) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci unsigned long addr, next; 42562306a36Sopenharmony_ci int ret = -ENOMEM; 42662306a36Sopenharmony_ci pgd_t *pgd; 42762306a36Sopenharmony_ci p4d_t *p4d; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) 43062306a36Sopenharmony_ci return -EINVAL; 43162306a36Sopenharmony_ci /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ 43262306a36Sopenharmony_ci if (WARN_ON_ONCE(end > VMALLOC_START)) 43362306a36Sopenharmony_ci return -EINVAL; 43462306a36Sopenharmony_ci for (addr = start; addr < end; addr = next) { 43562306a36Sopenharmony_ci next = pgd_addr_end(addr, end); 43662306a36Sopenharmony_ci pgd = pgd_offset_k(addr); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci if (!add) { 43962306a36Sopenharmony_ci if (pgd_none(*pgd)) 44062306a36Sopenharmony_ci continue; 44162306a36Sopenharmony_ci } else if (pgd_none(*pgd)) { 44262306a36Sopenharmony_ci p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); 44362306a36Sopenharmony_ci if (!p4d) 44462306a36Sopenharmony_ci goto out; 44562306a36Sopenharmony_ci pgd_populate(&init_mm, pgd, p4d); 44662306a36Sopenharmony_ci } 44762306a36Sopenharmony_ci ret = modify_p4d_table(pgd, addr, next, add, direct); 44862306a36Sopenharmony_ci if (ret) 44962306a36Sopenharmony_ci goto out; 45062306a36Sopenharmony_ci if (!add) 45162306a36Sopenharmony_ci try_free_p4d_table(pgd, addr & PGDIR_MASK); 45262306a36Sopenharmony_ci } 45362306a36Sopenharmony_ci ret = 0; 45462306a36Sopenharmony_ciout: 45562306a36Sopenharmony_ci if (!add) 45662306a36Sopenharmony_ci flush_tlb_kernel_range(start, end); 45762306a36Sopenharmony_ci return ret; 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic int add_pagetable(unsigned long start, unsigned long end, bool direct) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci return modify_pagetable(start, end, true, direct); 46362306a36Sopenharmony_ci} 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_cistatic int remove_pagetable(unsigned long start, unsigned long end, bool direct) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci return modify_pagetable(start, end, false, direct); 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci/* 47162306a36Sopenharmony_ci * Add a physical memory range to the 1:1 mapping. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_cistatic int vmem_add_range(unsigned long start, unsigned long size) 47462306a36Sopenharmony_ci{ 47562306a36Sopenharmony_ci start = (unsigned long)__va(start); 47662306a36Sopenharmony_ci return add_pagetable(start, start + size, true); 47762306a36Sopenharmony_ci} 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci/* 48062306a36Sopenharmony_ci * Remove a physical memory range from the 1:1 mapping. 48162306a36Sopenharmony_ci */ 48262306a36Sopenharmony_cistatic void vmem_remove_range(unsigned long start, unsigned long size) 48362306a36Sopenharmony_ci{ 48462306a36Sopenharmony_ci start = (unsigned long)__va(start); 48562306a36Sopenharmony_ci remove_pagetable(start, start + size, true); 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci/* 48962306a36Sopenharmony_ci * Add a backed mem_map array to the virtual mem_map array. 49062306a36Sopenharmony_ci */ 49162306a36Sopenharmony_ciint __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 49262306a36Sopenharmony_ci struct vmem_altmap *altmap) 49362306a36Sopenharmony_ci{ 49462306a36Sopenharmony_ci int ret; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 49762306a36Sopenharmony_ci /* We don't care about the node, just use NUMA_NO_NODE on allocations */ 49862306a36Sopenharmony_ci ret = add_pagetable(start, end, false); 49962306a36Sopenharmony_ci if (ret) 50062306a36Sopenharmony_ci remove_pagetable(start, end, false); 50162306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 50262306a36Sopenharmony_ci return ret; 50362306a36Sopenharmony_ci} 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_civoid vmemmap_free(unsigned long start, unsigned long end, 50662306a36Sopenharmony_ci struct vmem_altmap *altmap) 50762306a36Sopenharmony_ci{ 50862306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 50962306a36Sopenharmony_ci remove_pagetable(start, end, false); 51062306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 51162306a36Sopenharmony_ci} 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_civoid vmem_remove_mapping(unsigned long start, unsigned long size) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 51662306a36Sopenharmony_ci vmem_remove_range(start, size); 51762306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_cistruct range arch_get_mappable_range(void) 52162306a36Sopenharmony_ci{ 52262306a36Sopenharmony_ci struct range mhp_range; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci mhp_range.start = 0; 52562306a36Sopenharmony_ci mhp_range.end = max_mappable - 1; 52662306a36Sopenharmony_ci return mhp_range; 52762306a36Sopenharmony_ci} 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ciint vmem_add_mapping(unsigned long start, unsigned long size) 53062306a36Sopenharmony_ci{ 53162306a36Sopenharmony_ci struct range range = arch_get_mappable_range(); 53262306a36Sopenharmony_ci int ret; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci if (start < range.start || 53562306a36Sopenharmony_ci start + size > range.end + 1 || 53662306a36Sopenharmony_ci start + size < start) 53762306a36Sopenharmony_ci return -ERANGE; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 54062306a36Sopenharmony_ci ret = vmem_add_range(start, size); 54162306a36Sopenharmony_ci if (ret) 54262306a36Sopenharmony_ci vmem_remove_range(start, size); 54362306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 54462306a36Sopenharmony_ci return ret; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci/* 54862306a36Sopenharmony_ci * Allocate new or return existing page-table entry, but do not map it 54962306a36Sopenharmony_ci * to any physical address. If missing, allocate segment- and region- 55062306a36Sopenharmony_ci * table entries along. Meeting a large segment- or region-table entry 55162306a36Sopenharmony_ci * while traversing is an error, since the function is expected to be 55262306a36Sopenharmony_ci * called against virtual regions reserved for 4KB mappings only. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_cipte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci pte_t *ptep = NULL; 55762306a36Sopenharmony_ci pgd_t *pgd; 55862306a36Sopenharmony_ci p4d_t *p4d; 55962306a36Sopenharmony_ci pud_t *pud; 56062306a36Sopenharmony_ci pmd_t *pmd; 56162306a36Sopenharmony_ci pte_t *pte; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci pgd = pgd_offset_k(addr); 56462306a36Sopenharmony_ci if (pgd_none(*pgd)) { 56562306a36Sopenharmony_ci if (!alloc) 56662306a36Sopenharmony_ci goto out; 56762306a36Sopenharmony_ci p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); 56862306a36Sopenharmony_ci if (!p4d) 56962306a36Sopenharmony_ci goto out; 57062306a36Sopenharmony_ci pgd_populate(&init_mm, pgd, p4d); 57162306a36Sopenharmony_ci } 57262306a36Sopenharmony_ci p4d = p4d_offset(pgd, addr); 57362306a36Sopenharmony_ci if (p4d_none(*p4d)) { 57462306a36Sopenharmony_ci if (!alloc) 57562306a36Sopenharmony_ci goto out; 57662306a36Sopenharmony_ci pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); 57762306a36Sopenharmony_ci if (!pud) 57862306a36Sopenharmony_ci goto out; 57962306a36Sopenharmony_ci p4d_populate(&init_mm, p4d, pud); 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci pud = pud_offset(p4d, addr); 58262306a36Sopenharmony_ci if (pud_none(*pud)) { 58362306a36Sopenharmony_ci if (!alloc) 58462306a36Sopenharmony_ci goto out; 58562306a36Sopenharmony_ci pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 58662306a36Sopenharmony_ci if (!pmd) 58762306a36Sopenharmony_ci goto out; 58862306a36Sopenharmony_ci pud_populate(&init_mm, pud, pmd); 58962306a36Sopenharmony_ci } else if (WARN_ON_ONCE(pud_large(*pud))) { 59062306a36Sopenharmony_ci goto out; 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci pmd = pmd_offset(pud, addr); 59362306a36Sopenharmony_ci if (pmd_none(*pmd)) { 59462306a36Sopenharmony_ci if (!alloc) 59562306a36Sopenharmony_ci goto out; 59662306a36Sopenharmony_ci pte = vmem_pte_alloc(); 59762306a36Sopenharmony_ci if (!pte) 59862306a36Sopenharmony_ci goto out; 59962306a36Sopenharmony_ci pmd_populate(&init_mm, pmd, pte); 60062306a36Sopenharmony_ci } else if (WARN_ON_ONCE(pmd_large(*pmd))) { 60162306a36Sopenharmony_ci goto out; 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci ptep = pte_offset_kernel(pmd, addr); 60462306a36Sopenharmony_ciout: 60562306a36Sopenharmony_ci return ptep; 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ciint __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci pte_t *ptep, pte; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci if (!IS_ALIGNED(addr, PAGE_SIZE)) 61362306a36Sopenharmony_ci return -EINVAL; 61462306a36Sopenharmony_ci ptep = vmem_get_alloc_pte(addr, alloc); 61562306a36Sopenharmony_ci if (!ptep) 61662306a36Sopenharmony_ci return -ENOMEM; 61762306a36Sopenharmony_ci __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 61862306a36Sopenharmony_ci pte = mk_pte_phys(phys, prot); 61962306a36Sopenharmony_ci set_pte(ptep, pte); 62062306a36Sopenharmony_ci return 0; 62162306a36Sopenharmony_ci} 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ciint vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) 62462306a36Sopenharmony_ci{ 62562306a36Sopenharmony_ci int rc; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 62862306a36Sopenharmony_ci rc = __vmem_map_4k_page(addr, phys, prot, true); 62962306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 63062306a36Sopenharmony_ci return rc; 63162306a36Sopenharmony_ci} 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_civoid vmem_unmap_4k_page(unsigned long addr) 63462306a36Sopenharmony_ci{ 63562306a36Sopenharmony_ci pte_t *ptep; 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci mutex_lock(&vmem_mutex); 63862306a36Sopenharmony_ci ptep = virt_to_kpte(addr); 63962306a36Sopenharmony_ci __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 64062306a36Sopenharmony_ci pte_clear(&init_mm, addr, ptep); 64162306a36Sopenharmony_ci mutex_unlock(&vmem_mutex); 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_civoid __init vmem_map_init(void) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci __set_memory_rox(_stext, _etext); 64762306a36Sopenharmony_ci __set_memory_ro(_etext, __end_rodata); 64862306a36Sopenharmony_ci __set_memory_rox(_sinittext, _einittext); 64962306a36Sopenharmony_ci __set_memory_rox(__stext_amode31, __etext_amode31); 65062306a36Sopenharmony_ci /* 65162306a36Sopenharmony_ci * If the BEAR-enhancement facility is not installed the first 65262306a36Sopenharmony_ci * prefix page is used to return to the previous context with 65362306a36Sopenharmony_ci * an LPSWE instruction and therefore must be executable. 65462306a36Sopenharmony_ci */ 65562306a36Sopenharmony_ci if (!static_key_enabled(&cpu_has_bear)) 65662306a36Sopenharmony_ci set_memory_x(0, 1); 65762306a36Sopenharmony_ci if (debug_pagealloc_enabled()) { 65862306a36Sopenharmony_ci /* 65962306a36Sopenharmony_ci * Use RELOC_HIDE() as long as __va(0) translates to NULL, 66062306a36Sopenharmony_ci * since performing pointer arithmetic on a NULL pointer 66162306a36Sopenharmony_ci * has undefined behavior and generates compiler warnings. 66262306a36Sopenharmony_ci */ 66362306a36Sopenharmony_ci __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci if (MACHINE_HAS_NX) 66662306a36Sopenharmony_ci ctl_set_bit(0, 20); 66762306a36Sopenharmony_ci pr_info("Write protected kernel read-only data: %luk\n", 66862306a36Sopenharmony_ci (unsigned long)(__end_rodata - _stext) >> 10); 66962306a36Sopenharmony_ci} 670