18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/arch/x86_64/mm/init.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1995 Linus Torvalds 68c2ecf20Sopenharmony_ci * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz> 78c2ecf20Sopenharmony_ci * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/signal.h> 118c2ecf20Sopenharmony_ci#include <linux/sched.h> 128c2ecf20Sopenharmony_ci#include <linux/kernel.h> 138c2ecf20Sopenharmony_ci#include <linux/errno.h> 148c2ecf20Sopenharmony_ci#include <linux/string.h> 158c2ecf20Sopenharmony_ci#include <linux/types.h> 168c2ecf20Sopenharmony_ci#include <linux/ptrace.h> 178c2ecf20Sopenharmony_ci#include <linux/mman.h> 188c2ecf20Sopenharmony_ci#include <linux/mm.h> 198c2ecf20Sopenharmony_ci#include <linux/swap.h> 208c2ecf20Sopenharmony_ci#include <linux/smp.h> 218c2ecf20Sopenharmony_ci#include <linux/init.h> 228c2ecf20Sopenharmony_ci#include <linux/initrd.h> 238c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 248c2ecf20Sopenharmony_ci#include <linux/memblock.h> 258c2ecf20Sopenharmony_ci#include <linux/proc_fs.h> 268c2ecf20Sopenharmony_ci#include <linux/pci.h> 278c2ecf20Sopenharmony_ci#include <linux/pfn.h> 288c2ecf20Sopenharmony_ci#include <linux/poison.h> 298c2ecf20Sopenharmony_ci#include <linux/dma-mapping.h> 308c2ecf20Sopenharmony_ci#include <linux/memory.h> 318c2ecf20Sopenharmony_ci#include <linux/memory_hotplug.h> 328c2ecf20Sopenharmony_ci#include <linux/memremap.h> 338c2ecf20Sopenharmony_ci#include <linux/nmi.h> 348c2ecf20Sopenharmony_ci#include <linux/gfp.h> 358c2ecf20Sopenharmony_ci#include <linux/kcore.h> 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#include <asm/processor.h> 388c2ecf20Sopenharmony_ci#include <asm/bios_ebda.h> 398c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 408c2ecf20Sopenharmony_ci#include <asm/pgalloc.h> 418c2ecf20Sopenharmony_ci#include <asm/dma.h> 428c2ecf20Sopenharmony_ci#include <asm/fixmap.h> 438c2ecf20Sopenharmony_ci#include <asm/e820/api.h> 448c2ecf20Sopenharmony_ci#include <asm/apic.h> 458c2ecf20Sopenharmony_ci#include <asm/tlb.h> 468c2ecf20Sopenharmony_ci#include <asm/mmu_context.h> 478c2ecf20Sopenharmony_ci#include <asm/proto.h> 488c2ecf20Sopenharmony_ci#include <asm/smp.h> 498c2ecf20Sopenharmony_ci#include <asm/sections.h> 508c2ecf20Sopenharmony_ci#include <asm/kdebug.h> 518c2ecf20Sopenharmony_ci#include <asm/numa.h> 528c2ecf20Sopenharmony_ci#include <asm/set_memory.h> 538c2ecf20Sopenharmony_ci#include <asm/init.h> 548c2ecf20Sopenharmony_ci#include <asm/uv/uv.h> 558c2ecf20Sopenharmony_ci#include <asm/setup.h> 568c2ecf20Sopenharmony_ci#include <asm/ftrace.h> 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci#include "mm_internal.h" 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci#include "ident_map.c" 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci#define DEFINE_POPULATE(fname, type1, type2, init) \ 638c2ecf20Sopenharmony_cistatic inline void fname##_init(struct mm_struct *mm, \ 648c2ecf20Sopenharmony_ci type1##_t *arg1, type2##_t *arg2, bool init) \ 658c2ecf20Sopenharmony_ci{ \ 668c2ecf20Sopenharmony_ci if (init) \ 678c2ecf20Sopenharmony_ci fname##_safe(mm, arg1, arg2); \ 688c2ecf20Sopenharmony_ci else \ 698c2ecf20Sopenharmony_ci fname(mm, arg1, arg2); \ 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ciDEFINE_POPULATE(p4d_populate, p4d, pud, init) 738c2ecf20Sopenharmony_ciDEFINE_POPULATE(pgd_populate, pgd, p4d, init) 748c2ecf20Sopenharmony_ciDEFINE_POPULATE(pud_populate, pud, pmd, init) 758c2ecf20Sopenharmony_ciDEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init) 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci#define DEFINE_ENTRY(type1, type2, init) \ 788c2ecf20Sopenharmony_cistatic inline void set_##type1##_init(type1##_t *arg1, \ 798c2ecf20Sopenharmony_ci type2##_t arg2, bool init) \ 808c2ecf20Sopenharmony_ci{ \ 818c2ecf20Sopenharmony_ci if (init) \ 828c2ecf20Sopenharmony_ci set_##type1##_safe(arg1, arg2); \ 838c2ecf20Sopenharmony_ci else \ 848c2ecf20Sopenharmony_ci set_##type1(arg1, arg2); \ 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ciDEFINE_ENTRY(p4d, p4d, init) 888c2ecf20Sopenharmony_ciDEFINE_ENTRY(pud, pud, init) 898c2ecf20Sopenharmony_ciDEFINE_ENTRY(pmd, pmd, init) 908c2ecf20Sopenharmony_ciDEFINE_ENTRY(pte, pte, init) 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci/* 948c2ecf20Sopenharmony_ci * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the 958c2ecf20Sopenharmony_ci * physical space so we can cache the place of the first one and move 968c2ecf20Sopenharmony_ci * around without checking the pgd every time. 978c2ecf20Sopenharmony_ci */ 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/* Bits supported by the hardware: */ 1008c2ecf20Sopenharmony_cipteval_t __supported_pte_mask __read_mostly = ~0; 1018c2ecf20Sopenharmony_ci/* Bits allowed in normal kernel mappings: */ 1028c2ecf20Sopenharmony_cipteval_t __default_kernel_pte_mask __read_mostly = ~0; 1038c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__supported_pte_mask); 1048c2ecf20Sopenharmony_ci/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ 1058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__default_kernel_pte_mask); 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ciint force_personality32; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci/* 1108c2ecf20Sopenharmony_ci * noexec32=on|off 1118c2ecf20Sopenharmony_ci * Control non executable heap for 32bit processes. 1128c2ecf20Sopenharmony_ci * To control the stack too use noexec=off 1138c2ecf20Sopenharmony_ci * 1148c2ecf20Sopenharmony_ci * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) 1158c2ecf20Sopenharmony_ci * off PROT_READ implies PROT_EXEC 1168c2ecf20Sopenharmony_ci */ 1178c2ecf20Sopenharmony_cistatic int __init nonx32_setup(char *str) 1188c2ecf20Sopenharmony_ci{ 1198c2ecf20Sopenharmony_ci if (!strcmp(str, "on")) 1208c2ecf20Sopenharmony_ci force_personality32 &= ~READ_IMPLIES_EXEC; 1218c2ecf20Sopenharmony_ci else if (!strcmp(str, "off")) 1228c2ecf20Sopenharmony_ci force_personality32 |= READ_IMPLIES_EXEC; 1238c2ecf20Sopenharmony_ci return 1; 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci__setup("noexec32=", nonx32_setup); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_cistatic void sync_global_pgds_l5(unsigned long start, unsigned long end) 1288c2ecf20Sopenharmony_ci{ 1298c2ecf20Sopenharmony_ci unsigned long addr; 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) { 1328c2ecf20Sopenharmony_ci const pgd_t *pgd_ref = pgd_offset_k(addr); 1338c2ecf20Sopenharmony_ci struct page *page; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci /* Check for overflow */ 1368c2ecf20Sopenharmony_ci if (addr < start) 1378c2ecf20Sopenharmony_ci break; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci if (pgd_none(*pgd_ref)) 1408c2ecf20Sopenharmony_ci continue; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci spin_lock(&pgd_lock); 1438c2ecf20Sopenharmony_ci list_for_each_entry(page, &pgd_list, lru) { 1448c2ecf20Sopenharmony_ci pgd_t *pgd; 1458c2ecf20Sopenharmony_ci spinlock_t *pgt_lock; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci pgd = (pgd_t *)page_address(page) + pgd_index(addr); 1488c2ecf20Sopenharmony_ci /* the pgt_lock only for Xen */ 1498c2ecf20Sopenharmony_ci pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 1508c2ecf20Sopenharmony_ci spin_lock(pgt_lock); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci if (!pgd_none(*pgd_ref) && !pgd_none(*pgd)) 1538c2ecf20Sopenharmony_ci BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) 1568c2ecf20Sopenharmony_ci set_pgd(pgd, *pgd_ref); 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci spin_unlock(pgt_lock); 1598c2ecf20Sopenharmony_ci } 1608c2ecf20Sopenharmony_ci spin_unlock(&pgd_lock); 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_cistatic void sync_global_pgds_l4(unsigned long start, unsigned long end) 1658c2ecf20Sopenharmony_ci{ 1668c2ecf20Sopenharmony_ci unsigned long addr; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) { 1698c2ecf20Sopenharmony_ci pgd_t *pgd_ref = pgd_offset_k(addr); 1708c2ecf20Sopenharmony_ci const p4d_t *p4d_ref; 1718c2ecf20Sopenharmony_ci struct page *page; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci /* 1748c2ecf20Sopenharmony_ci * With folded p4d, pgd_none() is always false, we need to 1758c2ecf20Sopenharmony_ci * handle synchonization on p4d level. 1768c2ecf20Sopenharmony_ci */ 1778c2ecf20Sopenharmony_ci MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref)); 1788c2ecf20Sopenharmony_ci p4d_ref = p4d_offset(pgd_ref, addr); 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci if (p4d_none(*p4d_ref)) 1818c2ecf20Sopenharmony_ci continue; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci spin_lock(&pgd_lock); 1848c2ecf20Sopenharmony_ci list_for_each_entry(page, &pgd_list, lru) { 1858c2ecf20Sopenharmony_ci pgd_t *pgd; 1868c2ecf20Sopenharmony_ci p4d_t *p4d; 1878c2ecf20Sopenharmony_ci spinlock_t *pgt_lock; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci pgd = (pgd_t *)page_address(page) + pgd_index(addr); 1908c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, addr); 1918c2ecf20Sopenharmony_ci /* the pgt_lock only for Xen */ 1928c2ecf20Sopenharmony_ci pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 1938c2ecf20Sopenharmony_ci spin_lock(pgt_lock); 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci if (!p4d_none(*p4d_ref) && !p4d_none(*p4d)) 1968c2ecf20Sopenharmony_ci BUG_ON(p4d_pgtable(*p4d) 1978c2ecf20Sopenharmony_ci != p4d_pgtable(*p4d_ref)); 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci if (p4d_none(*p4d)) 2008c2ecf20Sopenharmony_ci set_p4d(p4d, *p4d_ref); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci spin_unlock(pgt_lock); 2038c2ecf20Sopenharmony_ci } 2048c2ecf20Sopenharmony_ci spin_unlock(&pgd_lock); 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci} 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci/* 2098c2ecf20Sopenharmony_ci * When memory was added make sure all the processes MM have 2108c2ecf20Sopenharmony_ci * suitable PGD entries in the local PGD level page. 2118c2ecf20Sopenharmony_ci */ 2128c2ecf20Sopenharmony_cistatic void sync_global_pgds(unsigned long start, unsigned long end) 2138c2ecf20Sopenharmony_ci{ 2148c2ecf20Sopenharmony_ci if (pgtable_l5_enabled()) 2158c2ecf20Sopenharmony_ci sync_global_pgds_l5(start, end); 2168c2ecf20Sopenharmony_ci else 2178c2ecf20Sopenharmony_ci sync_global_pgds_l4(start, end); 2188c2ecf20Sopenharmony_ci} 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci/* 2218c2ecf20Sopenharmony_ci * NOTE: This function is marked __ref because it calls __init function 2228c2ecf20Sopenharmony_ci * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 2238c2ecf20Sopenharmony_ci */ 2248c2ecf20Sopenharmony_cistatic __ref void *spp_getpage(void) 2258c2ecf20Sopenharmony_ci{ 2268c2ecf20Sopenharmony_ci void *ptr; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (after_bootmem) 2298c2ecf20Sopenharmony_ci ptr = (void *) get_zeroed_page(GFP_ATOMIC); 2308c2ecf20Sopenharmony_ci else 2318c2ecf20Sopenharmony_ci ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { 2348c2ecf20Sopenharmony_ci panic("set_pte_phys: cannot allocate page data %s\n", 2358c2ecf20Sopenharmony_ci after_bootmem ? "after bootmem" : ""); 2368c2ecf20Sopenharmony_ci } 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci pr_debug("spp_getpage %p\n", ptr); 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci return ptr; 2418c2ecf20Sopenharmony_ci} 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_cistatic p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr) 2448c2ecf20Sopenharmony_ci{ 2458c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) { 2468c2ecf20Sopenharmony_ci p4d_t *p4d = (p4d_t *)spp_getpage(); 2478c2ecf20Sopenharmony_ci pgd_populate(&init_mm, pgd, p4d); 2488c2ecf20Sopenharmony_ci if (p4d != p4d_offset(pgd, 0)) 2498c2ecf20Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", 2508c2ecf20Sopenharmony_ci p4d, p4d_offset(pgd, 0)); 2518c2ecf20Sopenharmony_ci } 2528c2ecf20Sopenharmony_ci return p4d_offset(pgd, vaddr); 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_cistatic pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr) 2568c2ecf20Sopenharmony_ci{ 2578c2ecf20Sopenharmony_ci if (p4d_none(*p4d)) { 2588c2ecf20Sopenharmony_ci pud_t *pud = (pud_t *)spp_getpage(); 2598c2ecf20Sopenharmony_ci p4d_populate(&init_mm, p4d, pud); 2608c2ecf20Sopenharmony_ci if (pud != pud_offset(p4d, 0)) 2618c2ecf20Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", 2628c2ecf20Sopenharmony_ci pud, pud_offset(p4d, 0)); 2638c2ecf20Sopenharmony_ci } 2648c2ecf20Sopenharmony_ci return pud_offset(p4d, vaddr); 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_cistatic pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) 2688c2ecf20Sopenharmony_ci{ 2698c2ecf20Sopenharmony_ci if (pud_none(*pud)) { 2708c2ecf20Sopenharmony_ci pmd_t *pmd = (pmd_t *) spp_getpage(); 2718c2ecf20Sopenharmony_ci pud_populate(&init_mm, pud, pmd); 2728c2ecf20Sopenharmony_ci if (pmd != pmd_offset(pud, 0)) 2738c2ecf20Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n", 2748c2ecf20Sopenharmony_ci pmd, pmd_offset(pud, 0)); 2758c2ecf20Sopenharmony_ci } 2768c2ecf20Sopenharmony_ci return pmd_offset(pud, vaddr); 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci if (pmd_none(*pmd)) { 2828c2ecf20Sopenharmony_ci pte_t *pte = (pte_t *) spp_getpage(); 2838c2ecf20Sopenharmony_ci pmd_populate_kernel(&init_mm, pmd, pte); 2848c2ecf20Sopenharmony_ci if (pte != pte_offset_kernel(pmd, 0)) 2858c2ecf20Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #03!\n"); 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_ci return pte_offset_kernel(pmd, vaddr); 2888c2ecf20Sopenharmony_ci} 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_cistatic void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci pmd_t *pmd = fill_pmd(pud, vaddr); 2938c2ecf20Sopenharmony_ci pte_t *pte = fill_pte(pmd, vaddr); 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci set_pte(pte, new_pte); 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci /* 2988c2ecf20Sopenharmony_ci * It's enough to flush this one mapping. 2998c2ecf20Sopenharmony_ci * (PGE mappings get flushed as well) 3008c2ecf20Sopenharmony_ci */ 3018c2ecf20Sopenharmony_ci flush_tlb_one_kernel(vaddr); 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_civoid set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) 3058c2ecf20Sopenharmony_ci{ 3068c2ecf20Sopenharmony_ci p4d_t *p4d = p4d_page + p4d_index(vaddr); 3078c2ecf20Sopenharmony_ci pud_t *pud = fill_pud(p4d, vaddr); 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci __set_pte_vaddr(pud, vaddr, new_pte); 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_civoid set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) 3138c2ecf20Sopenharmony_ci{ 3148c2ecf20Sopenharmony_ci pud_t *pud = pud_page + pud_index(vaddr); 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci __set_pte_vaddr(pud, vaddr, new_pte); 3178c2ecf20Sopenharmony_ci} 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_civoid set_pte_vaddr(unsigned long vaddr, pte_t pteval) 3208c2ecf20Sopenharmony_ci{ 3218c2ecf20Sopenharmony_ci pgd_t *pgd; 3228c2ecf20Sopenharmony_ci p4d_t *p4d_page; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval)); 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci pgd = pgd_offset_k(vaddr); 3278c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) { 3288c2ecf20Sopenharmony_ci printk(KERN_ERR 3298c2ecf20Sopenharmony_ci "PGD FIXMAP MISSING, it should be setup in head.S!\n"); 3308c2ecf20Sopenharmony_ci return; 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci p4d_page = p4d_offset(pgd, 0); 3348c2ecf20Sopenharmony_ci set_pte_vaddr_p4d(p4d_page, vaddr, pteval); 3358c2ecf20Sopenharmony_ci} 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_cipmd_t * __init populate_extra_pmd(unsigned long vaddr) 3388c2ecf20Sopenharmony_ci{ 3398c2ecf20Sopenharmony_ci pgd_t *pgd; 3408c2ecf20Sopenharmony_ci p4d_t *p4d; 3418c2ecf20Sopenharmony_ci pud_t *pud; 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci pgd = pgd_offset_k(vaddr); 3448c2ecf20Sopenharmony_ci p4d = fill_p4d(pgd, vaddr); 3458c2ecf20Sopenharmony_ci pud = fill_pud(p4d, vaddr); 3468c2ecf20Sopenharmony_ci return fill_pmd(pud, vaddr); 3478c2ecf20Sopenharmony_ci} 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_cipte_t * __init populate_extra_pte(unsigned long vaddr) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci pmd_t *pmd; 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci pmd = populate_extra_pmd(vaddr); 3548c2ecf20Sopenharmony_ci return fill_pte(pmd, vaddr); 3558c2ecf20Sopenharmony_ci} 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci/* 3588c2ecf20Sopenharmony_ci * Create large page table mappings for a range of physical addresses. 3598c2ecf20Sopenharmony_ci */ 3608c2ecf20Sopenharmony_cistatic void __init __init_extra_mapping(unsigned long phys, unsigned long size, 3618c2ecf20Sopenharmony_ci enum page_cache_mode cache) 3628c2ecf20Sopenharmony_ci{ 3638c2ecf20Sopenharmony_ci pgd_t *pgd; 3648c2ecf20Sopenharmony_ci p4d_t *p4d; 3658c2ecf20Sopenharmony_ci pud_t *pud; 3668c2ecf20Sopenharmony_ci pmd_t *pmd; 3678c2ecf20Sopenharmony_ci pgprot_t prot; 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) | 3708c2ecf20Sopenharmony_ci protval_4k_2_large(cachemode2protval(cache)); 3718c2ecf20Sopenharmony_ci BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); 3728c2ecf20Sopenharmony_ci for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { 3738c2ecf20Sopenharmony_ci pgd = pgd_offset_k((unsigned long)__va(phys)); 3748c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) { 3758c2ecf20Sopenharmony_ci p4d = (p4d_t *) spp_getpage(); 3768c2ecf20Sopenharmony_ci set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE | 3778c2ecf20Sopenharmony_ci _PAGE_USER)); 3788c2ecf20Sopenharmony_ci } 3798c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, (unsigned long)__va(phys)); 3808c2ecf20Sopenharmony_ci if (p4d_none(*p4d)) { 3818c2ecf20Sopenharmony_ci pud = (pud_t *) spp_getpage(); 3828c2ecf20Sopenharmony_ci set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE | 3838c2ecf20Sopenharmony_ci _PAGE_USER)); 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci pud = pud_offset(p4d, (unsigned long)__va(phys)); 3868c2ecf20Sopenharmony_ci if (pud_none(*pud)) { 3878c2ecf20Sopenharmony_ci pmd = (pmd_t *) spp_getpage(); 3888c2ecf20Sopenharmony_ci set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | 3898c2ecf20Sopenharmony_ci _PAGE_USER)); 3908c2ecf20Sopenharmony_ci } 3918c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, phys); 3928c2ecf20Sopenharmony_ci BUG_ON(!pmd_none(*pmd)); 3938c2ecf20Sopenharmony_ci set_pmd(pmd, __pmd(phys | pgprot_val(prot))); 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci} 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_civoid __init init_extra_mapping_wb(unsigned long phys, unsigned long size) 3988c2ecf20Sopenharmony_ci{ 3998c2ecf20Sopenharmony_ci __init_extra_mapping(phys, size, _PAGE_CACHE_MODE_WB); 4008c2ecf20Sopenharmony_ci} 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_civoid __init init_extra_mapping_uc(unsigned long phys, unsigned long size) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci __init_extra_mapping(phys, size, _PAGE_CACHE_MODE_UC); 4058c2ecf20Sopenharmony_ci} 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci/* 4088c2ecf20Sopenharmony_ci * The head.S code sets up the kernel high mapping: 4098c2ecf20Sopenharmony_ci * 4108c2ecf20Sopenharmony_ci * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) 4118c2ecf20Sopenharmony_ci * 4128c2ecf20Sopenharmony_ci * phys_base holds the negative offset to the kernel, which is added 4138c2ecf20Sopenharmony_ci * to the compile time generated pmds. This results in invalid pmds up 4148c2ecf20Sopenharmony_ci * to the point where we hit the physaddr 0 mapping. 4158c2ecf20Sopenharmony_ci * 4168c2ecf20Sopenharmony_ci * We limit the mappings to the region from _text to _brk_end. _brk_end 4178c2ecf20Sopenharmony_ci * is rounded up to the 2MB boundary. This catches the invalid pmds as 4188c2ecf20Sopenharmony_ci * well, as they are located before _text: 4198c2ecf20Sopenharmony_ci */ 4208c2ecf20Sopenharmony_civoid __init cleanup_highmap(void) 4218c2ecf20Sopenharmony_ci{ 4228c2ecf20Sopenharmony_ci unsigned long vaddr = __START_KERNEL_map; 4238c2ecf20Sopenharmony_ci unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE; 4248c2ecf20Sopenharmony_ci unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; 4258c2ecf20Sopenharmony_ci pmd_t *pmd = level2_kernel_pgt; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci /* 4288c2ecf20Sopenharmony_ci * Native path, max_pfn_mapped is not set yet. 4298c2ecf20Sopenharmony_ci * Xen has valid max_pfn_mapped set in 4308c2ecf20Sopenharmony_ci * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable(). 4318c2ecf20Sopenharmony_ci */ 4328c2ecf20Sopenharmony_ci if (max_pfn_mapped) 4338c2ecf20Sopenharmony_ci vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { 4368c2ecf20Sopenharmony_ci if (pmd_none(*pmd)) 4378c2ecf20Sopenharmony_ci continue; 4388c2ecf20Sopenharmony_ci if (vaddr < (unsigned long) _text || vaddr > end) 4398c2ecf20Sopenharmony_ci set_pmd(pmd, __pmd(0)); 4408c2ecf20Sopenharmony_ci } 4418c2ecf20Sopenharmony_ci} 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci/* 4448c2ecf20Sopenharmony_ci * Create PTE level page table mapping for physical addresses. 4458c2ecf20Sopenharmony_ci * It returns the last physical address mapped. 4468c2ecf20Sopenharmony_ci */ 4478c2ecf20Sopenharmony_cistatic unsigned long __meminit 4488c2ecf20Sopenharmony_ciphys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, 4498c2ecf20Sopenharmony_ci pgprot_t prot, bool init) 4508c2ecf20Sopenharmony_ci{ 4518c2ecf20Sopenharmony_ci unsigned long pages = 0, paddr_next; 4528c2ecf20Sopenharmony_ci unsigned long paddr_last = paddr_end; 4538c2ecf20Sopenharmony_ci pte_t *pte; 4548c2ecf20Sopenharmony_ci int i; 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci pte = pte_page + pte_index(paddr); 4578c2ecf20Sopenharmony_ci i = pte_index(paddr); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) { 4608c2ecf20Sopenharmony_ci paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE; 4618c2ecf20Sopenharmony_ci if (paddr >= paddr_end) { 4628c2ecf20Sopenharmony_ci if (!after_bootmem && 4638c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PAGE_MASK, paddr_next, 4648c2ecf20Sopenharmony_ci E820_TYPE_RAM) && 4658c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PAGE_MASK, paddr_next, 4668c2ecf20Sopenharmony_ci E820_TYPE_RESERVED_KERN)) 4678c2ecf20Sopenharmony_ci set_pte_init(pte, __pte(0), init); 4688c2ecf20Sopenharmony_ci continue; 4698c2ecf20Sopenharmony_ci } 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci /* 4728c2ecf20Sopenharmony_ci * We will re-use the existing mapping. 4738c2ecf20Sopenharmony_ci * Xen for example has some special requirements, like mapping 4748c2ecf20Sopenharmony_ci * pagetable pages as RO. So assume someone who pre-setup 4758c2ecf20Sopenharmony_ci * these mappings are more intelligent. 4768c2ecf20Sopenharmony_ci */ 4778c2ecf20Sopenharmony_ci if (!pte_none(*pte)) { 4788c2ecf20Sopenharmony_ci if (!after_bootmem) 4798c2ecf20Sopenharmony_ci pages++; 4808c2ecf20Sopenharmony_ci continue; 4818c2ecf20Sopenharmony_ci } 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci if (0) 4848c2ecf20Sopenharmony_ci pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, 4858c2ecf20Sopenharmony_ci pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); 4868c2ecf20Sopenharmony_ci pages++; 4878c2ecf20Sopenharmony_ci set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); 4888c2ecf20Sopenharmony_ci paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; 4898c2ecf20Sopenharmony_ci } 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_4K, pages); 4928c2ecf20Sopenharmony_ci 4938c2ecf20Sopenharmony_ci return paddr_last; 4948c2ecf20Sopenharmony_ci} 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci/* 4978c2ecf20Sopenharmony_ci * Create PMD level page table mapping for physical addresses. The virtual 4988c2ecf20Sopenharmony_ci * and physical address have to be aligned at this level. 4998c2ecf20Sopenharmony_ci * It returns the last physical address mapped. 5008c2ecf20Sopenharmony_ci */ 5018c2ecf20Sopenharmony_cistatic unsigned long __meminit 5028c2ecf20Sopenharmony_ciphys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, 5038c2ecf20Sopenharmony_ci unsigned long page_size_mask, pgprot_t prot, bool init) 5048c2ecf20Sopenharmony_ci{ 5058c2ecf20Sopenharmony_ci unsigned long pages = 0, paddr_next; 5068c2ecf20Sopenharmony_ci unsigned long paddr_last = paddr_end; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci int i = pmd_index(paddr); 5098c2ecf20Sopenharmony_ci 5108c2ecf20Sopenharmony_ci for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) { 5118c2ecf20Sopenharmony_ci pmd_t *pmd = pmd_page + pmd_index(paddr); 5128c2ecf20Sopenharmony_ci pte_t *pte; 5138c2ecf20Sopenharmony_ci pgprot_t new_prot = prot; 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci paddr_next = (paddr & PMD_MASK) + PMD_SIZE; 5168c2ecf20Sopenharmony_ci if (paddr >= paddr_end) { 5178c2ecf20Sopenharmony_ci if (!after_bootmem && 5188c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PMD_MASK, paddr_next, 5198c2ecf20Sopenharmony_ci E820_TYPE_RAM) && 5208c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PMD_MASK, paddr_next, 5218c2ecf20Sopenharmony_ci E820_TYPE_RESERVED_KERN)) 5228c2ecf20Sopenharmony_ci set_pmd_init(pmd, __pmd(0), init); 5238c2ecf20Sopenharmony_ci continue; 5248c2ecf20Sopenharmony_ci } 5258c2ecf20Sopenharmony_ci 5268c2ecf20Sopenharmony_ci if (!pmd_none(*pmd)) { 5278c2ecf20Sopenharmony_ci if (!pmd_large(*pmd)) { 5288c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 5298c2ecf20Sopenharmony_ci pte = (pte_t *)pmd_page_vaddr(*pmd); 5308c2ecf20Sopenharmony_ci paddr_last = phys_pte_init(pte, paddr, 5318c2ecf20Sopenharmony_ci paddr_end, prot, 5328c2ecf20Sopenharmony_ci init); 5338c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 5348c2ecf20Sopenharmony_ci continue; 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci /* 5378c2ecf20Sopenharmony_ci * If we are ok with PG_LEVEL_2M mapping, then we will 5388c2ecf20Sopenharmony_ci * use the existing mapping, 5398c2ecf20Sopenharmony_ci * 5408c2ecf20Sopenharmony_ci * Otherwise, we will split the large page mapping but 5418c2ecf20Sopenharmony_ci * use the same existing protection bits except for 5428c2ecf20Sopenharmony_ci * large page, so that we don't violate Intel's TLB 5438c2ecf20Sopenharmony_ci * Application note (317080) which says, while changing 5448c2ecf20Sopenharmony_ci * the page sizes, new and old translations should 5458c2ecf20Sopenharmony_ci * not differ with respect to page frame and 5468c2ecf20Sopenharmony_ci * attributes. 5478c2ecf20Sopenharmony_ci */ 5488c2ecf20Sopenharmony_ci if (page_size_mask & (1 << PG_LEVEL_2M)) { 5498c2ecf20Sopenharmony_ci if (!after_bootmem) 5508c2ecf20Sopenharmony_ci pages++; 5518c2ecf20Sopenharmony_ci paddr_last = paddr_next; 5528c2ecf20Sopenharmony_ci continue; 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); 5558c2ecf20Sopenharmony_ci } 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci if (page_size_mask & (1<<PG_LEVEL_2M)) { 5588c2ecf20Sopenharmony_ci pages++; 5598c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 5608c2ecf20Sopenharmony_ci set_pte_init((pte_t *)pmd, 5618c2ecf20Sopenharmony_ci pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, 5628c2ecf20Sopenharmony_ci __pgprot(pgprot_val(prot) | _PAGE_PSE)), 5638c2ecf20Sopenharmony_ci init); 5648c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 5658c2ecf20Sopenharmony_ci paddr_last = paddr_next; 5668c2ecf20Sopenharmony_ci continue; 5678c2ecf20Sopenharmony_ci } 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci pte = alloc_low_page(); 5708c2ecf20Sopenharmony_ci paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init); 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 5738c2ecf20Sopenharmony_ci pmd_populate_kernel_init(&init_mm, pmd, pte, init); 5748c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 5758c2ecf20Sopenharmony_ci } 5768c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_2M, pages); 5778c2ecf20Sopenharmony_ci return paddr_last; 5788c2ecf20Sopenharmony_ci} 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci/* 5818c2ecf20Sopenharmony_ci * Create PUD level page table mapping for physical addresses. The virtual 5828c2ecf20Sopenharmony_ci * and physical address do not have to be aligned at this level. KASLR can 5838c2ecf20Sopenharmony_ci * randomize virtual addresses up to this level. 5848c2ecf20Sopenharmony_ci * It returns the last physical address mapped. 5858c2ecf20Sopenharmony_ci */ 5868c2ecf20Sopenharmony_cistatic unsigned long __meminit 5878c2ecf20Sopenharmony_ciphys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, 5888c2ecf20Sopenharmony_ci unsigned long page_size_mask, pgprot_t _prot, bool init) 5898c2ecf20Sopenharmony_ci{ 5908c2ecf20Sopenharmony_ci unsigned long pages = 0, paddr_next; 5918c2ecf20Sopenharmony_ci unsigned long paddr_last = paddr_end; 5928c2ecf20Sopenharmony_ci unsigned long vaddr = (unsigned long)__va(paddr); 5938c2ecf20Sopenharmony_ci int i = pud_index(vaddr); 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { 5968c2ecf20Sopenharmony_ci pud_t *pud; 5978c2ecf20Sopenharmony_ci pmd_t *pmd; 5988c2ecf20Sopenharmony_ci pgprot_t prot = _prot; 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_ci vaddr = (unsigned long)__va(paddr); 6018c2ecf20Sopenharmony_ci pud = pud_page + pud_index(vaddr); 6028c2ecf20Sopenharmony_ci paddr_next = (paddr & PUD_MASK) + PUD_SIZE; 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci if (paddr >= paddr_end) { 6058c2ecf20Sopenharmony_ci if (!after_bootmem && 6068c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PUD_MASK, paddr_next, 6078c2ecf20Sopenharmony_ci E820_TYPE_RAM) && 6088c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & PUD_MASK, paddr_next, 6098c2ecf20Sopenharmony_ci E820_TYPE_RESERVED_KERN)) 6108c2ecf20Sopenharmony_ci set_pud_init(pud, __pud(0), init); 6118c2ecf20Sopenharmony_ci continue; 6128c2ecf20Sopenharmony_ci } 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci if (!pud_none(*pud)) { 6158c2ecf20Sopenharmony_ci if (!pud_large(*pud)) { 6168c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, 0); 6178c2ecf20Sopenharmony_ci paddr_last = phys_pmd_init(pmd, paddr, 6188c2ecf20Sopenharmony_ci paddr_end, 6198c2ecf20Sopenharmony_ci page_size_mask, 6208c2ecf20Sopenharmony_ci prot, init); 6218c2ecf20Sopenharmony_ci continue; 6228c2ecf20Sopenharmony_ci } 6238c2ecf20Sopenharmony_ci /* 6248c2ecf20Sopenharmony_ci * If we are ok with PG_LEVEL_1G mapping, then we will 6258c2ecf20Sopenharmony_ci * use the existing mapping. 6268c2ecf20Sopenharmony_ci * 6278c2ecf20Sopenharmony_ci * Otherwise, we will split the gbpage mapping but use 6288c2ecf20Sopenharmony_ci * the same existing protection bits except for large 6298c2ecf20Sopenharmony_ci * page, so that we don't violate Intel's TLB 6308c2ecf20Sopenharmony_ci * Application note (317080) which says, while changing 6318c2ecf20Sopenharmony_ci * the page sizes, new and old translations should 6328c2ecf20Sopenharmony_ci * not differ with respect to page frame and 6338c2ecf20Sopenharmony_ci * attributes. 6348c2ecf20Sopenharmony_ci */ 6358c2ecf20Sopenharmony_ci if (page_size_mask & (1 << PG_LEVEL_1G)) { 6368c2ecf20Sopenharmony_ci if (!after_bootmem) 6378c2ecf20Sopenharmony_ci pages++; 6388c2ecf20Sopenharmony_ci paddr_last = paddr_next; 6398c2ecf20Sopenharmony_ci continue; 6408c2ecf20Sopenharmony_ci } 6418c2ecf20Sopenharmony_ci prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); 6428c2ecf20Sopenharmony_ci } 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci if (page_size_mask & (1<<PG_LEVEL_1G)) { 6458c2ecf20Sopenharmony_ci pages++; 6468c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci set_pte_init((pte_t *)pud, 6518c2ecf20Sopenharmony_ci pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, 6528c2ecf20Sopenharmony_ci prot), 6538c2ecf20Sopenharmony_ci init); 6548c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 6558c2ecf20Sopenharmony_ci paddr_last = paddr_next; 6568c2ecf20Sopenharmony_ci continue; 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci pmd = alloc_low_page(); 6608c2ecf20Sopenharmony_ci paddr_last = phys_pmd_init(pmd, paddr, paddr_end, 6618c2ecf20Sopenharmony_ci page_size_mask, prot, init); 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 6648c2ecf20Sopenharmony_ci pud_populate_init(&init_mm, pud, pmd, init); 6658c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 6668c2ecf20Sopenharmony_ci } 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_1G, pages); 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci return paddr_last; 6718c2ecf20Sopenharmony_ci} 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_cistatic unsigned long __meminit 6748c2ecf20Sopenharmony_ciphys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, 6758c2ecf20Sopenharmony_ci unsigned long page_size_mask, pgprot_t prot, bool init) 6768c2ecf20Sopenharmony_ci{ 6778c2ecf20Sopenharmony_ci unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci paddr_last = paddr_end; 6808c2ecf20Sopenharmony_ci vaddr = (unsigned long)__va(paddr); 6818c2ecf20Sopenharmony_ci vaddr_end = (unsigned long)__va(paddr_end); 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci if (!pgtable_l5_enabled()) 6848c2ecf20Sopenharmony_ci return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, 6858c2ecf20Sopenharmony_ci page_size_mask, prot, init); 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci for (; vaddr < vaddr_end; vaddr = vaddr_next) { 6888c2ecf20Sopenharmony_ci p4d_t *p4d = p4d_page + p4d_index(vaddr); 6898c2ecf20Sopenharmony_ci pud_t *pud; 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_ci vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; 6928c2ecf20Sopenharmony_ci paddr = __pa(vaddr); 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_ci if (paddr >= paddr_end) { 6958c2ecf20Sopenharmony_ci paddr_next = __pa(vaddr_next); 6968c2ecf20Sopenharmony_ci if (!after_bootmem && 6978c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & P4D_MASK, paddr_next, 6988c2ecf20Sopenharmony_ci E820_TYPE_RAM) && 6998c2ecf20Sopenharmony_ci !e820__mapped_any(paddr & P4D_MASK, paddr_next, 7008c2ecf20Sopenharmony_ci E820_TYPE_RESERVED_KERN)) 7018c2ecf20Sopenharmony_ci set_p4d_init(p4d, __p4d(0), init); 7028c2ecf20Sopenharmony_ci continue; 7038c2ecf20Sopenharmony_ci } 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci if (!p4d_none(*p4d)) { 7068c2ecf20Sopenharmony_ci pud = pud_offset(p4d, 0); 7078c2ecf20Sopenharmony_ci paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), 7088c2ecf20Sopenharmony_ci page_size_mask, prot, init); 7098c2ecf20Sopenharmony_ci continue; 7108c2ecf20Sopenharmony_ci } 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci pud = alloc_low_page(); 7138c2ecf20Sopenharmony_ci paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), 7148c2ecf20Sopenharmony_ci page_size_mask, prot, init); 7158c2ecf20Sopenharmony_ci 7168c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 7178c2ecf20Sopenharmony_ci p4d_populate_init(&init_mm, p4d, pud, init); 7188c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 7198c2ecf20Sopenharmony_ci } 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci return paddr_last; 7228c2ecf20Sopenharmony_ci} 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_cistatic unsigned long __meminit 7258c2ecf20Sopenharmony_ci__kernel_physical_mapping_init(unsigned long paddr_start, 7268c2ecf20Sopenharmony_ci unsigned long paddr_end, 7278c2ecf20Sopenharmony_ci unsigned long page_size_mask, 7288c2ecf20Sopenharmony_ci pgprot_t prot, bool init) 7298c2ecf20Sopenharmony_ci{ 7308c2ecf20Sopenharmony_ci bool pgd_changed = false; 7318c2ecf20Sopenharmony_ci unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci paddr_last = paddr_end; 7348c2ecf20Sopenharmony_ci vaddr = (unsigned long)__va(paddr_start); 7358c2ecf20Sopenharmony_ci vaddr_end = (unsigned long)__va(paddr_end); 7368c2ecf20Sopenharmony_ci vaddr_start = vaddr; 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci for (; vaddr < vaddr_end; vaddr = vaddr_next) { 7398c2ecf20Sopenharmony_ci pgd_t *pgd = pgd_offset_k(vaddr); 7408c2ecf20Sopenharmony_ci p4d_t *p4d; 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_ci vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci if (pgd_val(*pgd)) { 7458c2ecf20Sopenharmony_ci p4d = (p4d_t *)pgd_page_vaddr(*pgd); 7468c2ecf20Sopenharmony_ci paddr_last = phys_p4d_init(p4d, __pa(vaddr), 7478c2ecf20Sopenharmony_ci __pa(vaddr_end), 7488c2ecf20Sopenharmony_ci page_size_mask, 7498c2ecf20Sopenharmony_ci prot, init); 7508c2ecf20Sopenharmony_ci continue; 7518c2ecf20Sopenharmony_ci } 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci p4d = alloc_low_page(); 7548c2ecf20Sopenharmony_ci paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), 7558c2ecf20Sopenharmony_ci page_size_mask, prot, init); 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 7588c2ecf20Sopenharmony_ci if (pgtable_l5_enabled()) 7598c2ecf20Sopenharmony_ci pgd_populate_init(&init_mm, pgd, p4d, init); 7608c2ecf20Sopenharmony_ci else 7618c2ecf20Sopenharmony_ci p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), 7628c2ecf20Sopenharmony_ci (pud_t *) p4d, init); 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 7658c2ecf20Sopenharmony_ci pgd_changed = true; 7668c2ecf20Sopenharmony_ci } 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci if (pgd_changed) 7698c2ecf20Sopenharmony_ci sync_global_pgds(vaddr_start, vaddr_end - 1); 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci return paddr_last; 7728c2ecf20Sopenharmony_ci} 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci/* 7768c2ecf20Sopenharmony_ci * Create page table mapping for the physical memory for specific physical 7778c2ecf20Sopenharmony_ci * addresses. Note that it can only be used to populate non-present entries. 7788c2ecf20Sopenharmony_ci * The virtual and physical addresses have to be aligned on PMD level 7798c2ecf20Sopenharmony_ci * down. It returns the last physical address mapped. 7808c2ecf20Sopenharmony_ci */ 7818c2ecf20Sopenharmony_ciunsigned long __meminit 7828c2ecf20Sopenharmony_cikernel_physical_mapping_init(unsigned long paddr_start, 7838c2ecf20Sopenharmony_ci unsigned long paddr_end, 7848c2ecf20Sopenharmony_ci unsigned long page_size_mask, pgprot_t prot) 7858c2ecf20Sopenharmony_ci{ 7868c2ecf20Sopenharmony_ci return __kernel_physical_mapping_init(paddr_start, paddr_end, 7878c2ecf20Sopenharmony_ci page_size_mask, prot, true); 7888c2ecf20Sopenharmony_ci} 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci/* 7918c2ecf20Sopenharmony_ci * This function is similar to kernel_physical_mapping_init() above with the 7928c2ecf20Sopenharmony_ci * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe() 7938c2ecf20Sopenharmony_ci * when updating the mapping. The caller is responsible to flush the TLBs after 7948c2ecf20Sopenharmony_ci * the function returns. 7958c2ecf20Sopenharmony_ci */ 7968c2ecf20Sopenharmony_ciunsigned long __meminit 7978c2ecf20Sopenharmony_cikernel_physical_mapping_change(unsigned long paddr_start, 7988c2ecf20Sopenharmony_ci unsigned long paddr_end, 7998c2ecf20Sopenharmony_ci unsigned long page_size_mask) 8008c2ecf20Sopenharmony_ci{ 8018c2ecf20Sopenharmony_ci return __kernel_physical_mapping_init(paddr_start, paddr_end, 8028c2ecf20Sopenharmony_ci page_size_mask, PAGE_KERNEL, 8038c2ecf20Sopenharmony_ci false); 8048c2ecf20Sopenharmony_ci} 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ci#ifndef CONFIG_NUMA 8078c2ecf20Sopenharmony_civoid __init initmem_init(void) 8088c2ecf20Sopenharmony_ci{ 8098c2ecf20Sopenharmony_ci memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); 8108c2ecf20Sopenharmony_ci} 8118c2ecf20Sopenharmony_ci#endif 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_civoid __init paging_init(void) 8148c2ecf20Sopenharmony_ci{ 8158c2ecf20Sopenharmony_ci sparse_init(); 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_ci /* 8188c2ecf20Sopenharmony_ci * clear the default setting with node 0 8198c2ecf20Sopenharmony_ci * note: don't use nodes_clear here, that is really clearing when 8208c2ecf20Sopenharmony_ci * numa support is not compiled in, and later node_set_state 8218c2ecf20Sopenharmony_ci * will not set it back. 8228c2ecf20Sopenharmony_ci */ 8238c2ecf20Sopenharmony_ci node_clear_state(0, N_MEMORY); 8248c2ecf20Sopenharmony_ci node_clear_state(0, N_NORMAL_MEMORY); 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci zone_sizes_init(); 8278c2ecf20Sopenharmony_ci} 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci/* 8308c2ecf20Sopenharmony_ci * Memory hotplug specific functions 8318c2ecf20Sopenharmony_ci */ 8328c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 8338c2ecf20Sopenharmony_ci/* 8348c2ecf20Sopenharmony_ci * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need 8358c2ecf20Sopenharmony_ci * updating. 8368c2ecf20Sopenharmony_ci */ 8378c2ecf20Sopenharmony_cistatic void update_end_of_memory_vars(u64 start, u64 size) 8388c2ecf20Sopenharmony_ci{ 8398c2ecf20Sopenharmony_ci unsigned long end_pfn = PFN_UP(start + size); 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci if (end_pfn > max_pfn) { 8428c2ecf20Sopenharmony_ci max_pfn = end_pfn; 8438c2ecf20Sopenharmony_ci max_low_pfn = end_pfn; 8448c2ecf20Sopenharmony_ci high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; 8458c2ecf20Sopenharmony_ci } 8468c2ecf20Sopenharmony_ci} 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ciint add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, 8498c2ecf20Sopenharmony_ci struct mhp_params *params) 8508c2ecf20Sopenharmony_ci{ 8518c2ecf20Sopenharmony_ci int ret; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci ret = __add_pages(nid, start_pfn, nr_pages, params); 8548c2ecf20Sopenharmony_ci WARN_ON_ONCE(ret); 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci /* update max_pfn, max_low_pfn and high_memory */ 8578c2ecf20Sopenharmony_ci update_end_of_memory_vars(start_pfn << PAGE_SHIFT, 8588c2ecf20Sopenharmony_ci nr_pages << PAGE_SHIFT); 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci return ret; 8618c2ecf20Sopenharmony_ci} 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ciint arch_add_memory(int nid, u64 start, u64 size, 8648c2ecf20Sopenharmony_ci struct mhp_params *params) 8658c2ecf20Sopenharmony_ci{ 8668c2ecf20Sopenharmony_ci unsigned long start_pfn = start >> PAGE_SHIFT; 8678c2ecf20Sopenharmony_ci unsigned long nr_pages = size >> PAGE_SHIFT; 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci init_memory_mapping(start, start + size, params->pgprot); 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_ci return add_pages(nid, start_pfn, nr_pages, params); 8728c2ecf20Sopenharmony_ci} 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci#define PAGE_INUSE 0xFD 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_cistatic void __meminit free_pagetable(struct page *page, int order) 8778c2ecf20Sopenharmony_ci{ 8788c2ecf20Sopenharmony_ci unsigned long magic; 8798c2ecf20Sopenharmony_ci unsigned int nr_pages = 1 << order; 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci /* bootmem page has reserved flag */ 8828c2ecf20Sopenharmony_ci if (PageReserved(page)) { 8838c2ecf20Sopenharmony_ci __ClearPageReserved(page); 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci magic = (unsigned long)page->freelist; 8868c2ecf20Sopenharmony_ci if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { 8878c2ecf20Sopenharmony_ci while (nr_pages--) 8888c2ecf20Sopenharmony_ci put_page_bootmem(page++); 8898c2ecf20Sopenharmony_ci } else 8908c2ecf20Sopenharmony_ci while (nr_pages--) 8918c2ecf20Sopenharmony_ci free_reserved_page(page++); 8928c2ecf20Sopenharmony_ci } else 8938c2ecf20Sopenharmony_ci free_pages((unsigned long)page_address(page), order); 8948c2ecf20Sopenharmony_ci} 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_cistatic void __meminit free_hugepage_table(struct page *page, 8978c2ecf20Sopenharmony_ci struct vmem_altmap *altmap) 8988c2ecf20Sopenharmony_ci{ 8998c2ecf20Sopenharmony_ci if (altmap) 9008c2ecf20Sopenharmony_ci vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); 9018c2ecf20Sopenharmony_ci else 9028c2ecf20Sopenharmony_ci free_pagetable(page, get_order(PMD_SIZE)); 9038c2ecf20Sopenharmony_ci} 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_cistatic void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) 9068c2ecf20Sopenharmony_ci{ 9078c2ecf20Sopenharmony_ci pte_t *pte; 9088c2ecf20Sopenharmony_ci int i; 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci for (i = 0; i < PTRS_PER_PTE; i++) { 9118c2ecf20Sopenharmony_ci pte = pte_start + i; 9128c2ecf20Sopenharmony_ci if (!pte_none(*pte)) 9138c2ecf20Sopenharmony_ci return; 9148c2ecf20Sopenharmony_ci } 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci /* free a pte talbe */ 9178c2ecf20Sopenharmony_ci free_pagetable(pmd_page(*pmd), 0); 9188c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 9198c2ecf20Sopenharmony_ci pmd_clear(pmd); 9208c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 9218c2ecf20Sopenharmony_ci} 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_cistatic void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) 9248c2ecf20Sopenharmony_ci{ 9258c2ecf20Sopenharmony_ci pmd_t *pmd; 9268c2ecf20Sopenharmony_ci int i; 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci for (i = 0; i < PTRS_PER_PMD; i++) { 9298c2ecf20Sopenharmony_ci pmd = pmd_start + i; 9308c2ecf20Sopenharmony_ci if (!pmd_none(*pmd)) 9318c2ecf20Sopenharmony_ci return; 9328c2ecf20Sopenharmony_ci } 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci /* free a pmd talbe */ 9358c2ecf20Sopenharmony_ci free_pagetable(pud_page(*pud), 0); 9368c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 9378c2ecf20Sopenharmony_ci pud_clear(pud); 9388c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 9398c2ecf20Sopenharmony_ci} 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_cistatic void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) 9428c2ecf20Sopenharmony_ci{ 9438c2ecf20Sopenharmony_ci pud_t *pud; 9448c2ecf20Sopenharmony_ci int i; 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ci for (i = 0; i < PTRS_PER_PUD; i++) { 9478c2ecf20Sopenharmony_ci pud = pud_start + i; 9488c2ecf20Sopenharmony_ci if (!pud_none(*pud)) 9498c2ecf20Sopenharmony_ci return; 9508c2ecf20Sopenharmony_ci } 9518c2ecf20Sopenharmony_ci 9528c2ecf20Sopenharmony_ci /* free a pud talbe */ 9538c2ecf20Sopenharmony_ci free_pagetable(p4d_page(*p4d), 0); 9548c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 9558c2ecf20Sopenharmony_ci p4d_clear(p4d); 9568c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 9578c2ecf20Sopenharmony_ci} 9588c2ecf20Sopenharmony_ci 9598c2ecf20Sopenharmony_cistatic void __meminit 9608c2ecf20Sopenharmony_ciremove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, 9618c2ecf20Sopenharmony_ci bool direct) 9628c2ecf20Sopenharmony_ci{ 9638c2ecf20Sopenharmony_ci unsigned long next, pages = 0; 9648c2ecf20Sopenharmony_ci pte_t *pte; 9658c2ecf20Sopenharmony_ci void *page_addr; 9668c2ecf20Sopenharmony_ci phys_addr_t phys_addr; 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci pte = pte_start + pte_index(addr); 9698c2ecf20Sopenharmony_ci for (; addr < end; addr = next, pte++) { 9708c2ecf20Sopenharmony_ci next = (addr + PAGE_SIZE) & PAGE_MASK; 9718c2ecf20Sopenharmony_ci if (next > end) 9728c2ecf20Sopenharmony_ci next = end; 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci if (!pte_present(*pte)) 9758c2ecf20Sopenharmony_ci continue; 9768c2ecf20Sopenharmony_ci 9778c2ecf20Sopenharmony_ci /* 9788c2ecf20Sopenharmony_ci * We mapped [0,1G) memory as identity mapping when 9798c2ecf20Sopenharmony_ci * initializing, in arch/x86/kernel/head_64.S. These 9808c2ecf20Sopenharmony_ci * pagetables cannot be removed. 9818c2ecf20Sopenharmony_ci */ 9828c2ecf20Sopenharmony_ci phys_addr = pte_val(*pte) + (addr & PAGE_MASK); 9838c2ecf20Sopenharmony_ci if (phys_addr < (phys_addr_t)0x40000000) 9848c2ecf20Sopenharmony_ci return; 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { 9878c2ecf20Sopenharmony_ci /* 9888c2ecf20Sopenharmony_ci * Do not free direct mapping pages since they were 9898c2ecf20Sopenharmony_ci * freed when offlining, or simplely not in use. 9908c2ecf20Sopenharmony_ci */ 9918c2ecf20Sopenharmony_ci if (!direct) 9928c2ecf20Sopenharmony_ci free_pagetable(pte_page(*pte), 0); 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 9958c2ecf20Sopenharmony_ci pte_clear(&init_mm, addr, pte); 9968c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ci /* For non-direct mapping, pages means nothing. */ 9998c2ecf20Sopenharmony_ci pages++; 10008c2ecf20Sopenharmony_ci } else { 10018c2ecf20Sopenharmony_ci /* 10028c2ecf20Sopenharmony_ci * If we are here, we are freeing vmemmap pages since 10038c2ecf20Sopenharmony_ci * direct mapped memory ranges to be freed are aligned. 10048c2ecf20Sopenharmony_ci * 10058c2ecf20Sopenharmony_ci * If we are not removing the whole page, it means 10068c2ecf20Sopenharmony_ci * other page structs in this page are being used and 10078c2ecf20Sopenharmony_ci * we canot remove them. So fill the unused page_structs 10088c2ecf20Sopenharmony_ci * with 0xFD, and remove the page when it is wholly 10098c2ecf20Sopenharmony_ci * filled with 0xFD. 10108c2ecf20Sopenharmony_ci */ 10118c2ecf20Sopenharmony_ci memset((void *)addr, PAGE_INUSE, next - addr); 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci page_addr = page_address(pte_page(*pte)); 10148c2ecf20Sopenharmony_ci if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { 10158c2ecf20Sopenharmony_ci free_pagetable(pte_page(*pte), 0); 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 10188c2ecf20Sopenharmony_ci pte_clear(&init_mm, addr, pte); 10198c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 10208c2ecf20Sopenharmony_ci } 10218c2ecf20Sopenharmony_ci } 10228c2ecf20Sopenharmony_ci } 10238c2ecf20Sopenharmony_ci 10248c2ecf20Sopenharmony_ci /* Call free_pte_table() in remove_pmd_table(). */ 10258c2ecf20Sopenharmony_ci flush_tlb_all(); 10268c2ecf20Sopenharmony_ci if (direct) 10278c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_4K, -pages); 10288c2ecf20Sopenharmony_ci} 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_cistatic void __meminit 10318c2ecf20Sopenharmony_ciremove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, 10328c2ecf20Sopenharmony_ci bool direct, struct vmem_altmap *altmap) 10338c2ecf20Sopenharmony_ci{ 10348c2ecf20Sopenharmony_ci unsigned long next, pages = 0; 10358c2ecf20Sopenharmony_ci pte_t *pte_base; 10368c2ecf20Sopenharmony_ci pmd_t *pmd; 10378c2ecf20Sopenharmony_ci void *page_addr; 10388c2ecf20Sopenharmony_ci 10398c2ecf20Sopenharmony_ci pmd = pmd_start + pmd_index(addr); 10408c2ecf20Sopenharmony_ci for (; addr < end; addr = next, pmd++) { 10418c2ecf20Sopenharmony_ci next = pmd_addr_end(addr, end); 10428c2ecf20Sopenharmony_ci 10438c2ecf20Sopenharmony_ci if (!pmd_present(*pmd)) 10448c2ecf20Sopenharmony_ci continue; 10458c2ecf20Sopenharmony_ci 10468c2ecf20Sopenharmony_ci if (pmd_large(*pmd)) { 10478c2ecf20Sopenharmony_ci if (IS_ALIGNED(addr, PMD_SIZE) && 10488c2ecf20Sopenharmony_ci IS_ALIGNED(next, PMD_SIZE)) { 10498c2ecf20Sopenharmony_ci if (!direct) 10508c2ecf20Sopenharmony_ci free_hugepage_table(pmd_page(*pmd), 10518c2ecf20Sopenharmony_ci altmap); 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 10548c2ecf20Sopenharmony_ci pmd_clear(pmd); 10558c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 10568c2ecf20Sopenharmony_ci pages++; 10578c2ecf20Sopenharmony_ci } else { 10588c2ecf20Sopenharmony_ci /* If here, we are freeing vmemmap pages. */ 10598c2ecf20Sopenharmony_ci memset((void *)addr, PAGE_INUSE, next - addr); 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci page_addr = page_address(pmd_page(*pmd)); 10628c2ecf20Sopenharmony_ci if (!memchr_inv(page_addr, PAGE_INUSE, 10638c2ecf20Sopenharmony_ci PMD_SIZE)) { 10648c2ecf20Sopenharmony_ci free_hugepage_table(pmd_page(*pmd), 10658c2ecf20Sopenharmony_ci altmap); 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 10688c2ecf20Sopenharmony_ci pmd_clear(pmd); 10698c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 10708c2ecf20Sopenharmony_ci } 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci 10738c2ecf20Sopenharmony_ci continue; 10748c2ecf20Sopenharmony_ci } 10758c2ecf20Sopenharmony_ci 10768c2ecf20Sopenharmony_ci pte_base = (pte_t *)pmd_page_vaddr(*pmd); 10778c2ecf20Sopenharmony_ci remove_pte_table(pte_base, addr, next, direct); 10788c2ecf20Sopenharmony_ci free_pte_table(pte_base, pmd); 10798c2ecf20Sopenharmony_ci } 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci /* Call free_pmd_table() in remove_pud_table(). */ 10828c2ecf20Sopenharmony_ci if (direct) 10838c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_2M, -pages); 10848c2ecf20Sopenharmony_ci} 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_cistatic void __meminit 10878c2ecf20Sopenharmony_ciremove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, 10888c2ecf20Sopenharmony_ci struct vmem_altmap *altmap, bool direct) 10898c2ecf20Sopenharmony_ci{ 10908c2ecf20Sopenharmony_ci unsigned long next, pages = 0; 10918c2ecf20Sopenharmony_ci pmd_t *pmd_base; 10928c2ecf20Sopenharmony_ci pud_t *pud; 10938c2ecf20Sopenharmony_ci void *page_addr; 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci pud = pud_start + pud_index(addr); 10968c2ecf20Sopenharmony_ci for (; addr < end; addr = next, pud++) { 10978c2ecf20Sopenharmony_ci next = pud_addr_end(addr, end); 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci if (!pud_present(*pud)) 11008c2ecf20Sopenharmony_ci continue; 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci if (pud_large(*pud)) { 11038c2ecf20Sopenharmony_ci if (IS_ALIGNED(addr, PUD_SIZE) && 11048c2ecf20Sopenharmony_ci IS_ALIGNED(next, PUD_SIZE)) { 11058c2ecf20Sopenharmony_ci if (!direct) 11068c2ecf20Sopenharmony_ci free_pagetable(pud_page(*pud), 11078c2ecf20Sopenharmony_ci get_order(PUD_SIZE)); 11088c2ecf20Sopenharmony_ci 11098c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 11108c2ecf20Sopenharmony_ci pud_clear(pud); 11118c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 11128c2ecf20Sopenharmony_ci pages++; 11138c2ecf20Sopenharmony_ci } else { 11148c2ecf20Sopenharmony_ci /* If here, we are freeing vmemmap pages. */ 11158c2ecf20Sopenharmony_ci memset((void *)addr, PAGE_INUSE, next - addr); 11168c2ecf20Sopenharmony_ci 11178c2ecf20Sopenharmony_ci page_addr = page_address(pud_page(*pud)); 11188c2ecf20Sopenharmony_ci if (!memchr_inv(page_addr, PAGE_INUSE, 11198c2ecf20Sopenharmony_ci PUD_SIZE)) { 11208c2ecf20Sopenharmony_ci free_pagetable(pud_page(*pud), 11218c2ecf20Sopenharmony_ci get_order(PUD_SIZE)); 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ci spin_lock(&init_mm.page_table_lock); 11248c2ecf20Sopenharmony_ci pud_clear(pud); 11258c2ecf20Sopenharmony_ci spin_unlock(&init_mm.page_table_lock); 11268c2ecf20Sopenharmony_ci } 11278c2ecf20Sopenharmony_ci } 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci continue; 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci pmd_base = pmd_offset(pud, 0); 11338c2ecf20Sopenharmony_ci remove_pmd_table(pmd_base, addr, next, direct, altmap); 11348c2ecf20Sopenharmony_ci free_pmd_table(pmd_base, pud); 11358c2ecf20Sopenharmony_ci } 11368c2ecf20Sopenharmony_ci 11378c2ecf20Sopenharmony_ci if (direct) 11388c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_1G, -pages); 11398c2ecf20Sopenharmony_ci} 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_cistatic void __meminit 11428c2ecf20Sopenharmony_ciremove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, 11438c2ecf20Sopenharmony_ci struct vmem_altmap *altmap, bool direct) 11448c2ecf20Sopenharmony_ci{ 11458c2ecf20Sopenharmony_ci unsigned long next, pages = 0; 11468c2ecf20Sopenharmony_ci pud_t *pud_base; 11478c2ecf20Sopenharmony_ci p4d_t *p4d; 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci p4d = p4d_start + p4d_index(addr); 11508c2ecf20Sopenharmony_ci for (; addr < end; addr = next, p4d++) { 11518c2ecf20Sopenharmony_ci next = p4d_addr_end(addr, end); 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_ci if (!p4d_present(*p4d)) 11548c2ecf20Sopenharmony_ci continue; 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci BUILD_BUG_ON(p4d_large(*p4d)); 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci pud_base = pud_offset(p4d, 0); 11598c2ecf20Sopenharmony_ci remove_pud_table(pud_base, addr, next, altmap, direct); 11608c2ecf20Sopenharmony_ci /* 11618c2ecf20Sopenharmony_ci * For 4-level page tables we do not want to free PUDs, but in the 11628c2ecf20Sopenharmony_ci * 5-level case we should free them. This code will have to change 11638c2ecf20Sopenharmony_ci * to adapt for boot-time switching between 4 and 5 level page tables. 11648c2ecf20Sopenharmony_ci */ 11658c2ecf20Sopenharmony_ci if (pgtable_l5_enabled()) 11668c2ecf20Sopenharmony_ci free_pud_table(pud_base, p4d); 11678c2ecf20Sopenharmony_ci } 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci if (direct) 11708c2ecf20Sopenharmony_ci update_page_count(PG_LEVEL_512G, -pages); 11718c2ecf20Sopenharmony_ci} 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ci/* start and end are both virtual address. */ 11748c2ecf20Sopenharmony_cistatic void __meminit 11758c2ecf20Sopenharmony_ciremove_pagetable(unsigned long start, unsigned long end, bool direct, 11768c2ecf20Sopenharmony_ci struct vmem_altmap *altmap) 11778c2ecf20Sopenharmony_ci{ 11788c2ecf20Sopenharmony_ci unsigned long next; 11798c2ecf20Sopenharmony_ci unsigned long addr; 11808c2ecf20Sopenharmony_ci pgd_t *pgd; 11818c2ecf20Sopenharmony_ci p4d_t *p4d; 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci for (addr = start; addr < end; addr = next) { 11848c2ecf20Sopenharmony_ci next = pgd_addr_end(addr, end); 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci pgd = pgd_offset_k(addr); 11878c2ecf20Sopenharmony_ci if (!pgd_present(*pgd)) 11888c2ecf20Sopenharmony_ci continue; 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, 0); 11918c2ecf20Sopenharmony_ci remove_p4d_table(p4d, addr, next, altmap, direct); 11928c2ecf20Sopenharmony_ci } 11938c2ecf20Sopenharmony_ci 11948c2ecf20Sopenharmony_ci flush_tlb_all(); 11958c2ecf20Sopenharmony_ci} 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_civoid __ref vmemmap_free(unsigned long start, unsigned long end, 11988c2ecf20Sopenharmony_ci struct vmem_altmap *altmap) 11998c2ecf20Sopenharmony_ci{ 12008c2ecf20Sopenharmony_ci remove_pagetable(start, end, false, altmap); 12018c2ecf20Sopenharmony_ci} 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_cistatic void __meminit 12048c2ecf20Sopenharmony_cikernel_physical_mapping_remove(unsigned long start, unsigned long end) 12058c2ecf20Sopenharmony_ci{ 12068c2ecf20Sopenharmony_ci start = (unsigned long)__va(start); 12078c2ecf20Sopenharmony_ci end = (unsigned long)__va(end); 12088c2ecf20Sopenharmony_ci 12098c2ecf20Sopenharmony_ci remove_pagetable(start, end, true, NULL); 12108c2ecf20Sopenharmony_ci} 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_civoid __ref arch_remove_memory(int nid, u64 start, u64 size, 12138c2ecf20Sopenharmony_ci struct vmem_altmap *altmap) 12148c2ecf20Sopenharmony_ci{ 12158c2ecf20Sopenharmony_ci unsigned long start_pfn = start >> PAGE_SHIFT; 12168c2ecf20Sopenharmony_ci unsigned long nr_pages = size >> PAGE_SHIFT; 12178c2ecf20Sopenharmony_ci 12188c2ecf20Sopenharmony_ci __remove_pages(start_pfn, nr_pages, altmap); 12198c2ecf20Sopenharmony_ci kernel_physical_mapping_remove(start, start + size); 12208c2ecf20Sopenharmony_ci} 12218c2ecf20Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */ 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_cistatic struct kcore_list kcore_vsyscall; 12248c2ecf20Sopenharmony_ci 12258c2ecf20Sopenharmony_cistatic void __init register_page_bootmem_info(void) 12268c2ecf20Sopenharmony_ci{ 12278c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA 12288c2ecf20Sopenharmony_ci int i; 12298c2ecf20Sopenharmony_ci 12308c2ecf20Sopenharmony_ci for_each_online_node(i) 12318c2ecf20Sopenharmony_ci register_page_bootmem_info_node(NODE_DATA(i)); 12328c2ecf20Sopenharmony_ci#endif 12338c2ecf20Sopenharmony_ci} 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ci/* 12368c2ecf20Sopenharmony_ci * Pre-allocates page-table pages for the vmalloc area in the kernel page-table. 12378c2ecf20Sopenharmony_ci * Only the level which needs to be synchronized between all page-tables is 12388c2ecf20Sopenharmony_ci * allocated because the synchronization can be expensive. 12398c2ecf20Sopenharmony_ci */ 12408c2ecf20Sopenharmony_cistatic void __init preallocate_vmalloc_pages(void) 12418c2ecf20Sopenharmony_ci{ 12428c2ecf20Sopenharmony_ci unsigned long addr; 12438c2ecf20Sopenharmony_ci const char *lvl; 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) { 12468c2ecf20Sopenharmony_ci pgd_t *pgd = pgd_offset_k(addr); 12478c2ecf20Sopenharmony_ci p4d_t *p4d; 12488c2ecf20Sopenharmony_ci pud_t *pud; 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci lvl = "p4d"; 12518c2ecf20Sopenharmony_ci p4d = p4d_alloc(&init_mm, pgd, addr); 12528c2ecf20Sopenharmony_ci if (!p4d) 12538c2ecf20Sopenharmony_ci goto failed; 12548c2ecf20Sopenharmony_ci 12558c2ecf20Sopenharmony_ci if (pgtable_l5_enabled()) 12568c2ecf20Sopenharmony_ci continue; 12578c2ecf20Sopenharmony_ci 12588c2ecf20Sopenharmony_ci /* 12598c2ecf20Sopenharmony_ci * The goal here is to allocate all possibly required 12608c2ecf20Sopenharmony_ci * hardware page tables pointed to by the top hardware 12618c2ecf20Sopenharmony_ci * level. 12628c2ecf20Sopenharmony_ci * 12638c2ecf20Sopenharmony_ci * On 4-level systems, the P4D layer is folded away and 12648c2ecf20Sopenharmony_ci * the above code does no preallocation. Below, go down 12658c2ecf20Sopenharmony_ci * to the pud _software_ level to ensure the second 12668c2ecf20Sopenharmony_ci * hardware level is allocated on 4-level systems too. 12678c2ecf20Sopenharmony_ci */ 12688c2ecf20Sopenharmony_ci lvl = "pud"; 12698c2ecf20Sopenharmony_ci pud = pud_alloc(&init_mm, p4d, addr); 12708c2ecf20Sopenharmony_ci if (!pud) 12718c2ecf20Sopenharmony_ci goto failed; 12728c2ecf20Sopenharmony_ci } 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci return; 12758c2ecf20Sopenharmony_ci 12768c2ecf20Sopenharmony_cifailed: 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_ci /* 12798c2ecf20Sopenharmony_ci * The pages have to be there now or they will be missing in 12808c2ecf20Sopenharmony_ci * process page-tables later. 12818c2ecf20Sopenharmony_ci */ 12828c2ecf20Sopenharmony_ci panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl); 12838c2ecf20Sopenharmony_ci} 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_civoid __init mem_init(void) 12868c2ecf20Sopenharmony_ci{ 12878c2ecf20Sopenharmony_ci pci_iommu_alloc(); 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ci /* clear_bss() already clear the empty_zero_page */ 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci /* this will put all memory onto the freelists */ 12928c2ecf20Sopenharmony_ci memblock_free_all(); 12938c2ecf20Sopenharmony_ci after_bootmem = 1; 12948c2ecf20Sopenharmony_ci x86_init.hyper.init_after_bootmem(); 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci /* 12978c2ecf20Sopenharmony_ci * Must be done after boot memory is put on freelist, because here we 12988c2ecf20Sopenharmony_ci * might set fields in deferred struct pages that have not yet been 12998c2ecf20Sopenharmony_ci * initialized, and memblock_free_all() initializes all the reserved 13008c2ecf20Sopenharmony_ci * deferred pages for us. 13018c2ecf20Sopenharmony_ci */ 13028c2ecf20Sopenharmony_ci register_page_bootmem_info(); 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci /* Register memory areas for /proc/kcore */ 13058c2ecf20Sopenharmony_ci if (get_gate_vma(&init_mm)) 13068c2ecf20Sopenharmony_ci kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER); 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci preallocate_vmalloc_pages(); 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ci mem_init_print_info(NULL); 13118c2ecf20Sopenharmony_ci} 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 13148c2ecf20Sopenharmony_ciint __init deferred_page_init_max_threads(const struct cpumask *node_cpumask) 13158c2ecf20Sopenharmony_ci{ 13168c2ecf20Sopenharmony_ci /* 13178c2ecf20Sopenharmony_ci * More CPUs always led to greater speedups on tested systems, up to 13188c2ecf20Sopenharmony_ci * all the nodes' CPUs. Use all since the system is otherwise idle 13198c2ecf20Sopenharmony_ci * now. 13208c2ecf20Sopenharmony_ci */ 13218c2ecf20Sopenharmony_ci return max_t(int, cpumask_weight(node_cpumask), 1); 13228c2ecf20Sopenharmony_ci} 13238c2ecf20Sopenharmony_ci#endif 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ciint kernel_set_to_readonly; 13268c2ecf20Sopenharmony_ci 13278c2ecf20Sopenharmony_civoid mark_rodata_ro(void) 13288c2ecf20Sopenharmony_ci{ 13298c2ecf20Sopenharmony_ci unsigned long start = PFN_ALIGN(_text); 13308c2ecf20Sopenharmony_ci unsigned long rodata_start = PFN_ALIGN(__start_rodata); 13318c2ecf20Sopenharmony_ci unsigned long end = (unsigned long)__end_rodata_hpage_align; 13328c2ecf20Sopenharmony_ci unsigned long text_end = PFN_ALIGN(_etext); 13338c2ecf20Sopenharmony_ci unsigned long rodata_end = PFN_ALIGN(__end_rodata); 13348c2ecf20Sopenharmony_ci unsigned long all_end; 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 13378c2ecf20Sopenharmony_ci (end - start) >> 10); 13388c2ecf20Sopenharmony_ci set_memory_ro(start, (end - start) >> PAGE_SHIFT); 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_ci kernel_set_to_readonly = 1; 13418c2ecf20Sopenharmony_ci 13428c2ecf20Sopenharmony_ci /* 13438c2ecf20Sopenharmony_ci * The rodata/data/bss/brk section (but not the kernel text!) 13448c2ecf20Sopenharmony_ci * should also be not-executable. 13458c2ecf20Sopenharmony_ci * 13468c2ecf20Sopenharmony_ci * We align all_end to PMD_SIZE because the existing mapping 13478c2ecf20Sopenharmony_ci * is a full PMD. If we would align _brk_end to PAGE_SIZE we 13488c2ecf20Sopenharmony_ci * split the PMD and the reminder between _brk_end and the end 13498c2ecf20Sopenharmony_ci * of the PMD will remain mapped executable. 13508c2ecf20Sopenharmony_ci * 13518c2ecf20Sopenharmony_ci * Any PMD which was setup after the one which covers _brk_end 13528c2ecf20Sopenharmony_ci * has been zapped already via cleanup_highmem(). 13538c2ecf20Sopenharmony_ci */ 13548c2ecf20Sopenharmony_ci all_end = roundup((unsigned long)_brk_end, PMD_SIZE); 13558c2ecf20Sopenharmony_ci set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci set_ftrace_ops_ro(); 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_ci#ifdef CONFIG_CPA_DEBUG 13608c2ecf20Sopenharmony_ci printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); 13618c2ecf20Sopenharmony_ci set_memory_rw(start, (end-start) >> PAGE_SHIFT); 13628c2ecf20Sopenharmony_ci 13638c2ecf20Sopenharmony_ci printk(KERN_INFO "Testing CPA: again\n"); 13648c2ecf20Sopenharmony_ci set_memory_ro(start, (end-start) >> PAGE_SHIFT); 13658c2ecf20Sopenharmony_ci#endif 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci free_kernel_image_pages("unused kernel image (text/rodata gap)", 13688c2ecf20Sopenharmony_ci (void *)text_end, (void *)rodata_start); 13698c2ecf20Sopenharmony_ci free_kernel_image_pages("unused kernel image (rodata/data gap)", 13708c2ecf20Sopenharmony_ci (void *)rodata_end, (void *)_sdata); 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci debug_checkwx(); 13738c2ecf20Sopenharmony_ci} 13748c2ecf20Sopenharmony_ci 13758c2ecf20Sopenharmony_ciint kern_addr_valid(unsigned long addr) 13768c2ecf20Sopenharmony_ci{ 13778c2ecf20Sopenharmony_ci unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; 13788c2ecf20Sopenharmony_ci pgd_t *pgd; 13798c2ecf20Sopenharmony_ci p4d_t *p4d; 13808c2ecf20Sopenharmony_ci pud_t *pud; 13818c2ecf20Sopenharmony_ci pmd_t *pmd; 13828c2ecf20Sopenharmony_ci pte_t *pte; 13838c2ecf20Sopenharmony_ci 13848c2ecf20Sopenharmony_ci if (above != 0 && above != -1UL) 13858c2ecf20Sopenharmony_ci return 0; 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci pgd = pgd_offset_k(addr); 13888c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) 13898c2ecf20Sopenharmony_ci return 0; 13908c2ecf20Sopenharmony_ci 13918c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, addr); 13928c2ecf20Sopenharmony_ci if (!p4d_present(*p4d)) 13938c2ecf20Sopenharmony_ci return 0; 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci pud = pud_offset(p4d, addr); 13968c2ecf20Sopenharmony_ci if (!pud_present(*pud)) 13978c2ecf20Sopenharmony_ci return 0; 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci if (pud_large(*pud)) 14008c2ecf20Sopenharmony_ci return pfn_valid(pud_pfn(*pud)); 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, addr); 14038c2ecf20Sopenharmony_ci if (!pmd_present(*pmd)) 14048c2ecf20Sopenharmony_ci return 0; 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci if (pmd_large(*pmd)) 14078c2ecf20Sopenharmony_ci return pfn_valid(pmd_pfn(*pmd)); 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci pte = pte_offset_kernel(pmd, addr); 14108c2ecf20Sopenharmony_ci if (pte_none(*pte)) 14118c2ecf20Sopenharmony_ci return 0; 14128c2ecf20Sopenharmony_ci 14138c2ecf20Sopenharmony_ci return pfn_valid(pte_pfn(*pte)); 14148c2ecf20Sopenharmony_ci} 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci/* 14178c2ecf20Sopenharmony_ci * Block size is the minimum amount of memory which can be hotplugged or 14188c2ecf20Sopenharmony_ci * hotremoved. It must be power of two and must be equal or larger than 14198c2ecf20Sopenharmony_ci * MIN_MEMORY_BLOCK_SIZE. 14208c2ecf20Sopenharmony_ci */ 14218c2ecf20Sopenharmony_ci#define MAX_BLOCK_SIZE (2UL << 30) 14228c2ecf20Sopenharmony_ci 14238c2ecf20Sopenharmony_ci/* Amount of ram needed to start using large blocks */ 14248c2ecf20Sopenharmony_ci#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci/* Adjustable memory block size */ 14278c2ecf20Sopenharmony_cistatic unsigned long set_memory_block_size; 14288c2ecf20Sopenharmony_ciint __init set_memory_block_size_order(unsigned int order) 14298c2ecf20Sopenharmony_ci{ 14308c2ecf20Sopenharmony_ci unsigned long size = 1UL << order; 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE) 14338c2ecf20Sopenharmony_ci return -EINVAL; 14348c2ecf20Sopenharmony_ci 14358c2ecf20Sopenharmony_ci set_memory_block_size = size; 14368c2ecf20Sopenharmony_ci return 0; 14378c2ecf20Sopenharmony_ci} 14388c2ecf20Sopenharmony_ci 14398c2ecf20Sopenharmony_cistatic unsigned long probe_memory_block_size(void) 14408c2ecf20Sopenharmony_ci{ 14418c2ecf20Sopenharmony_ci unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; 14428c2ecf20Sopenharmony_ci unsigned long bz; 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci /* If memory block size has been set, then use it */ 14458c2ecf20Sopenharmony_ci bz = set_memory_block_size; 14468c2ecf20Sopenharmony_ci if (bz) 14478c2ecf20Sopenharmony_ci goto done; 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ 14508c2ecf20Sopenharmony_ci if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { 14518c2ecf20Sopenharmony_ci bz = MIN_MEMORY_BLOCK_SIZE; 14528c2ecf20Sopenharmony_ci goto done; 14538c2ecf20Sopenharmony_ci } 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci /* 14568c2ecf20Sopenharmony_ci * Use max block size to minimize overhead on bare metal, where 14578c2ecf20Sopenharmony_ci * alignment for memory hotplug isn't a concern. 14588c2ecf20Sopenharmony_ci */ 14598c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { 14608c2ecf20Sopenharmony_ci bz = MAX_BLOCK_SIZE; 14618c2ecf20Sopenharmony_ci goto done; 14628c2ecf20Sopenharmony_ci } 14638c2ecf20Sopenharmony_ci 14648c2ecf20Sopenharmony_ci /* Find the largest allowed block size that aligns to memory end */ 14658c2ecf20Sopenharmony_ci for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { 14668c2ecf20Sopenharmony_ci if (IS_ALIGNED(boot_mem_end, bz)) 14678c2ecf20Sopenharmony_ci break; 14688c2ecf20Sopenharmony_ci } 14698c2ecf20Sopenharmony_cidone: 14708c2ecf20Sopenharmony_ci pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci return bz; 14738c2ecf20Sopenharmony_ci} 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_cistatic unsigned long memory_block_size_probed; 14768c2ecf20Sopenharmony_ciunsigned long memory_block_size_bytes(void) 14778c2ecf20Sopenharmony_ci{ 14788c2ecf20Sopenharmony_ci if (!memory_block_size_probed) 14798c2ecf20Sopenharmony_ci memory_block_size_probed = probe_memory_block_size(); 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_ci return memory_block_size_probed; 14828c2ecf20Sopenharmony_ci} 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 14858c2ecf20Sopenharmony_ci/* 14868c2ecf20Sopenharmony_ci * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 14878c2ecf20Sopenharmony_ci */ 14888c2ecf20Sopenharmony_cistatic long __meminitdata addr_start, addr_end; 14898c2ecf20Sopenharmony_cistatic void __meminitdata *p_start, *p_end; 14908c2ecf20Sopenharmony_cistatic int __meminitdata node_start; 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_cistatic int __meminit vmemmap_populate_hugepages(unsigned long start, 14938c2ecf20Sopenharmony_ci unsigned long end, int node, struct vmem_altmap *altmap) 14948c2ecf20Sopenharmony_ci{ 14958c2ecf20Sopenharmony_ci unsigned long addr; 14968c2ecf20Sopenharmony_ci unsigned long next; 14978c2ecf20Sopenharmony_ci pgd_t *pgd; 14988c2ecf20Sopenharmony_ci p4d_t *p4d; 14998c2ecf20Sopenharmony_ci pud_t *pud; 15008c2ecf20Sopenharmony_ci pmd_t *pmd; 15018c2ecf20Sopenharmony_ci 15028c2ecf20Sopenharmony_ci for (addr = start; addr < end; addr = next) { 15038c2ecf20Sopenharmony_ci next = pmd_addr_end(addr, end); 15048c2ecf20Sopenharmony_ci 15058c2ecf20Sopenharmony_ci pgd = vmemmap_pgd_populate(addr, node); 15068c2ecf20Sopenharmony_ci if (!pgd) 15078c2ecf20Sopenharmony_ci return -ENOMEM; 15088c2ecf20Sopenharmony_ci 15098c2ecf20Sopenharmony_ci p4d = vmemmap_p4d_populate(pgd, addr, node); 15108c2ecf20Sopenharmony_ci if (!p4d) 15118c2ecf20Sopenharmony_ci return -ENOMEM; 15128c2ecf20Sopenharmony_ci 15138c2ecf20Sopenharmony_ci pud = vmemmap_pud_populate(p4d, addr, node); 15148c2ecf20Sopenharmony_ci if (!pud) 15158c2ecf20Sopenharmony_ci return -ENOMEM; 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, addr); 15188c2ecf20Sopenharmony_ci if (pmd_none(*pmd)) { 15198c2ecf20Sopenharmony_ci void *p; 15208c2ecf20Sopenharmony_ci 15218c2ecf20Sopenharmony_ci p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); 15228c2ecf20Sopenharmony_ci if (p) { 15238c2ecf20Sopenharmony_ci pte_t entry; 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci entry = pfn_pte(__pa(p) >> PAGE_SHIFT, 15268c2ecf20Sopenharmony_ci PAGE_KERNEL_LARGE); 15278c2ecf20Sopenharmony_ci set_pmd(pmd, __pmd(pte_val(entry))); 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_ci /* check to see if we have contiguous blocks */ 15308c2ecf20Sopenharmony_ci if (p_end != p || node_start != node) { 15318c2ecf20Sopenharmony_ci if (p_start) 15328c2ecf20Sopenharmony_ci pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", 15338c2ecf20Sopenharmony_ci addr_start, addr_end-1, p_start, p_end-1, node_start); 15348c2ecf20Sopenharmony_ci addr_start = addr; 15358c2ecf20Sopenharmony_ci node_start = node; 15368c2ecf20Sopenharmony_ci p_start = p; 15378c2ecf20Sopenharmony_ci } 15388c2ecf20Sopenharmony_ci 15398c2ecf20Sopenharmony_ci addr_end = addr + PMD_SIZE; 15408c2ecf20Sopenharmony_ci p_end = p + PMD_SIZE; 15418c2ecf20Sopenharmony_ci continue; 15428c2ecf20Sopenharmony_ci } else if (altmap) 15438c2ecf20Sopenharmony_ci return -ENOMEM; /* no fallback */ 15448c2ecf20Sopenharmony_ci } else if (pmd_large(*pmd)) { 15458c2ecf20Sopenharmony_ci vmemmap_verify((pte_t *)pmd, node, addr, next); 15468c2ecf20Sopenharmony_ci continue; 15478c2ecf20Sopenharmony_ci } 15488c2ecf20Sopenharmony_ci if (vmemmap_populate_basepages(addr, next, node, NULL)) 15498c2ecf20Sopenharmony_ci return -ENOMEM; 15508c2ecf20Sopenharmony_ci } 15518c2ecf20Sopenharmony_ci return 0; 15528c2ecf20Sopenharmony_ci} 15538c2ecf20Sopenharmony_ci 15548c2ecf20Sopenharmony_ciint __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 15558c2ecf20Sopenharmony_ci struct vmem_altmap *altmap) 15568c2ecf20Sopenharmony_ci{ 15578c2ecf20Sopenharmony_ci int err; 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci if (end - start < PAGES_PER_SECTION * sizeof(struct page)) 15608c2ecf20Sopenharmony_ci err = vmemmap_populate_basepages(start, end, node, NULL); 15618c2ecf20Sopenharmony_ci else if (boot_cpu_has(X86_FEATURE_PSE)) 15628c2ecf20Sopenharmony_ci err = vmemmap_populate_hugepages(start, end, node, altmap); 15638c2ecf20Sopenharmony_ci else if (altmap) { 15648c2ecf20Sopenharmony_ci pr_err_once("%s: no cpu support for altmap allocations\n", 15658c2ecf20Sopenharmony_ci __func__); 15668c2ecf20Sopenharmony_ci err = -ENOMEM; 15678c2ecf20Sopenharmony_ci } else 15688c2ecf20Sopenharmony_ci err = vmemmap_populate_basepages(start, end, node, NULL); 15698c2ecf20Sopenharmony_ci if (!err) 15708c2ecf20Sopenharmony_ci sync_global_pgds(start, end - 1); 15718c2ecf20Sopenharmony_ci return err; 15728c2ecf20Sopenharmony_ci} 15738c2ecf20Sopenharmony_ci 15748c2ecf20Sopenharmony_ci#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) 15758c2ecf20Sopenharmony_civoid register_page_bootmem_memmap(unsigned long section_nr, 15768c2ecf20Sopenharmony_ci struct page *start_page, unsigned long nr_pages) 15778c2ecf20Sopenharmony_ci{ 15788c2ecf20Sopenharmony_ci unsigned long addr = (unsigned long)start_page; 15798c2ecf20Sopenharmony_ci unsigned long end = (unsigned long)(start_page + nr_pages); 15808c2ecf20Sopenharmony_ci unsigned long next; 15818c2ecf20Sopenharmony_ci pgd_t *pgd; 15828c2ecf20Sopenharmony_ci p4d_t *p4d; 15838c2ecf20Sopenharmony_ci pud_t *pud; 15848c2ecf20Sopenharmony_ci pmd_t *pmd; 15858c2ecf20Sopenharmony_ci unsigned int nr_pmd_pages; 15868c2ecf20Sopenharmony_ci struct page *page; 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_ci for (; addr < end; addr = next) { 15898c2ecf20Sopenharmony_ci pte_t *pte = NULL; 15908c2ecf20Sopenharmony_ci 15918c2ecf20Sopenharmony_ci pgd = pgd_offset_k(addr); 15928c2ecf20Sopenharmony_ci if (pgd_none(*pgd)) { 15938c2ecf20Sopenharmony_ci next = (addr + PAGE_SIZE) & PAGE_MASK; 15948c2ecf20Sopenharmony_ci continue; 15958c2ecf20Sopenharmony_ci } 15968c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); 15978c2ecf20Sopenharmony_ci 15988c2ecf20Sopenharmony_ci p4d = p4d_offset(pgd, addr); 15998c2ecf20Sopenharmony_ci if (p4d_none(*p4d)) { 16008c2ecf20Sopenharmony_ci next = (addr + PAGE_SIZE) & PAGE_MASK; 16018c2ecf20Sopenharmony_ci continue; 16028c2ecf20Sopenharmony_ci } 16038c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO); 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci pud = pud_offset(p4d, addr); 16068c2ecf20Sopenharmony_ci if (pud_none(*pud)) { 16078c2ecf20Sopenharmony_ci next = (addr + PAGE_SIZE) & PAGE_MASK; 16088c2ecf20Sopenharmony_ci continue; 16098c2ecf20Sopenharmony_ci } 16108c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_PSE)) { 16138c2ecf20Sopenharmony_ci next = (addr + PAGE_SIZE) & PAGE_MASK; 16148c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, addr); 16158c2ecf20Sopenharmony_ci if (pmd_none(*pmd)) 16168c2ecf20Sopenharmony_ci continue; 16178c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, pmd_page(*pmd), 16188c2ecf20Sopenharmony_ci MIX_SECTION_INFO); 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci pte = pte_offset_kernel(pmd, addr); 16218c2ecf20Sopenharmony_ci if (pte_none(*pte)) 16228c2ecf20Sopenharmony_ci continue; 16238c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, pte_page(*pte), 16248c2ecf20Sopenharmony_ci SECTION_INFO); 16258c2ecf20Sopenharmony_ci } else { 16268c2ecf20Sopenharmony_ci next = pmd_addr_end(addr, end); 16278c2ecf20Sopenharmony_ci 16288c2ecf20Sopenharmony_ci pmd = pmd_offset(pud, addr); 16298c2ecf20Sopenharmony_ci if (pmd_none(*pmd)) 16308c2ecf20Sopenharmony_ci continue; 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci nr_pmd_pages = 1 << get_order(PMD_SIZE); 16338c2ecf20Sopenharmony_ci page = pmd_page(*pmd); 16348c2ecf20Sopenharmony_ci while (nr_pmd_pages--) 16358c2ecf20Sopenharmony_ci get_page_bootmem(section_nr, page++, 16368c2ecf20Sopenharmony_ci SECTION_INFO); 16378c2ecf20Sopenharmony_ci } 16388c2ecf20Sopenharmony_ci } 16398c2ecf20Sopenharmony_ci} 16408c2ecf20Sopenharmony_ci#endif 16418c2ecf20Sopenharmony_ci 16428c2ecf20Sopenharmony_civoid __meminit vmemmap_populate_print_last(void) 16438c2ecf20Sopenharmony_ci{ 16448c2ecf20Sopenharmony_ci if (p_start) { 16458c2ecf20Sopenharmony_ci pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", 16468c2ecf20Sopenharmony_ci addr_start, addr_end-1, p_start, p_end-1, node_start); 16478c2ecf20Sopenharmony_ci p_start = NULL; 16488c2ecf20Sopenharmony_ci p_end = NULL; 16498c2ecf20Sopenharmony_ci node_start = 0; 16508c2ecf20Sopenharmony_ci } 16518c2ecf20Sopenharmony_ci} 16528c2ecf20Sopenharmony_ci#endif 1653