// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/bootmem_info.h>

#include <asm/processor.h>
#include <asm/bios_ebda.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820/api.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/set_memory.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
#include <asm/ftrace.h>

#include "mm_internal.h"

#include "ident_map.c"

#define DEFINE_POPULATE(fname, type1, type2, init)		\
static inline void fname##_init(struct mm_struct *mm,		\
		type1##_t *arg1, type2##_t *arg2, bool init)	\
{								\
	if (init)						\
		fname##_safe(mm, arg1, arg2);			\
	else							\
		fname(mm, arg1, arg2);				\
}

DEFINE_POPULATE(p4d_populate, p4d, pud, init)
DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
DEFINE_POPULATE(pud_populate, pud, pmd, init)
DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)

#define DEFINE_ENTRY(type1, type2, init)			\
static inline void set_##type1##_init(type1##_t *arg1,		\
			type2##_t arg2, bool init)		\
{								\
	if (init)						\
		set_##type1##_safe(arg1, arg2);			\
	else							\
		set_##type1(arg1, arg2);			\
}

DEFINE_ENTRY(p4d, p4d, init)
DEFINE_ENTRY(pud, pud, init)
DEFINE_ENTRY(pmd, pmd, init)
DEFINE_ENTRY(pte, pte, init)
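
/*
 * As an example, DEFINE_ENTRY(pmd, pmd, init) above expands to roughly:
 *
 *	static inline void set_pmd_init(pmd_t *arg1, pmd_t arg2, bool init)
 *	{
 *		if (init)
 *			set_pmd_safe(arg1, arg2);
 *		else
 *			set_pmd(arg1, arg2);
 *	}
 *
 * i.e. the *_init() helpers pick the *_safe() variant (which warns when a
 * present entry would be silently overwritten) only when 'init' is true.
 */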

static inline pgprot_t prot_sethuge(pgprot_t prot)
{
	WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT);

	return __pgprot(pgprot_val(prot) | _PAGE_PSE);
}

/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
 * in physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

/* Bits supported by the hardware: */
pteval_t __supported_pte_mask __read_mostly = ~0;
/* Bits allowed in normal kernel mappings: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
EXPORT_SYMBOL(__default_kernel_pte_mask);

int force_personality32;

/*
 * noexec32=on|off
 * Control the non-executable heap for 32-bit processes.
 *
 * on	PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
 * off	PROT_READ implies PROT_EXEC
 */
static int __init nonx32_setup(char *str)
{
	if (!strcmp(str, "on"))
		force_personality32 &= ~READ_IMPLIES_EXEC;
	else if (!strcmp(str, "off"))
		force_personality32 |= READ_IMPLIES_EXEC;
	return 1;
}
__setup("noexec32=", nonx32_setup);

static void sync_global_pgds_l5(unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		const pgd_t *pgd_ref = pgd_offset_k(addr);
		struct page *page;

		/* Check for overflow */
		if (addr < start)
			break;

		if (pgd_none(*pgd_ref))
			continue;

		spin_lock(&pgd_lock);
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			spinlock_t *pgt_lock;

			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
			/* the pgt_lock only for Xen */
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			spin_lock(pgt_lock);

			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

			if (pgd_none(*pgd))
				set_pgd(pgd, *pgd_ref);

			spin_unlock(pgt_lock);
		}
		spin_unlock(&pgd_lock);
	}
}

static void sync_global_pgds_l4(unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd_ref = pgd_offset_k(addr);
		const p4d_t *p4d_ref;
		struct page *page;

		/*
		 * With folded p4d, pgd_none() is always false, we need to
		 * handle synchronization on p4d level.
		 */
		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
		p4d_ref = p4d_offset(pgd_ref, addr);

		if (p4d_none(*p4d_ref))
			continue;

		spin_lock(&pgd_lock);
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			p4d_t *p4d;
			spinlock_t *pgt_lock;

			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
			p4d = p4d_offset(pgd, addr);
			/* the pgt_lock only for Xen */
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			spin_lock(pgt_lock);

			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
				BUG_ON(p4d_pgtable(*p4d)
				       != p4d_pgtable(*p4d_ref));

			if (p4d_none(*p4d))
				set_p4d(p4d, *p4d_ref);

			spin_unlock(pgt_lock);
		}
		spin_unlock(&pgd_lock);
	}
}

/*
 * When memory is added, make sure all the processes' MMs have
 * suitable PGD entries in the local PGD level page.
 */
static void sync_global_pgds(unsigned long start, unsigned long end)
{
	if (pgtable_l5_enabled())
		sync_global_pgds_l5(start, end);
	else
		sync_global_pgds_l4(start, end);
}
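
/*
 * sync_global_pgds() matters when __kernel_physical_mapping_init() below has
 * installed a new top-level entry in init_mm, e.g. when hotplugged memory
 * needs a direct-map range under a previously empty PGD/P4D slot. Every pgd
 * on pgd_list then receives a copy of the new kernel entry, so all address
 * spaces see the same kernel mapping.
 */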
"after bootmem" : ""); 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci pr_debug("spp_getpage %p\n", ptr); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci return ptr; 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_cistatic p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci if (pgd_none(*pgd)) { 25262306a36Sopenharmony_ci p4d_t *p4d = (p4d_t *)spp_getpage(); 25362306a36Sopenharmony_ci pgd_populate(&init_mm, pgd, p4d); 25462306a36Sopenharmony_ci if (p4d != p4d_offset(pgd, 0)) 25562306a36Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", 25662306a36Sopenharmony_ci p4d, p4d_offset(pgd, 0)); 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci return p4d_offset(pgd, vaddr); 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_cistatic pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr) 26262306a36Sopenharmony_ci{ 26362306a36Sopenharmony_ci if (p4d_none(*p4d)) { 26462306a36Sopenharmony_ci pud_t *pud = (pud_t *)spp_getpage(); 26562306a36Sopenharmony_ci p4d_populate(&init_mm, p4d, pud); 26662306a36Sopenharmony_ci if (pud != pud_offset(p4d, 0)) 26762306a36Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", 26862306a36Sopenharmony_ci pud, pud_offset(p4d, 0)); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci return pud_offset(p4d, vaddr); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci if (pud_none(*pud)) { 27662306a36Sopenharmony_ci pmd_t *pmd = (pmd_t *) spp_getpage(); 27762306a36Sopenharmony_ci pud_populate(&init_mm, pud, pmd); 27862306a36Sopenharmony_ci if (pmd != pmd_offset(pud, 0)) 27962306a36Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n", 28062306a36Sopenharmony_ci pmd, pmd_offset(pud, 0)); 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci return pmd_offset(pud, vaddr); 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci if (pmd_none(*pmd)) { 28862306a36Sopenharmony_ci pte_t *pte = (pte_t *) spp_getpage(); 28962306a36Sopenharmony_ci pmd_populate_kernel(&init_mm, pmd, pte); 29062306a36Sopenharmony_ci if (pte != pte_offset_kernel(pmd, 0)) 29162306a36Sopenharmony_ci printk(KERN_ERR "PAGETABLE BUG #03!\n"); 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci return pte_offset_kernel(pmd, vaddr); 29462306a36Sopenharmony_ci} 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_cistatic void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci pmd_t *pmd = fill_pmd(pud, vaddr); 29962306a36Sopenharmony_ci pte_t *pte = fill_pte(pmd, vaddr); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci set_pte(pte, new_pte); 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci /* 30462306a36Sopenharmony_ci * It's enough to flush this one mapping. 
	 * (PGE mappings get flushed as well)
	 */
	flush_tlb_one_kernel(vaddr);
}

void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
{
	p4d_t *p4d = p4d_page + p4d_index(vaddr);
	pud_t *pud = fill_pud(p4d, vaddr);

	__set_pte_vaddr(pud, vaddr, new_pte);
}

void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
	pud_t *pud = pud_page + pud_index(vaddr);

	__set_pte_vaddr(pud, vaddr, new_pte);
}

void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
	pgd_t *pgd;
	p4d_t *p4d_page;

	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_ERR
			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}

	p4d_page = p4d_offset(pgd, 0);
	set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
}

pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset_k(vaddr);
	p4d = fill_p4d(pgd, vaddr);
	pud = fill_pud(p4d, vaddr);
	return fill_pmd(pud, vaddr);
}

pte_t * __init populate_extra_pte(unsigned long vaddr)
{
	pmd_t *pmd;

	pmd = populate_extra_pmd(vaddr);
	return fill_pte(pmd, vaddr);
}

/*
 * Create large page table mappings for a range of physical addresses.
 */
static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
					enum page_cache_mode cache)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pgprot_t prot;

	pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) |
		protval_4k_2_large(cachemode2protval(cache));
	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
		pgd = pgd_offset_k((unsigned long)__va(phys));
		if (pgd_none(*pgd)) {
			p4d = (p4d_t *) spp_getpage();
			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		p4d = p4d_offset(pgd, (unsigned long)__va(phys));
		if (p4d_none(*p4d)) {
			pud = (pud_t *) spp_getpage();
			set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pud = pud_offset(p4d, (unsigned long)__va(phys));
		if (pud_none(*pud)) {
			pmd = (pmd_t *) spp_getpage();
			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pmd = pmd_offset(pud, phys);
		BUG_ON(!pmd_none(*pmd));
		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
	}
}

void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, _PAGE_CACHE_MODE_WB);
}

void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, _PAGE_CACHE_MODE_UC);
}

/*
 * The head.S code sets up the kernel high mapping:
 *
 *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
 *
 * phys_base holds the negative offset to the kernel, which is added
 * to the compile time generated pmds. This results in invalid pmds up
 * to the point where we hit the physaddr 0 mapping.
 *
 * We limit the mappings to the region from _text to _brk_end.  _brk_end
 * is rounded up to the 2MB boundary. This catches the invalid pmds as
 * well, as they are located before _text:
 */
void __init cleanup_highmap(void)
{
	unsigned long vaddr = __START_KERNEL_map;
	unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
	unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt;

	/*
	 * Native path, max_pfn_mapped is not set yet.
	 * Xen has valid max_pfn_mapped set in
	 *	arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
	 */
	if (max_pfn_mapped)
		vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);

	for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > end)
			set_pmd(pmd, __pmd(0));
	}
}
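
/*
 * The phys_{pte,pmd,pud,p4d}_init() helpers below build the direct mapping
 * one page-table level at a time: __kernel_physical_mapping_init() walks the
 * PGD entries and calls phys_p4d_init(), which descends into phys_pud_init(),
 * phys_pmd_init() and finally phys_pte_init(). The PUD and PMD levels either
 * install a large page, when the requested page_size_mask allows it, or
 * allocate the next-level table and recurse. All of them return the last
 * physical address they mapped.
 */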

/*
 * Create PTE level page table mapping for physical addresses.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
	      pgprot_t prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;
	pte_t *pte;
	int i;

	pte = pte_page + pte_index(paddr);
	i = pte_index(paddr);

	for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
		paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
					      E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
					      E820_TYPE_RESERVED_KERN))
				set_pte_init(pte, __pte(0), init);
			continue;
		}

		/*
		 * We will re-use the existing mapping.
		 * Xen for example has some special requirements, like mapping
		 * pagetable pages as RO. So assume whoever pre-set up
		 * these mappings knows what they are doing.
		 */
		if (!pte_none(*pte)) {
			if (!after_bootmem)
				pages++;
			continue;
		}

		if (0)
			pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
		pages++;
		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
	}

	update_page_count(PG_LEVEL_4K, pages);

	return paddr_last;
}

/*
 * Create PMD level page table mapping for physical addresses. The virtual
 * and physical address have to be aligned at this level.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;

	int i = pmd_index(paddr);

	for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
		pmd_t *pmd = pmd_page + pmd_index(paddr);
		pte_t *pte;
		pgprot_t new_prot = prot;

		paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
					      E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
					      E820_TYPE_RESERVED_KERN))
				set_pmd_init(pmd, __pmd(0), init);
			continue;
		}

		if (!pmd_none(*pmd)) {
			if (!pmd_large(*pmd)) {
				spin_lock(&init_mm.page_table_lock);
				pte = (pte_t *)pmd_page_vaddr(*pmd);
				paddr_last = phys_pte_init(pte, paddr,
							   paddr_end, prot,
							   init);
				spin_unlock(&init_mm.page_table_lock);
				continue;
			}
			/*
			 * If we are ok with PG_LEVEL_2M mapping, then we will
			 * use the existing mapping,
			 *
			 * Otherwise, we will split the large page mapping but
			 * use the same existing protection bits except for
			 * large page, so that we don't violate Intel's TLB
			 * Application note (317080) which says, while changing
			 * the page sizes, new and old translations should
			 * not differ with respect to page frame and
			 * attributes.
			 */
			if (page_size_mask & (1 << PG_LEVEL_2M)) {
				if (!after_bootmem)
					pages++;
				paddr_last = paddr_next;
				continue;
			}
			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
		}

		if (page_size_mask & (1<<PG_LEVEL_2M)) {
			pages++;
			spin_lock(&init_mm.page_table_lock);
			set_pmd_init(pmd,
				     pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
				     init);
			spin_unlock(&init_mm.page_table_lock);
			paddr_last = paddr_next;
			continue;
		}

		pte = alloc_low_page();
		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);

		spin_lock(&init_mm.page_table_lock);
		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
		spin_unlock(&init_mm.page_table_lock);
	}
	update_page_count(PG_LEVEL_2M, pages);
	return paddr_last;
}
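
/*
 * page_size_mask is a bitmask of the page sizes the caller allows for the
 * range. For example, a caller passing
 *
 *	page_size_mask = (1 << PG_LEVEL_2M) | (1 << PG_LEVEL_1G)
 *
 * lets phys_pmd_init() above use 2MB mappings and phys_pud_init() below use
 * 1GB mappings where possible, while a mask of 0 forces 4KB PTEs everywhere.
 */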

/*
 * Create PUD level page table mapping for physical addresses. The virtual
 * and physical address do not have to be aligned at this level. KASLR can
 * randomize virtual addresses up to this level.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t _prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;
	unsigned long vaddr = (unsigned long)__va(paddr);
	int i = pud_index(vaddr);

	for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
		pud_t *pud;
		pmd_t *pmd;
		pgprot_t prot = _prot;

		vaddr = (unsigned long)__va(paddr);
		pud = pud_page + pud_index(vaddr);
		paddr_next = (paddr & PUD_MASK) + PUD_SIZE;

		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
					      E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
					      E820_TYPE_RESERVED_KERN))
				set_pud_init(pud, __pud(0), init);
			continue;
		}

		if (!pud_none(*pud)) {
			if (!pud_large(*pud)) {
				pmd = pmd_offset(pud, 0);
				paddr_last = phys_pmd_init(pmd, paddr,
							   paddr_end,
							   page_size_mask,
							   prot, init);
				continue;
			}
			/*
			 * If we are ok with PG_LEVEL_1G mapping, then we will
			 * use the existing mapping.
			 *
			 * Otherwise, we will split the gbpage mapping but use
			 * the same existing protection bits except for large
			 * page, so that we don't violate Intel's TLB
			 * Application note (317080) which says, while changing
			 * the page sizes, new and old translations should
			 * not differ with respect to page frame and
			 * attributes.
			 */
			if (page_size_mask & (1 << PG_LEVEL_1G)) {
				if (!after_bootmem)
					pages++;
				paddr_last = paddr_next;
				continue;
			}
			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
		}

		if (page_size_mask & (1<<PG_LEVEL_1G)) {
			pages++;
			spin_lock(&init_mm.page_table_lock);
			set_pud_init(pud,
				     pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
				     init);
			spin_unlock(&init_mm.page_table_lock);
			paddr_last = paddr_next;
			continue;
		}

		pmd = alloc_low_page();
		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		pud_populate_init(&init_mm, pud, pmd, init);
		spin_unlock(&init_mm.page_table_lock);
	}

	update_page_count(PG_LEVEL_1G, pages);

	return paddr_last;
}

static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t prot, bool init)
{
	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;

	paddr_last = paddr_end;
	vaddr = (unsigned long)__va(paddr);
	vaddr_end = (unsigned long)__va(paddr_end);

	if (!pgtable_l5_enabled())
		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
				     page_size_mask, prot, init);

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		p4d_t *p4d = p4d_page + p4d_index(vaddr);
		pud_t *pud;

		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
		paddr = __pa(vaddr);

		if (paddr >= paddr_end) {
			paddr_next = __pa(vaddr_next);
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
					      E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
					      E820_TYPE_RESERVED_KERN))
				set_p4d_init(p4d, __p4d(0), init);
			continue;
		}

		if (!p4d_none(*p4d)) {
			pud = pud_offset(p4d, 0);
			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
						   page_size_mask, prot, init);
			continue;
		}

		pud = alloc_low_page();
		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		p4d_populate_init(&init_mm, p4d, pud, init);
		spin_unlock(&init_mm.page_table_lock);
	}

	return paddr_last;
}

static unsigned long __meminit
__kernel_physical_mapping_init(unsigned long paddr_start,
			       unsigned long paddr_end,
			       unsigned long page_size_mask,
			       pgprot_t prot, bool init)
{
	bool pgd_changed = false;
	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;

	paddr_last = paddr_end;
	vaddr = (unsigned long)__va(paddr_start);
	vaddr_end = (unsigned long)__va(paddr_end);
	vaddr_start = vaddr;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		pgd_t *pgd = pgd_offset_k(vaddr);
		p4d_t *p4d;

		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;

		if (pgd_val(*pgd)) {
			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
						   __pa(vaddr_end),
						   page_size_mask,
						   prot, init);
			continue;
		}

		p4d = alloc_low_page();
		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		if (pgtable_l5_enabled())
			pgd_populate_init(&init_mm, pgd, p4d, init);
		else
			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
					  (pud_t *) p4d, init);

		spin_unlock(&init_mm.page_table_lock);
		pgd_changed = true;
	}

	if (pgd_changed)
		sync_global_pgds(vaddr_start, vaddr_end - 1);

	return paddr_last;
}
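
/*
 * Note the 'init' flag threaded through the helpers above: the creation path
 * (kernel_physical_mapping_init() below) passes init == true, so new entries
 * are written with the *_safe() variants, which warn if a present entry would
 * be overwritten. kernel_physical_mapping_change() passes init == false and
 * uses the plain setters, since it deliberately rewrites existing entries and
 * leaves TLB flushing to its caller.
 */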

/*
 * Create page table mapping for the physical memory for specific physical
 * addresses. Note that it can only be used to populate non-present entries.
 * The virtual and physical addresses have to be aligned on PMD level
 * down. It returns the last physical address mapped.
 */
unsigned long __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
			     unsigned long paddr_end,
			     unsigned long page_size_mask, pgprot_t prot)
{
	return __kernel_physical_mapping_init(paddr_start, paddr_end,
					      page_size_mask, prot, true);
}

/*
 * This function is similar to kernel_physical_mapping_init() above with the
 * exception that it uses set_{pud,pmd}() instead of the set_{pud,pmd}_safe()
 * functions when updating the mapping. The caller is responsible for flushing
 * the TLBs after the function returns.
 */
unsigned long __meminit
kernel_physical_mapping_change(unsigned long paddr_start,
			       unsigned long paddr_end,
			       unsigned long page_size_mask)
{
	return __kernel_physical_mapping_init(paddr_start, paddr_end,
					      page_size_mask, PAGE_KERNEL,
					      false);
}

#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
}
#endif

void __init paging_init(void)
{
	sparse_init();

	/*
	 * clear the default setting with node 0
	 * note: don't use nodes_clear here, that is really clearing when
	 *	 numa support is not compiled in, and later node_set_state
	 *	 will not set it back.
	 */
	node_clear_state(0, N_MEMORY);
	node_clear_state(0, N_NORMAL_MEMORY);

	zone_sizes_init();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_pmd_start to next PMD_SIZE boundary.
 */
static unsigned long unused_pmd_start __meminitdata;

static void __meminit vmemmap_flush_unused_pmd(void)
{
	if (!unused_pmd_start)
		return;
	/*
	 * Clears (unused_pmd_start, PMD_END]
	 */
	memset((void *)unused_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
	unused_pmd_start = 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/* Returns true if the PMD is completely unused and thus it can be freed */
static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
{
	unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);

	/*
	 * Flush the unused range cache to ensure that memchr_inv() will work
	 * for the whole range.
	 */
	vmemmap_flush_unused_pmd();
	memset((void *)addr, PAGE_UNUSED, end - addr);

	return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
}
#endif

static void __meminit __vmemmap_use_sub_pmd(unsigned long start)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed when removing some other adjacent memmap (just in
	 * case the first memmap never gets initialized e.g., because the memory
	 * block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_pmd_start == start) {
		if (likely(IS_ALIGNED(end, PMD_SIZE)))
			unused_pmd_start = 0;
		else
			unused_pmd_start = end;
		return;
	}

	/*
	 * If the range does not contiguously follow the previous one, make sure
	 * to mark the unused range of the previous one so it can be removed.
	 */
	vmemmap_flush_unused_pmd();
	__vmemmap_use_sub_pmd(start);
}


static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	const unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_pmd();

	/*
	 * Could be our memmap page is filled with PAGE_UNUSED already from a
	 * previous remove. Make sure to reset it.
	 */
	__vmemmap_use_sub_pmd(start);

	/*
	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
	 */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);

	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. Remember for the last added PMD where the
	 * unused range begins.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_pmd_start = end;
}
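
/*
 * Rough sizing for the PAGE_UNUSED scheme above: one 2MB vmemmap page holds
 * 2MB / sizeof(struct page) entries; assuming the common 64-byte struct page
 * that is 32768 entries, i.e. the memmap for 128MB of memory (one section).
 * When memory is added or removed in smaller units, only part of such a
 * vmemmap PMD is actually used, and the PAGE_UNUSED markings let
 * vmemmap_pmd_is_unused() decide whether the whole 2MB page can be freed.
 */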
#endif

/*
 * Memory hotplug specific functions
 */
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
 * updating.
 */
static void update_end_of_memory_vars(u64 start, u64 size)
{
	unsigned long end_pfn = PFN_UP(start + size);

	if (end_pfn > max_pfn) {
		max_pfn = end_pfn;
		max_low_pfn = end_pfn;
		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
	}
}

int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
	      struct mhp_params *params)
{
	int ret;

	ret = __add_pages(nid, start_pfn, nr_pages, params);
	WARN_ON_ONCE(ret);

	/* update max_pfn, max_low_pfn and high_memory */
	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
				  nr_pages << PAGE_SHIFT);

	return ret;
}

int arch_add_memory(int nid, u64 start, u64 size,
		    struct mhp_params *params)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	init_memory_mapping(start, start + size, params->pgprot);

	return add_pages(nid, start_pfn, nr_pages, params);
}

static void __meminit free_pagetable(struct page *page, int order)
{
	unsigned long magic;
	unsigned int nr_pages = 1 << order;

	/* bootmem page has reserved flag */
	if (PageReserved(page)) {
		__ClearPageReserved(page);

		magic = page->index;
		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
			while (nr_pages--)
				put_page_bootmem(page++);
		} else
			while (nr_pages--)
				free_reserved_page(page++);
	} else
		free_pages((unsigned long)page_address(page), order);
}

static void __meminit free_hugepage_table(struct page *page,
		struct vmem_altmap *altmap)
{
	if (altmap)
		vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
	else
		free_pagetable(page, get_order(PMD_SIZE));
}
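
/*
 * free_hugepage_table() above distinguishes where the 2MB vmemmap page came
 * from: pages carved out of a vmem_altmap (e.g. device-provided backing for
 * the memmap) are handed back to the altmap, while normally allocated pages
 * go through free_pagetable(), which either releases reserved bootmem pages
 * or returns the pages to the page allocator.
 */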

static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	/* free a pte table */
	free_pagetable(pmd_page(*pmd), 0);
	spin_lock(&init_mm.page_table_lock);
	pmd_clear(pmd);
	spin_unlock(&init_mm.page_table_lock);
}

static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	/* free a pmd table */
	free_pagetable(pud_page(*pud), 0);
	spin_lock(&init_mm.page_table_lock);
	pud_clear(pud);
	spin_unlock(&init_mm.page_table_lock);
}

static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	/* free a pud table */
	free_pagetable(p4d_page(*p4d), 0);
	spin_lock(&init_mm.page_table_lock);
	p4d_clear(p4d);
	spin_unlock(&init_mm.page_table_lock);
}

static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
		 bool direct)
{
	unsigned long next, pages = 0;
	pte_t *pte;
	phys_addr_t phys_addr;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		/*
		 * We mapped [0,1G) memory as identity mapping when
		 * initializing, in arch/x86/kernel/head_64.S. These
		 * pagetables cannot be removed.
		 */
		phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
		if (phys_addr < (phys_addr_t)0x40000000)
			return;

		if (!direct)
			free_pagetable(pte_page(*pte), 0);

		spin_lock(&init_mm.page_table_lock);
		pte_clear(&init_mm, addr, pte);
		spin_unlock(&init_mm.page_table_lock);

		/* For non-direct mapping, pages means nothing. */
		pages++;
	}

	/* Call free_pte_table() in remove_pmd_table(). */
	flush_tlb_all();
	if (direct)
		update_page_count(PG_LEVEL_4K, -pages);
}

static void __meminit
remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
		 bool direct, struct vmem_altmap *altmap)
{
	unsigned long next, pages = 0;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_large(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE)) {
				if (!direct)
					free_hugepage_table(pmd_page(*pmd),
							    altmap);

				spin_lock(&init_mm.page_table_lock);
				pmd_clear(pmd);
				spin_unlock(&init_mm.page_table_lock);
				pages++;
			}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
			else if (vmemmap_pmd_is_unused(addr, next)) {
				free_hugepage_table(pmd_page(*pmd),
						    altmap);
				spin_lock(&init_mm.page_table_lock);
				pmd_clear(pmd);
				spin_unlock(&init_mm.page_table_lock);
			}
#endif
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next, direct);
		free_pte_table(pte_base, pmd);
	}

	/* Call free_pmd_table() in remove_pud_table(). */
	if (direct)
		update_page_count(PG_LEVEL_2M, -pages);
}
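
/*
 * In remove_pmd_table() above a 2MB mapping is torn down either when the
 * [addr, next) range covers it completely or, for the vmemmap case, when
 * vmemmap_pmd_is_unused() reports that every byte of the page is marked
 * PAGE_UNUSED. A partially used large mapping is otherwise left in place
 * rather than being split.
 */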
static void __meminit
remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
		 bool direct, struct vmem_altmap *altmap)
{
	unsigned long next, pages = 0;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_large(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE)) {
				if (!direct)
					free_hugepage_table(pmd_page(*pmd),
							    altmap);

				spin_lock(&init_mm.page_table_lock);
				pmd_clear(pmd);
				spin_unlock(&init_mm.page_table_lock);
				pages++;
			}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
			else if (vmemmap_pmd_is_unused(addr, next)) {
				free_hugepage_table(pmd_page(*pmd),
						    altmap);
				spin_lock(&init_mm.page_table_lock);
				pmd_clear(pmd);
				spin_unlock(&init_mm.page_table_lock);
			}
#endif
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next, direct);
		free_pte_table(pte_base, pmd);
	}

	/* Call free_pmd_table() in remove_pud_table(). */
	if (direct)
		update_page_count(PG_LEVEL_2M, -pages);
}

static void __meminit
remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
		 struct vmem_altmap *altmap, bool direct)
{
	unsigned long next, pages = 0;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_large(*pud) &&
		    IS_ALIGNED(addr, PUD_SIZE) &&
		    IS_ALIGNED(next, PUD_SIZE)) {
			spin_lock(&init_mm.page_table_lock);
			pud_clear(pud);
			spin_unlock(&init_mm.page_table_lock);
			pages++;
			continue;
		}

		pmd_base = pmd_offset(pud, 0);
		remove_pmd_table(pmd_base, addr, next, direct, altmap);
		free_pmd_table(pmd_base, pud);
	}

	if (direct)
		update_page_count(PG_LEVEL_1G, -pages);
}

static void __meminit
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
		 struct vmem_altmap *altmap, bool direct)
{
	unsigned long next, pages = 0;
	pud_t *pud_base;
	p4d_t *p4d;

	p4d = p4d_start + p4d_index(addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);

		if (!p4d_present(*p4d))
			continue;

		BUILD_BUG_ON(p4d_large(*p4d));

		pud_base = pud_offset(p4d, 0);
		remove_pud_table(pud_base, addr, next, altmap, direct);
		/*
		 * For 4-level page tables we do not want to free PUDs, but in the
		 * 5-level case we should free them. This code will have to change
		 * to adapt for boot-time switching between 4 and 5 level page tables.
		 */
		if (pgtable_l5_enabled())
			free_pud_table(pud_base, p4d);
	}

	if (direct)
		update_page_count(PG_LEVEL_512G, -pages);
}
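
/*
 * Common tear-down entry point: both vmemmap_free() and
 * kernel_physical_mapping_remove() funnel into remove_pagetable().  The
 * direct flag tells the lower levels whether this is the kernel direct
 * mapping (backing pages stay allocated) or a vmemmap range (backing
 * pages are freed too).
 */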
/* start and end are both virtual addresses. */
static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct,
		 struct vmem_altmap *altmap)
{
	unsigned long next;
	unsigned long addr;
	pgd_t *pgd;
	p4d_t *p4d;

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (!pgd_present(*pgd))
			continue;

		p4d = p4d_offset(pgd, 0);
		remove_p4d_table(p4d, addr, next, altmap, direct);
	}

	flush_tlb_all();
}

void __ref vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
	VM_BUG_ON(!PAGE_ALIGNED(start));
	VM_BUG_ON(!PAGE_ALIGNED(end));

	remove_pagetable(start, end, false, altmap);
}

static void __meminit
kernel_physical_mapping_remove(unsigned long start, unsigned long end)
{
	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	remove_pagetable(start, end, true, NULL);
}

void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	__remove_pages(start_pfn, nr_pages, altmap);
	kernel_physical_mapping_remove(start, start + size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_vsyscall;

static void __init register_page_bootmem_info(void)
{
#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP)
	int i;

	for_each_online_node(i)
		register_page_bootmem_info_node(NODE_DATA(i));
#endif
}
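
/*
 * Note (rationale inferred from the code below): pre-allocating the
 * shared vmalloc/ioremap levels once at boot means a later vmalloc
 * mapping never needs to install a new top-level entry and propagate it
 * into every task's page-tables.
 */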
/*
 * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
 * Only the level which needs to be synchronized between all page-tables is
 * allocated because the synchronization can be expensive.
 */
static void __init preallocate_vmalloc_pages(void)
{
	unsigned long addr;
	const char *lvl;

	for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;

		lvl = "p4d";
		p4d = p4d_alloc(&init_mm, pgd, addr);
		if (!p4d)
			goto failed;

		if (pgtable_l5_enabled())
			continue;

		/*
		 * The goal here is to allocate all possibly required
		 * hardware page tables pointed to by the top hardware
		 * level.
		 *
		 * On 4-level systems, the P4D layer is folded away and
		 * the above code does no preallocation.  Below, go down
		 * to the pud _software_ level to ensure the second
		 * hardware level is allocated on 4-level systems too.
		 */
		lvl = "pud";
		pud = pud_alloc(&init_mm, p4d, addr);
		if (!pud)
			goto failed;
	}

	return;

failed:

	/*
	 * The pages have to be there now or they will be missing in
	 * process page-tables later.
	 */
	panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
}

void __init mem_init(void)
{
	pci_iommu_alloc();

	/* clear_bss() has already cleared the empty_zero_page */

	/* this will put all memory onto the freelists */
	memblock_free_all();
	after_bootmem = 1;
	x86_init.hyper.init_after_bootmem();

	/*
	 * Must be done after boot memory is put on freelist, because here we
	 * might set fields in deferred struct pages that have not yet been
	 * initialized, and memblock_free_all() initializes all the reserved
	 * deferred pages for us.
	 */
	register_page_bootmem_info();

	/* Register memory areas for /proc/kcore */
	if (get_gate_vma(&init_mm))
		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);

	preallocate_vmalloc_pages();
}

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
{
	/*
	 * More CPUs always led to greater speedups on tested systems, up to
	 * all the nodes' CPUs.  Use all since the system is otherwise idle
	 * now.
	 */
	return max_t(int, cpumask_weight(node_cpumask), 1);
}
#endif

int kernel_set_to_readonly;
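
/*
 * Write-protect the kernel text and read-only data, make everything
 * between _etext and the end of the brk area non-executable, and free
 * the unused alignment gaps between the kernel image sections.
 */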
void mark_rodata_ro(void)
{
	unsigned long start = PFN_ALIGN(_text);
	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
	unsigned long end = (unsigned long)__end_rodata_hpage_align;
	unsigned long text_end = PFN_ALIGN(_etext);
	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
	unsigned long all_end;

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);
	set_memory_ro(start, (end - start) >> PAGE_SHIFT);

	kernel_set_to_readonly = 1;

	/*
	 * The rodata/data/bss/brk section (but not the kernel text!)
	 * should also be not-executable.
	 *
	 * We align all_end to PMD_SIZE because the existing mapping
	 * is a full PMD. If we would align _brk_end to PAGE_SIZE we
	 * split the PMD and the remainder between _brk_end and the end
	 * of the PMD will remain mapped executable.
	 *
	 * Any PMD which was setup after the one which covers _brk_end
	 * has been zapped already via cleanup_highmem().
	 */
	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);

	set_ftrace_ops_ro();

#ifdef CONFIG_CPA_DEBUG
	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
	set_memory_rw(start, (end-start) >> PAGE_SHIFT);

	printk(KERN_INFO "Testing CPA: again\n");
	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif

	free_kernel_image_pages("unused kernel image (text/rodata gap)",
				(void *)text_end, (void *)rodata_start);
	free_kernel_image_pages("unused kernel image (rodata/data gap)",
				(void *)rodata_end, (void *)_sdata);

	debug_checkwx();
}

/*
 * Block size is the minimum amount of memory which can be hotplugged or
 * hotremoved. It must be a power of two and must be equal to or larger
 * than MIN_MEMORY_BLOCK_SIZE.
 */
#define MAX_BLOCK_SIZE (2UL << 30)

/* Amount of RAM needed to start using large blocks */
#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)

/* Adjustable memory block size */
static unsigned long set_memory_block_size;
int __init set_memory_block_size_order(unsigned int order)
{
	unsigned long size = 1UL << order;

	if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE)
		return -EINVAL;

	set_memory_block_size = size;
	return 0;
}

static unsigned long probe_memory_block_size(void)
{
	unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
	unsigned long bz;

	/* If memory block size has been set, then use it */
	bz = set_memory_block_size;
	if (bz)
		goto done;

	/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
	if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
		bz = MIN_MEMORY_BLOCK_SIZE;
		goto done;
	}

	/*
	 * Use max block size to minimize overhead on bare metal, where
	 * alignment for memory hotplug isn't a concern.
	 */
	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		bz = MAX_BLOCK_SIZE;
		goto done;
	}

	/* Find the largest allowed block size that aligns to memory end */
	for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
		if (IS_ALIGNED(boot_mem_end, bz))
			break;
	}
done:
	pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);

	return bz;
}

static unsigned long memory_block_size_probed;
unsigned long memory_block_size_bytes(void)
{
	if (!memory_block_size_probed)
		memory_block_size_probed = probe_memory_block_size();

	return memory_block_size_probed;
}
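
/*
 * Worked example (sizes assumed for illustration): a 9GB machine is
 * below MEM_SIZE_FOR_LARGE_BLOCK and keeps MIN_MEMORY_BLOCK_SIZE; a
 * 65GB bare-metal machine uses the full 2GB MAX_BLOCK_SIZE; a 65GB
 * guest under a hypervisor falls through to the alignment loop and ends
 * up with 1GB blocks, since 65GB is 1GB-aligned but not 2GB-aligned.
 */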

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
			       unsigned long addr, unsigned long next)
{
	pte_t entry;

	entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
			PAGE_KERNEL_LARGE);
	set_pmd(pmd, __pmd(pte_val(entry)));

	/* check to see if we have contiguous blocks */
	if (p_end != p || node_start != node) {
		if (p_start)
			pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
				 addr_start, addr_end-1, p_start, p_end-1, node_start);
		addr_start = addr;
		node_start = node;
		p_start = p;
	}

	addr_end = addr + PMD_SIZE;
	p_end = p + PMD_SIZE;

	if (!IS_ALIGNED(addr, PMD_SIZE) ||
	    !IS_ALIGNED(next, PMD_SIZE))
		vmemmap_use_new_sub_pmd(addr, next);
}

int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				unsigned long addr, unsigned long next)
{
	int large = pmd_large(*pmd);

	if (pmd_large(*pmd)) {
		vmemmap_verify((pte_t *)pmd, node, addr, next);
		vmemmap_use_sub_pmd(addr, next);
	}

	return large;
}
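
/*
 * Pick how to back the struct page array: ranges smaller than one
 * section's worth of memmap use base pages, PSE-capable CPUs get 2M
 * pages, and altmap (device-backed) allocations require PSE because
 * only the hugepage path implements them.
 */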
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int err;

	VM_BUG_ON(!PAGE_ALIGNED(start));
	VM_BUG_ON(!PAGE_ALIGNED(end));

	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
		err = vmemmap_populate_basepages(start, end, node, NULL);
	else if (boot_cpu_has(X86_FEATURE_PSE))
		err = vmemmap_populate_hugepages(start, end, node, altmap);
	else if (altmap) {
		pr_err_once("%s: no cpu support for altmap allocations\n",
				__func__);
		err = -ENOMEM;
	} else
		err = vmemmap_populate_basepages(start, end, node, NULL);
	if (!err)
		sync_global_pgds(start, end - 1);
	return err;
}

#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
void register_page_bootmem_memmap(unsigned long section_nr,
				  struct page *start_page, unsigned long nr_pages)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + nr_pages);
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	unsigned int nr_pmd_pages;
	struct page *page;

	for (; addr < end; addr = next) {
		pte_t *pte = NULL;

		pgd = pgd_offset_k(addr);
		if (pgd_none(*pgd)) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			continue;
		}
		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);

		p4d = p4d_offset(pgd, addr);
		if (p4d_none(*p4d)) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			continue;
		}
		get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);

		pud = pud_offset(p4d, addr);
		if (pud_none(*pud)) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			continue;
		}
		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);

		if (!boot_cpu_has(X86_FEATURE_PSE)) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			pmd = pmd_offset(pud, addr);
			if (pmd_none(*pmd))
				continue;
			get_page_bootmem(section_nr, pmd_page(*pmd),
					 MIX_SECTION_INFO);

			pte = pte_offset_kernel(pmd, addr);
			if (pte_none(*pte))
				continue;
			get_page_bootmem(section_nr, pte_page(*pte),
					 SECTION_INFO);
		} else {
			next = pmd_addr_end(addr, end);

			pmd = pmd_offset(pud, addr);
			if (pmd_none(*pmd))
				continue;

			nr_pmd_pages = 1 << get_order(PMD_SIZE);
			page = pmd_page(*pmd);
			while (nr_pmd_pages--)
				get_page_bootmem(section_nr, page++,
						 SECTION_INFO);
		}
	}
}
#endif

void __meminit vmemmap_populate_print_last(void)
{
	if (p_start) {
		pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
			 addr_start, addr_end-1, p_start, p_end-1, node_start);
		p_start = NULL;
		p_end = NULL;
		node_start = 0;
	}
}
#endif