// SPDX-License-Identifier: GPL-2.0

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/kallsyms.h>
#include <linux/kcore.h>
#include <linux/pgtable.h>
#include <linux/random.h>

#include <asm/cpu_entry_area.h>
#include <asm/fixmap.h>
#include <asm/desc.h>
#include <asm/kasan.h>

/* Per-CPU backing store for the entry stack mapped into the cpu_entry_area. */
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);

#ifdef CONFIG_X86_64
/* Backing store for the per-CPU IST exception stacks. */
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
/* Per-CPU pointer to this CPU's exception stacks inside its cpu_entry_area. */
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);

/* Randomized per-CPU slot index into the CPU_ENTRY_AREA_PER_CPU region. */
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);

/*
 * Return the randomized cpu_entry_area slot for @cpu, as assigned by
 * init_cea_offsets().  Must stay __always_inline: it is used from the
 * noinstr get_cpu_entry_area() below.
 */
static __always_inline unsigned int cea_offset(unsigned int cpu)
{
	return per_cpu(_cea_offset, cpu);
}

/*
 * Assign each possible CPU a distinct, randomly chosen cpu_entry_area
 * slot in [0, max_cea), so the virtual placement of entry areas is not
 * predictable from the CPU number.
 */
static __init void init_cea_offsets(void)
{
	unsigned int max_cea;
	unsigned int i, j;

	/* Number of CPU_ENTRY_AREA_SIZE slots that fit in the map (one
	 * PAGE_SIZE is reserved, per CPU_ENTRY_AREA_MAP_SIZE layout). */
	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;

	/* O(sodding terrible) - quadratic rejection sampling, boot-time only */
	for_each_possible_cpu(i) {
		unsigned int cea;

again:
		/*
		 * Use get_random_u32() directly instead of prandom_u32_max()
		 * because with CONFIG_RANDOMIZE_BASE=n the prandom seed may
		 * not be generated this early in boot.
		 */
		cea = (u32)(((u64) get_random_u32() * max_cea) >> 32);

		/*
		 * Retry if any CPU visited so far already owns this slot;
		 * stop scanning once we reach our own (not yet assigned)
		 * entry.
		 */
		for_each_possible_cpu(j) {
			if (cea_offset(j) == cea)
				goto again;

			if (i == j)
				break;
		}

		per_cpu(_cea_offset, i) = cea;
	}
}
#else /* !X86_64 */
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);

/* 32-bit: no randomization, the slot index is simply the CPU number. */
static __always_inline unsigned int cea_offset(unsigned int cpu)
{
	return cpu;
}
static inline void init_cea_offsets(void) { }
#endif

/* Is called from entry code, so must be noinstr */
noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

	return (struct cpu_entry_area *) va;
}
EXPORT_SYMBOL(get_cpu_entry_area);

/*
 * Install a single PTE mapping @pa at the cpu_entry_area address
 * @cea_vaddr with protection @flags.
 */
void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
{
	unsigned long va = (unsigned long) cea_vaddr;
	pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);

	/*
	 * The cpu_entry_area is shared between the user and kernel
	 * page tables.  All of its ptes can safely be global.
	 * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
	 * non-present PTEs, so be careful not to set it in that
	 * case to avoid confusion.
	 */
	if (boot_cpu_has(X86_FEATURE_PGE) &&
	    (pgprot_val(flags) & _PAGE_PRESENT))
		pte = pte_set_flags(pte, _PAGE_GLOBAL);

	set_pte_vaddr(va, pte);
}

/*
 * Map @pages pages of the per-cpu object @ptr into the cpu_entry_area
 * starting at @cea_vaddr, one PTE per page, with protection @prot.
 */
static void __init
cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
{
	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}

/*
 * Map the Intel PEBS/BTS debug store for @cpu into its cpu_entry_area
 * and pre-populate (non-present) PTEs for the debug store buffers.
 * No-op on non-Intel CPUs or when CONFIG_CPU_SUP_INTEL is disabled.
 */
static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
	unsigned int npages;
	void *cea;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return;

	cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
	npages = sizeof(struct debug_store) / PAGE_SIZE;
	BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
	cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
			     PAGE_KERNEL);

	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
	/*
	 * Force the population of PMDs for not yet allocated per cpu
	 * memory like debug store buffers.
	 */
	npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
	for (; npages; npages--, cea += PAGE_SIZE)
		cea_set_pte(cea, 0, PAGE_NONE);
#endif
}

#ifdef CONFIG_X86_64

/*
 * Map the IST stack ##name from the per-cpu backing store into the
 * cpu_entry_area.  Relies on 'estacks', 'cea' and 'npages' being in
 * scope at the expansion site (see percpu_setup_exception_stacks()).
 */
#define cea_map_stack(name) do {					\
	npages = sizeof(estacks->name## _stack) / PAGE_SIZE;		\
	cea_map_percpu_pages(cea->estacks.name## _stack,		\
			estacks->name## _stack, npages, PAGE_KERNEL);	\
	} while (0)

/* Map the 64-bit IST exception stacks for @cpu into its cpu_entry_area. */
static void __init percpu_setup_exception_stacks(unsigned int cpu)
{
	struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
	unsigned int npages;

	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);

	per_cpu(cea_exception_stacks, cpu) = &cea->estacks;

	/*
	 * The exceptions stack mappings in the per cpu area are protected
	 * by guard pages so each stack must be mapped separately. DB2 is
	 * not mapped; it just exists to catch triple nesting of #DB.
	 */
	cea_map_stack(DF);
	cea_map_stack(NMI);
	cea_map_stack(DB);
	cea_map_stack(MCE);

	/* #VC stacks are only needed for SEV-ES guests (encrypted state). */
	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
		if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
			cea_map_stack(VC);
			cea_map_stack(VC2);
		}
	}
}
#else
/* 32-bit: only the doublefault stack needs a cpu_entry_area mapping. */
static inline void percpu_setup_exception_stacks(unsigned int cpu)
{
	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);

	cea_map_percpu_pages(&cea->doublefault_stack,
			     &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
}
#endif

/* Setup the fixmap mappings only once per-processor */
static void __init setup_cpu_entry_area(unsigned int cpu)
{
	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
	pgprot_t gdt_prot = PAGE_KERNEL_RO;
	pgprot_t tss_prot = PAGE_KERNEL_RO;
#else
	/*
	 * On native 32-bit systems, the GDT cannot be read-only because
	 * our double fault handler uses a task gate, and entering through
	 * a task gate needs to change an available TSS to busy.  If the
	 * GDT is read-only, that will triple fault.  The TSS cannot be
	 * read-only because the CPU writes to it on task switches.
	 *
	 * On Xen PV, the GDT must be read-only because the hypervisor
	 * requires it.
	 */
	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
		PAGE_KERNEL_RO : PAGE_KERNEL;
	pgprot_t tss_prot = PAGE_KERNEL;
#endif

	/* KASAN shadow must exist before the area is mapped and touched. */
	kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
					early_cpu_to_node(cpu));

	cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);

	cea_map_percpu_pages(&cea->entry_stack_page,
			     per_cpu_ptr(&entry_stack_storage, cpu), 1,
			     PAGE_KERNEL);

	/*
	 * The Intel SDM says (Volume 3, 7.2.1):
	 *
	 *  Avoid placing a page boundary in the part of the TSS that the
	 *  processor reads during a task switch (the first 104 bytes). The
	 *  processor may not correctly perform address translations if a
	 *  boundary occurs in this area. During a task switch, the processor
	 *  reads and writes into the first 104 bytes of each TSS (using
	 *  contiguous physical addresses beginning with the physical address
	 *  of the first byte of the TSS). So, after TSS access begins, if
	 *  part of the 104 bytes is not physically contiguous, the processor
	 *  will access incorrect information without generating a page-fault
	 *  exception.
	 *
	 * There are also a lot of errata involving the TSS spanning a page
	 * boundary.  Assert that we're not doing that.
	 */
	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
	/*
	 * VMX changes the host TR limit to 0x67 after a VM exit. This is
	 * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
	 * that this is correct.
	 */
	BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
	BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);

	cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);

#ifdef CONFIG_X86_32
	per_cpu(cpu_entry_area, cpu) = cea;
#endif

	percpu_setup_exception_stacks(cpu);

	percpu_setup_debug_store(cpu);
}

/*
 * 32-bit only: pre-populate PTE pages for the whole cpu_entry_area map
 * so later cea_set_pte() calls never need to allocate page tables.
 */
static __init void setup_cpu_entry_area_ptes(void)
{
#ifdef CONFIG_X86_32
	unsigned long start, end;

	/* The +1 is for the readonly IDT: */
	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);

	start = CPU_ENTRY_AREA_BASE;
	end = start + CPU_ENTRY_AREA_MAP_SIZE;

	/* Careful here: start + PMD_SIZE might wrap around */
	for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
		populate_extra_pte(start);
#endif
}

/*
 * Boot-time entry point: randomize the per-CPU slots, pre-populate the
 * page tables, then build the cpu_entry_area for every possible CPU.
 */
void __init setup_cpu_entry_areas(void)
{
	unsigned int cpu;

	init_cea_offsets();

	setup_cpu_entry_area_ptes();

	for_each_possible_cpu(cpu)
		setup_cpu_entry_area(cpu);

	/*
	 * This is the last essential update to swapper_pgdir which needs
	 * to be synchronized to initial_page_table on 32bit.
	 */
	sync_initial_page_table();
}