162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/spinlock.h> 462306a36Sopenharmony_ci#include <linux/percpu.h> 562306a36Sopenharmony_ci#include <linux/kallsyms.h> 662306a36Sopenharmony_ci#include <linux/kcore.h> 762306a36Sopenharmony_ci#include <linux/pgtable.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include <asm/cpu_entry_area.h> 1062306a36Sopenharmony_ci#include <asm/fixmap.h> 1162306a36Sopenharmony_ci#include <asm/desc.h> 1262306a36Sopenharmony_ci#include <asm/kasan.h> 1362306a36Sopenharmony_ci#include <asm/setup.h> 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_cistatic DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#ifdef CONFIG_X86_64 1862306a36Sopenharmony_cistatic DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); 1962306a36Sopenharmony_ciDEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cistatic __always_inline unsigned int cea_offset(unsigned int cpu) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci return per_cpu(_cea_offset, cpu); 2662306a36Sopenharmony_ci} 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_cistatic __init void init_cea_offsets(void) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci unsigned int max_cea; 3162306a36Sopenharmony_ci unsigned int i, j; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci if (!kaslr_enabled()) { 3462306a36Sopenharmony_ci for_each_possible_cpu(i) 3562306a36Sopenharmony_ci per_cpu(_cea_offset, i) = i; 3662306a36Sopenharmony_ci return; 3762306a36Sopenharmony_ci } 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci /* O(sodding terrible) */ 4262306a36Sopenharmony_ci for_each_possible_cpu(i) { 4362306a36Sopenharmony_ci unsigned int cea; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ciagain: 4662306a36Sopenharmony_ci cea = get_random_u32_below(max_cea); 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci for_each_possible_cpu(j) { 4962306a36Sopenharmony_ci if (cea_offset(j) == cea) 5062306a36Sopenharmony_ci goto again; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci if (i == j) 5362306a36Sopenharmony_ci break; 5462306a36Sopenharmony_ci } 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci per_cpu(_cea_offset, i) = cea; 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci} 5962306a36Sopenharmony_ci#else /* !X86_64 */ 6062306a36Sopenharmony_ciDECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic __always_inline unsigned int cea_offset(unsigned int cpu) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci return cpu; 6562306a36Sopenharmony_ci} 6662306a36Sopenharmony_cistatic inline void init_cea_offsets(void) { } 6762306a36Sopenharmony_ci#endif 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci/* Is called from entry code, so must be noinstr */ 7062306a36Sopenharmony_cinoinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; 7362306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci return (struct cpu_entry_area *) va; 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ciEXPORT_SYMBOL(get_cpu_entry_area); 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_civoid cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci unsigned long va = (unsigned long) cea_vaddr; 8262306a36Sopenharmony_ci pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags); 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci /* 8562306a36Sopenharmony_ci * The cpu_entry_area is shared between the user and kernel 8662306a36Sopenharmony_ci * page tables. All of its ptes can safely be global. 8762306a36Sopenharmony_ci * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for 8862306a36Sopenharmony_ci * non-present PTEs, so be careful not to set it in that 8962306a36Sopenharmony_ci * case to avoid confusion. 9062306a36Sopenharmony_ci */ 9162306a36Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_PGE) && 9262306a36Sopenharmony_ci (pgprot_val(flags) & _PAGE_PRESENT)) 9362306a36Sopenharmony_ci pte = pte_set_flags(pte, _PAGE_GLOBAL); 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci set_pte_vaddr(va, pte); 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic void __init 9962306a36Sopenharmony_cicea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) 10262306a36Sopenharmony_ci cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_cistatic void __init percpu_setup_debug_store(unsigned int cpu) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci#ifdef CONFIG_CPU_SUP_INTEL 10862306a36Sopenharmony_ci unsigned int npages; 10962306a36Sopenharmony_ci void *cea; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 11262306a36Sopenharmony_ci return; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_store; 11562306a36Sopenharmony_ci npages = sizeof(struct debug_store) / PAGE_SIZE; 11662306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0); 11762306a36Sopenharmony_ci cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages, 11862306a36Sopenharmony_ci PAGE_KERNEL); 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers; 12162306a36Sopenharmony_ci /* 12262306a36Sopenharmony_ci * Force the population of PMDs for not yet allocated per cpu 12362306a36Sopenharmony_ci * memory like debug store buffers. 12462306a36Sopenharmony_ci */ 12562306a36Sopenharmony_ci npages = sizeof(struct debug_store_buffers) / PAGE_SIZE; 12662306a36Sopenharmony_ci for (; npages; npages--, cea += PAGE_SIZE) 12762306a36Sopenharmony_ci cea_set_pte(cea, 0, PAGE_NONE); 12862306a36Sopenharmony_ci#endif 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci#ifdef CONFIG_X86_64 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci#define cea_map_stack(name) do { \ 13462306a36Sopenharmony_ci npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \ 13562306a36Sopenharmony_ci cea_map_percpu_pages(cea->estacks.name## _stack, \ 13662306a36Sopenharmony_ci estacks->name## _stack, npages, PAGE_KERNEL); \ 13762306a36Sopenharmony_ci } while (0) 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cistatic void __init percpu_setup_exception_stacks(unsigned int cpu) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu); 14262306a36Sopenharmony_ci struct cpu_entry_area *cea = get_cpu_entry_area(cpu); 14362306a36Sopenharmony_ci unsigned int npages; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci per_cpu(cea_exception_stacks, cpu) = &cea->estacks; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci /* 15062306a36Sopenharmony_ci * The exceptions stack mappings in the per cpu area are protected 15162306a36Sopenharmony_ci * by guard pages so each stack must be mapped separately. DB2 is 15262306a36Sopenharmony_ci * not mapped; it just exists to catch triple nesting of #DB. 15362306a36Sopenharmony_ci */ 15462306a36Sopenharmony_ci cea_map_stack(DF); 15562306a36Sopenharmony_ci cea_map_stack(NMI); 15662306a36Sopenharmony_ci cea_map_stack(DB); 15762306a36Sopenharmony_ci cea_map_stack(MCE); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { 16062306a36Sopenharmony_ci if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { 16162306a36Sopenharmony_ci cea_map_stack(VC); 16262306a36Sopenharmony_ci cea_map_stack(VC2); 16362306a36Sopenharmony_ci } 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci#else 16762306a36Sopenharmony_cistatic inline void percpu_setup_exception_stacks(unsigned int cpu) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci struct cpu_entry_area *cea = get_cpu_entry_area(cpu); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci cea_map_percpu_pages(&cea->doublefault_stack, 17262306a36Sopenharmony_ci &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci#endif 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci/* Setup the fixmap mappings only once per-processor */ 17762306a36Sopenharmony_cistatic void __init setup_cpu_entry_area(unsigned int cpu) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci struct cpu_entry_area *cea = get_cpu_entry_area(cpu); 18062306a36Sopenharmony_ci#ifdef CONFIG_X86_64 18162306a36Sopenharmony_ci /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ 18262306a36Sopenharmony_ci pgprot_t gdt_prot = PAGE_KERNEL_RO; 18362306a36Sopenharmony_ci pgprot_t tss_prot = PAGE_KERNEL_RO; 18462306a36Sopenharmony_ci#else 18562306a36Sopenharmony_ci /* 18662306a36Sopenharmony_ci * On 32-bit systems, the GDT cannot be read-only because 18762306a36Sopenharmony_ci * our double fault handler uses a task gate, and entering through 18862306a36Sopenharmony_ci * a task gate needs to change an available TSS to busy. If the 18962306a36Sopenharmony_ci * GDT is read-only, that will triple fault. The TSS cannot be 19062306a36Sopenharmony_ci * read-only because the CPU writes to it on task switches. 19162306a36Sopenharmony_ci */ 19262306a36Sopenharmony_ci pgprot_t gdt_prot = PAGE_KERNEL; 19362306a36Sopenharmony_ci pgprot_t tss_prot = PAGE_KERNEL; 19462306a36Sopenharmony_ci#endif 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, 19762306a36Sopenharmony_ci early_cpu_to_node(cpu)); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci cea_map_percpu_pages(&cea->entry_stack_page, 20262306a36Sopenharmony_ci per_cpu_ptr(&entry_stack_storage, cpu), 1, 20362306a36Sopenharmony_ci PAGE_KERNEL); 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* 20662306a36Sopenharmony_ci * The Intel SDM says (Volume 3, 7.2.1): 20762306a36Sopenharmony_ci * 20862306a36Sopenharmony_ci * Avoid placing a page boundary in the part of the TSS that the 20962306a36Sopenharmony_ci * processor reads during a task switch (the first 104 bytes). The 21062306a36Sopenharmony_ci * processor may not correctly perform address translations if a 21162306a36Sopenharmony_ci * boundary occurs in this area. During a task switch, the processor 21262306a36Sopenharmony_ci * reads and writes into the first 104 bytes of each TSS (using 21362306a36Sopenharmony_ci * contiguous physical addresses beginning with the physical address 21462306a36Sopenharmony_ci * of the first byte of the TSS). So, after TSS access begins, if 21562306a36Sopenharmony_ci * part of the 104 bytes is not physically contiguous, the processor 21662306a36Sopenharmony_ci * will access incorrect information without generating a page-fault 21762306a36Sopenharmony_ci * exception. 21862306a36Sopenharmony_ci * 21962306a36Sopenharmony_ci * There are also a lot of errata involving the TSS spanning a page 22062306a36Sopenharmony_ci * boundary. Assert that we're not doing that. 22162306a36Sopenharmony_ci */ 22262306a36Sopenharmony_ci BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ 22362306a36Sopenharmony_ci offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); 22462306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); 22562306a36Sopenharmony_ci /* 22662306a36Sopenharmony_ci * VMX changes the host TR limit to 0x67 after a VM exit. This is 22762306a36Sopenharmony_ci * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure 22862306a36Sopenharmony_ci * that this is correct. 22962306a36Sopenharmony_ci */ 23062306a36Sopenharmony_ci BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0); 23162306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), 23462306a36Sopenharmony_ci sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci#ifdef CONFIG_X86_32 23762306a36Sopenharmony_ci per_cpu(cpu_entry_area, cpu) = cea; 23862306a36Sopenharmony_ci#endif 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci percpu_setup_exception_stacks(cpu); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci percpu_setup_debug_store(cpu); 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_cistatic __init void setup_cpu_entry_area_ptes(void) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci#ifdef CONFIG_X86_32 24862306a36Sopenharmony_ci unsigned long start, end; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci /* The +1 is for the readonly IDT: */ 25162306a36Sopenharmony_ci BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); 25262306a36Sopenharmony_ci BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci start = CPU_ENTRY_AREA_BASE; 25562306a36Sopenharmony_ci end = start + CPU_ENTRY_AREA_MAP_SIZE; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci /* Careful here: start + PMD_SIZE might wrap around */ 25862306a36Sopenharmony_ci for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) 25962306a36Sopenharmony_ci populate_extra_pte(start); 26062306a36Sopenharmony_ci#endif 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_civoid __init setup_cpu_entry_areas(void) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci unsigned int cpu; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci init_cea_offsets(); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci setup_cpu_entry_area_ptes(); 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci for_each_possible_cpu(cpu) 27262306a36Sopenharmony_ci setup_cpu_entry_area(cpu); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci /* 27562306a36Sopenharmony_ci * This is the last essential update to swapper_pgdir which needs 27662306a36Sopenharmony_ci * to be synchronized to initial_page_table on 32bit. 27762306a36Sopenharmony_ci */ 27862306a36Sopenharmony_ci sync_initial_page_table(); 27962306a36Sopenharmony_ci} 280