162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/spinlock.h>
462306a36Sopenharmony_ci#include <linux/percpu.h>
562306a36Sopenharmony_ci#include <linux/kallsyms.h>
662306a36Sopenharmony_ci#include <linux/kcore.h>
762306a36Sopenharmony_ci#include <linux/pgtable.h>
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <asm/cpu_entry_area.h>
1062306a36Sopenharmony_ci#include <asm/fixmap.h>
1162306a36Sopenharmony_ci#include <asm/desc.h>
1262306a36Sopenharmony_ci#include <asm/kasan.h>
1362306a36Sopenharmony_ci#include <asm/setup.h>
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#ifdef CONFIG_X86_64
1862306a36Sopenharmony_cistatic DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
1962306a36Sopenharmony_ciDEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_cistatic DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_cistatic __always_inline unsigned int cea_offset(unsigned int cpu)
2462306a36Sopenharmony_ci{
2562306a36Sopenharmony_ci	return per_cpu(_cea_offset, cpu);
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_cistatic __init void init_cea_offsets(void)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	unsigned int max_cea;
3162306a36Sopenharmony_ci	unsigned int i, j;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	if (!kaslr_enabled()) {
3462306a36Sopenharmony_ci		for_each_possible_cpu(i)
3562306a36Sopenharmony_ci			per_cpu(_cea_offset, i) = i;
3662306a36Sopenharmony_ci		return;
3762306a36Sopenharmony_ci	}
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/* O(sodding terrible) */
4262306a36Sopenharmony_ci	for_each_possible_cpu(i) {
4362306a36Sopenharmony_ci		unsigned int cea;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ciagain:
4662306a36Sopenharmony_ci		cea = get_random_u32_below(max_cea);
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci		for_each_possible_cpu(j) {
4962306a36Sopenharmony_ci			if (cea_offset(j) == cea)
5062306a36Sopenharmony_ci				goto again;
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci			if (i == j)
5362306a36Sopenharmony_ci				break;
5462306a36Sopenharmony_ci		}
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci		per_cpu(_cea_offset, i) = cea;
5762306a36Sopenharmony_ci	}
5862306a36Sopenharmony_ci}
5962306a36Sopenharmony_ci#else /* !X86_64 */
6062306a36Sopenharmony_ciDECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistatic __always_inline unsigned int cea_offset(unsigned int cpu)
6362306a36Sopenharmony_ci{
6462306a36Sopenharmony_ci	return cpu;
6562306a36Sopenharmony_ci}
/* 32-bit: slot indices are derived from the CPU number, nothing to set up. */
static inline void init_cea_offsets(void)
{
}
6762306a36Sopenharmony_ci#endif
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci/* Is called from entry code, so must be noinstr */
7062306a36Sopenharmony_cinoinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
7362306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	return (struct cpu_entry_area *) va;
7662306a36Sopenharmony_ci}
7762306a36Sopenharmony_ciEXPORT_SYMBOL(get_cpu_entry_area);
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_civoid cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	unsigned long va = (unsigned long) cea_vaddr;
8262306a36Sopenharmony_ci	pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	/*
8562306a36Sopenharmony_ci	 * The cpu_entry_area is shared between the user and kernel
8662306a36Sopenharmony_ci	 * page tables.  All of its ptes can safely be global.
8762306a36Sopenharmony_ci	 * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
8862306a36Sopenharmony_ci	 * non-present PTEs, so be careful not to set it in that
8962306a36Sopenharmony_ci	 * case to avoid confusion.
9062306a36Sopenharmony_ci	 */
9162306a36Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_PGE) &&
9262306a36Sopenharmony_ci	    (pgprot_val(flags) & _PAGE_PRESENT))
9362306a36Sopenharmony_ci		pte = pte_set_flags(pte, _PAGE_GLOBAL);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	set_pte_vaddr(va, pte);
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic void __init
9962306a36Sopenharmony_cicea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
10062306a36Sopenharmony_ci{
10162306a36Sopenharmony_ci	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
10262306a36Sopenharmony_ci		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_cistatic void __init percpu_setup_debug_store(unsigned int cpu)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci#ifdef CONFIG_CPU_SUP_INTEL
10862306a36Sopenharmony_ci	unsigned int npages;
10962306a36Sopenharmony_ci	void *cea;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
11262306a36Sopenharmony_ci		return;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
11562306a36Sopenharmony_ci	npages = sizeof(struct debug_store) / PAGE_SIZE;
11662306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
11762306a36Sopenharmony_ci	cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
11862306a36Sopenharmony_ci			     PAGE_KERNEL);
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
12162306a36Sopenharmony_ci	/*
12262306a36Sopenharmony_ci	 * Force the population of PMDs for not yet allocated per cpu
12362306a36Sopenharmony_ci	 * memory like debug store buffers.
12462306a36Sopenharmony_ci	 */
12562306a36Sopenharmony_ci	npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
12662306a36Sopenharmony_ci	for (; npages; npages--, cea += PAGE_SIZE)
12762306a36Sopenharmony_ci		cea_set_pte(cea, 0, PAGE_NONE);
12862306a36Sopenharmony_ci#endif
12962306a36Sopenharmony_ci}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci#ifdef CONFIG_X86_64
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci#define cea_map_stack(name) do {					\
13462306a36Sopenharmony_ci	npages = sizeof(estacks->name## _stack) / PAGE_SIZE;		\
13562306a36Sopenharmony_ci	cea_map_percpu_pages(cea->estacks.name## _stack,		\
13662306a36Sopenharmony_ci			estacks->name## _stack, npages, PAGE_KERNEL);	\
13762306a36Sopenharmony_ci	} while (0)
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cistatic void __init percpu_setup_exception_stacks(unsigned int cpu)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
14262306a36Sopenharmony_ci	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
14362306a36Sopenharmony_ci	unsigned int npages;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	/*
15062306a36Sopenharmony_ci	 * The exceptions stack mappings in the per cpu area are protected
15162306a36Sopenharmony_ci	 * by guard pages so each stack must be mapped separately. DB2 is
15262306a36Sopenharmony_ci	 * not mapped; it just exists to catch triple nesting of #DB.
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	cea_map_stack(DF);
15562306a36Sopenharmony_ci	cea_map_stack(NMI);
15662306a36Sopenharmony_ci	cea_map_stack(DB);
15762306a36Sopenharmony_ci	cea_map_stack(MCE);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
16062306a36Sopenharmony_ci		if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
16162306a36Sopenharmony_ci			cea_map_stack(VC);
16262306a36Sopenharmony_ci			cea_map_stack(VC2);
16362306a36Sopenharmony_ci		}
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci}
16662306a36Sopenharmony_ci#else
16762306a36Sopenharmony_cistatic inline void percpu_setup_exception_stacks(unsigned int cpu)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	cea_map_percpu_pages(&cea->doublefault_stack,
17262306a36Sopenharmony_ci			     &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci#endif
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci/* Setup the fixmap mappings only once per-processor */
17762306a36Sopenharmony_cistatic void __init setup_cpu_entry_area(unsigned int cpu)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
18062306a36Sopenharmony_ci#ifdef CONFIG_X86_64
18162306a36Sopenharmony_ci	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
18262306a36Sopenharmony_ci	pgprot_t gdt_prot = PAGE_KERNEL_RO;
18362306a36Sopenharmony_ci	pgprot_t tss_prot = PAGE_KERNEL_RO;
18462306a36Sopenharmony_ci#else
18562306a36Sopenharmony_ci	/*
18662306a36Sopenharmony_ci	 * On 32-bit systems, the GDT cannot be read-only because
18762306a36Sopenharmony_ci	 * our double fault handler uses a task gate, and entering through
18862306a36Sopenharmony_ci	 * a task gate needs to change an available TSS to busy.  If the
18962306a36Sopenharmony_ci	 * GDT is read-only, that will triple fault.  The TSS cannot be
19062306a36Sopenharmony_ci	 * read-only because the CPU writes to it on task switches.
19162306a36Sopenharmony_ci	 */
19262306a36Sopenharmony_ci	pgprot_t gdt_prot = PAGE_KERNEL;
19362306a36Sopenharmony_ci	pgprot_t tss_prot = PAGE_KERNEL;
19462306a36Sopenharmony_ci#endif
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
19762306a36Sopenharmony_ci					early_cpu_to_node(cpu));
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	cea_map_percpu_pages(&cea->entry_stack_page,
20262306a36Sopenharmony_ci			     per_cpu_ptr(&entry_stack_storage, cpu), 1,
20362306a36Sopenharmony_ci			     PAGE_KERNEL);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/*
20662306a36Sopenharmony_ci	 * The Intel SDM says (Volume 3, 7.2.1):
20762306a36Sopenharmony_ci	 *
20862306a36Sopenharmony_ci	 *  Avoid placing a page boundary in the part of the TSS that the
20962306a36Sopenharmony_ci	 *  processor reads during a task switch (the first 104 bytes). The
21062306a36Sopenharmony_ci	 *  processor may not correctly perform address translations if a
21162306a36Sopenharmony_ci	 *  boundary occurs in this area. During a task switch, the processor
21262306a36Sopenharmony_ci	 *  reads and writes into the first 104 bytes of each TSS (using
21362306a36Sopenharmony_ci	 *  contiguous physical addresses beginning with the physical address
21462306a36Sopenharmony_ci	 *  of the first byte of the TSS). So, after TSS access begins, if
21562306a36Sopenharmony_ci	 *  part of the 104 bytes is not physically contiguous, the processor
21662306a36Sopenharmony_ci	 *  will access incorrect information without generating a page-fault
21762306a36Sopenharmony_ci	 *  exception.
21862306a36Sopenharmony_ci	 *
21962306a36Sopenharmony_ci	 * There are also a lot of errata involving the TSS spanning a page
22062306a36Sopenharmony_ci	 * boundary.  Assert that we're not doing that.
22162306a36Sopenharmony_ci	 */
22262306a36Sopenharmony_ci	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
22362306a36Sopenharmony_ci		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
22462306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
22562306a36Sopenharmony_ci	/*
22662306a36Sopenharmony_ci	 * VMX changes the host TR limit to 0x67 after a VM exit. This is
22762306a36Sopenharmony_ci	 * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
22862306a36Sopenharmony_ci	 * that this is correct.
22962306a36Sopenharmony_ci	 */
23062306a36Sopenharmony_ci	BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
23162306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
23462306a36Sopenharmony_ci			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci#ifdef CONFIG_X86_32
23762306a36Sopenharmony_ci	per_cpu(cpu_entry_area, cpu) = cea;
23862306a36Sopenharmony_ci#endif
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	percpu_setup_exception_stacks(cpu);
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	percpu_setup_debug_store(cpu);
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_cistatic __init void setup_cpu_entry_area_ptes(void)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci#ifdef CONFIG_X86_32
24862306a36Sopenharmony_ci	unsigned long start, end;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	/* The +1 is for the readonly IDT: */
25162306a36Sopenharmony_ci	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
25262306a36Sopenharmony_ci	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	start = CPU_ENTRY_AREA_BASE;
25562306a36Sopenharmony_ci	end = start + CPU_ENTRY_AREA_MAP_SIZE;
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	/* Careful here: start + PMD_SIZE might wrap around */
25862306a36Sopenharmony_ci	for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
25962306a36Sopenharmony_ci		populate_extra_pte(start);
26062306a36Sopenharmony_ci#endif
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_civoid __init setup_cpu_entry_areas(void)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	unsigned int cpu;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	init_cea_offsets();
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	setup_cpu_entry_area_ptes();
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
27262306a36Sopenharmony_ci		setup_cpu_entry_area(cpu);
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	/*
27562306a36Sopenharmony_ci	 * This is the last essential update to swapper_pgdir which needs
27662306a36Sopenharmony_ci	 * to be synchronized to initial_page_table on 32bit.
27762306a36Sopenharmony_ci	 */
27862306a36Sopenharmony_ci	sync_initial_page_table();
27962306a36Sopenharmony_ci}
280