1// SPDX-License-Identifier: GPL-2.0
2#define DISABLE_BRANCH_PROFILING
3#define pr_fmt(fmt) "kasan: " fmt
4
5/* cpu_feature_enabled() cannot be used this early */
6#define USE_EARLY_PGTABLE_L5
7
8#include <linux/memblock.h>
9#include <linux/kasan.h>
10#include <linux/kdebug.h>
11#include <linux/mm.h>
12#include <linux/sched.h>
13#include <linux/sched/task.h>
14#include <linux/vmalloc.h>
15
16#include <asm/e820/types.h>
17#include <asm/pgalloc.h>
18#include <asm/tlbflush.h>
19#include <asm/sections.h>
20#include <asm/cpu_entry_area.h>
21
/*
 * Table of PFN ranges, defined elsewhere.  kasan_init() walks it from index
 * 0 and stops at the first entry whose ->end is 0, calling map_range() on
 * every entry before that.
 */
extern struct range pfn_mapped[E820_MAX_ENTRIES];

/*
 * Page-aligned scratch P4D table, init-only data.  In 5-level paging mode
 * kasan_init() copies into it the P4D table referenced by the PGD entry that
 * covers KASAN_SHADOW_END and points early_top_pgt at the copy.
 */
static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
25
26static __init void *early_alloc(size_t size, int nid, bool should_panic)
27{
28	void *ptr = memblock_alloc_try_nid(size, size,
29			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
30
31	if (!ptr && should_panic)
32		panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
33		      (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
34
35	return ptr;
36}
37
38static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
39				      unsigned long end, int nid)
40{
41	pte_t *pte;
42
43	if (pmd_none(*pmd)) {
44		void *p;
45
46		if (boot_cpu_has(X86_FEATURE_PSE) &&
47		    ((end - addr) == PMD_SIZE) &&
48		    IS_ALIGNED(addr, PMD_SIZE)) {
49			p = early_alloc(PMD_SIZE, nid, false);
50			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
51				return;
52			else if (p)
53				memblock_free(__pa(p), PMD_SIZE);
54		}
55
56		p = early_alloc(PAGE_SIZE, nid, true);
57		pmd_populate_kernel(&init_mm, pmd, p);
58	}
59
60	pte = pte_offset_kernel(pmd, addr);
61	do {
62		pte_t entry;
63		void *p;
64
65		if (!pte_none(*pte))
66			continue;
67
68		p = early_alloc(PAGE_SIZE, nid, true);
69		entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
70		set_pte_at(&init_mm, addr, pte, entry);
71	} while (pte++, addr += PAGE_SIZE, addr != end);
72}
73
74static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
75				      unsigned long end, int nid)
76{
77	pmd_t *pmd;
78	unsigned long next;
79
80	if (pud_none(*pud)) {
81		void *p;
82
83		if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
84		    ((end - addr) == PUD_SIZE) &&
85		    IS_ALIGNED(addr, PUD_SIZE)) {
86			p = early_alloc(PUD_SIZE, nid, false);
87			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
88				return;
89			else if (p)
90				memblock_free(__pa(p), PUD_SIZE);
91		}
92
93		p = early_alloc(PAGE_SIZE, nid, true);
94		pud_populate(&init_mm, pud, p);
95	}
96
97	pmd = pmd_offset(pud, addr);
98	do {
99		next = pmd_addr_end(addr, end);
100		if (!pmd_large(*pmd))
101			kasan_populate_pmd(pmd, addr, next, nid);
102	} while (pmd++, addr = next, addr != end);
103}
104
105static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
106				      unsigned long end, int nid)
107{
108	pud_t *pud;
109	unsigned long next;
110
111	if (p4d_none(*p4d)) {
112		void *p = early_alloc(PAGE_SIZE, nid, true);
113
114		p4d_populate(&init_mm, p4d, p);
115	}
116
117	pud = pud_offset(p4d, addr);
118	do {
119		next = pud_addr_end(addr, end);
120		if (!pud_large(*pud))
121			kasan_populate_pud(pud, addr, next, nid);
122	} while (pud++, addr = next, addr != end);
123}
124
125static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
126				      unsigned long end, int nid)
127{
128	void *p;
129	p4d_t *p4d;
130	unsigned long next;
131
132	if (pgd_none(*pgd)) {
133		p = early_alloc(PAGE_SIZE, nid, true);
134		pgd_populate(&init_mm, pgd, p);
135	}
136
137	p4d = p4d_offset(pgd, addr);
138	do {
139		next = p4d_addr_end(addr, end);
140		kasan_populate_p4d(p4d, addr, next, nid);
141	} while (p4d++, addr = next, addr != end);
142}
143
144static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
145					 int nid)
146{
147	pgd_t *pgd;
148	unsigned long next;
149
150	addr = addr & PAGE_MASK;
151	end = round_up(end, PAGE_SIZE);
152	pgd = pgd_offset_k(addr);
153	do {
154		next = pgd_addr_end(addr, end);
155		kasan_populate_pgd(pgd, addr, next, nid);
156	} while (pgd++, addr = next, addr != end);
157}
158
159static void __init map_range(struct range *range)
160{
161	unsigned long start;
162	unsigned long end;
163
164	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
165	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
166
167	kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
168}
169
170static void __init clear_pgds(unsigned long start,
171			unsigned long end)
172{
173	pgd_t *pgd;
174	/* See comment in kasan_init() */
175	unsigned long pgd_end = end & PGDIR_MASK;
176
177	for (; start < pgd_end; start += PGDIR_SIZE) {
178		pgd = pgd_offset_k(start);
179		/*
180		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
181		 * instead.
182		 */
183		if (pgtable_l5_enabled())
184			pgd_clear(pgd);
185		else
186			p4d_clear(p4d_offset(pgd, start));
187	}
188
189	pgd = pgd_offset_k(start);
190	for (; start < end; start += P4D_SIZE)
191		p4d_clear(p4d_offset(pgd, start));
192}
193
194static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
195{
196	unsigned long p4d;
197
198	if (!pgtable_l5_enabled())
199		return (p4d_t *)pgd;
200
201	p4d = pgd_val(*pgd) & PTE_PFN_MASK;
202	p4d += __START_KERNEL_map - phys_base;
203	return (p4d_t *)p4d + p4d_index(addr);
204}
205
206static void __init kasan_early_p4d_populate(pgd_t *pgd,
207		unsigned long addr,
208		unsigned long end)
209{
210	pgd_t pgd_entry;
211	p4d_t *p4d, p4d_entry;
212	unsigned long next;
213
214	if (pgd_none(*pgd)) {
215		pgd_entry = __pgd(_KERNPG_TABLE |
216					__pa_nodebug(kasan_early_shadow_p4d));
217		set_pgd(pgd, pgd_entry);
218	}
219
220	p4d = early_p4d_offset(pgd, addr);
221	do {
222		next = p4d_addr_end(addr, end);
223
224		if (!p4d_none(*p4d))
225			continue;
226
227		p4d_entry = __p4d(_KERNPG_TABLE |
228					__pa_nodebug(kasan_early_shadow_pud));
229		set_p4d(p4d, p4d_entry);
230	} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
231}
232
233static void __init kasan_map_early_shadow(pgd_t *pgd)
234{
235	/* See comment in kasan_init() */
236	unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
237	unsigned long end = KASAN_SHADOW_END;
238	unsigned long next;
239
240	pgd += pgd_index(addr);
241	do {
242		next = pgd_addr_end(addr, end);
243		kasan_early_p4d_populate(pgd, addr, next);
244	} while (pgd++, addr = next, addr != end);
245}
246
247static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
248					       unsigned long addr,
249					       unsigned long end)
250{
251	p4d_t *p4d;
252	unsigned long next;
253	void *p;
254
255	p4d = p4d_offset(pgd, addr);
256	do {
257		next = p4d_addr_end(addr, end);
258
259		if (p4d_none(*p4d)) {
260			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
261			p4d_populate(&init_mm, p4d, p);
262		}
263	} while (p4d++, addr = next, addr != end);
264}
265
266static void __init kasan_shallow_populate_pgds(void *start, void *end)
267{
268	unsigned long addr, next;
269	pgd_t *pgd;
270	void *p;
271
272	addr = (unsigned long)start;
273	pgd = pgd_offset_k(addr);
274	do {
275		next = pgd_addr_end(addr, (unsigned long)end);
276
277		if (pgd_none(*pgd)) {
278			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
279			pgd_populate(&init_mm, pgd, p);
280		}
281
282		/*
283		 * we need to populate p4ds to be synced when running in
284		 * four level mode - see sync_global_pgds_l4()
285		 */
286		kasan_shallow_populate_p4ds(pgd, addr, next);
287	} while (pgd++, addr = next, addr != (unsigned long)end);
288}
289
290void __init kasan_early_init(void)
291{
292	int i;
293	pteval_t pte_val = __pa_nodebug(kasan_early_shadow_page) |
294				__PAGE_KERNEL | _PAGE_ENC;
295	pmdval_t pmd_val = __pa_nodebug(kasan_early_shadow_pte) | _KERNPG_TABLE;
296	pudval_t pud_val = __pa_nodebug(kasan_early_shadow_pmd) | _KERNPG_TABLE;
297	p4dval_t p4d_val = __pa_nodebug(kasan_early_shadow_pud) | _KERNPG_TABLE;
298
299	/* Mask out unsupported __PAGE_KERNEL bits: */
300	pte_val &= __default_kernel_pte_mask;
301	pmd_val &= __default_kernel_pte_mask;
302	pud_val &= __default_kernel_pte_mask;
303	p4d_val &= __default_kernel_pte_mask;
304
305	for (i = 0; i < PTRS_PER_PTE; i++)
306		kasan_early_shadow_pte[i] = __pte(pte_val);
307
308	for (i = 0; i < PTRS_PER_PMD; i++)
309		kasan_early_shadow_pmd[i] = __pmd(pmd_val);
310
311	for (i = 0; i < PTRS_PER_PUD; i++)
312		kasan_early_shadow_pud[i] = __pud(pud_val);
313
314	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
315		kasan_early_shadow_p4d[i] = __p4d(p4d_val);
316
317	kasan_map_early_shadow(early_top_pgt);
318	kasan_map_early_shadow(init_top_pgt);
319}
320
321static unsigned long kasan_mem_to_shadow_align_down(unsigned long va)
322{
323	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
324
325	return round_down(shadow, PAGE_SIZE);
326}
327
328static unsigned long kasan_mem_to_shadow_align_up(unsigned long va)
329{
330	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
331
332	return round_up(shadow, PAGE_SIZE);
333}
334
335void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
336{
337	unsigned long shadow_start, shadow_end;
338
339	shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va);
340	shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size);
341	kasan_populate_shadow(shadow_start, shadow_end, nid);
342}
343
/*
 * Build the final KASAN shadow mappings.
 *
 * Sequence, as visible below:
 *  1. Copy init_top_pgt into early_top_pgt and load the copy into CR3.
 *  2. Clear the top-level entries covering the shadow region (clear_pgds()).
 *  3. Fill the shadow region piece by piece, in ascending address order:
 *     kasan_populate_early_shadow() for some pieces, and
 *     kasan_populate_shadow() / map_range(), which allocate memory, for
 *     the pfn_mapped[] ranges, the shared part of the CPU entry area and
 *     the kernel image (_stext.._end).
 *  4. Load init_top_pgt back into CR3.
 *  5. Zero kasan_early_shadow_page, remap it read-only and set
 *     init_task.kasan_depth to 0.
 *
 * NOTE(review): kasan_populate_early_shadow() is defined outside this file;
 * presumably it maps the shared early shadow page - confirm.
 */
void __init kasan_init(void)
{
	unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end;
	int i;

	/*
	 * NOTE(review): presumably the CPU runs on this copy so that the
	 * tables behind init_top_pgt can be rebuilt safely - confirm.
	 */
	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));

	/*
	 * We use the same shadow offset for 4- and 5-level paging to
	 * facilitate boot-time switching between paging modes.
	 * As a result, in 5-level paging mode KASAN_SHADOW_START and
	 * KASAN_SHADOW_END are not aligned to a PGD boundary.
	 *
	 * KASAN_SHADOW_START doesn't share a PGD with anything else.
	 * We claim the whole PGD entry to make things easier.
	 *
	 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
	 * a bunch of things like kernel code, modules, EFI mapping, etc.
	 * We need to take extra steps to not overwrite them.
	 */
	if (pgtable_l5_enabled()) {
		void *ptr;

		/*
		 * Give early_top_pgt a private copy (tmp_p4d_table) of the
		 * P4D table under the PGD entry that covers KASAN_SHADOW_END.
		 */
		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
		memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
		set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
	}

	load_cr3(early_top_pgt);
	__flush_tlb_all();

	clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

	/* Everything from the start of the shadow up to shadow(PAGE_OFFSET). */
	kasan_populate_early_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
			kasan_mem_to_shadow((void *)PAGE_OFFSET));

	/*
	 * Allocate shadow for each range in pfn_mapped[]; the first entry
	 * whose end is 0 terminates the list.
	 */
	for (i = 0; i < E820_MAX_ENTRIES; i++) {
		if (pfn_mapped[i].end == 0)
			break;

		map_range(&pfn_mapped[i]);
	}

	/* Page-aligned shadow boundaries of the CPU entry area, used below. */
	shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
	shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU);
	shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
						      CPU_ENTRY_AREA_MAP_SIZE);

	/* Shadow of [PAGE_OFFSET + MAXMEM, VMALLOC_START). */
	kasan_populate_early_shadow(
		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
		kasan_mem_to_shadow((void *)VMALLOC_START));

	/*
	 * If we're in full vmalloc mode, don't back vmalloc space with early
	 * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
	 * the global table and we can populate the lower levels on demand.
	 */
	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
		kasan_shallow_populate_pgds(
			kasan_mem_to_shadow((void *)VMALLOC_START),
			kasan_mem_to_shadow((void *)VMALLOC_END));
	else
		kasan_populate_early_shadow(
			kasan_mem_to_shadow((void *)VMALLOC_START),
			kasan_mem_to_shadow((void *)VMALLOC_END));

	/* From just past VMALLOC_END up to the CPU entry area's shadow. */
	kasan_populate_early_shadow(
		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
		(void *)shadow_cea_begin);

	/*
	 * Populate the shadow for the shared portion of the CPU entry area.
	 * Shadows for the per-CPU areas are mapped on-demand, as each CPU's
	 * area is randomly placed somewhere in the 512GiB range and mapping
	 * the entire 512GiB range is prohibitively expensive.
	 */
	kasan_populate_shadow(shadow_cea_begin,
			      shadow_cea_per_cpu_begin, 0);

	/* From the end of the CPU entry area's shadow up to the kernel map. */
	kasan_populate_early_shadow((void *)shadow_cea_end,
			kasan_mem_to_shadow((void *)__START_KERNEL_map));

	/* Allocated shadow for the kernel image, _stext.._end. */
	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
			      (unsigned long)kasan_mem_to_shadow(_end),
			      early_pfn_to_nid(__pa(_stext)));

	/* From shadow(MODULES_END) to the end of the shadow region. */
	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)MODULES_END),
					(void *)KASAN_SHADOW_END);

	load_cr3(init_top_pgt);
	__flush_tlb_all();

	/*
	 * kasan_early_shadow_page has been used as early shadow memory, thus
	 * it may contain some garbage. Now we can clear and write protect it,
	 * since after the TLB flush no one should write to it.
	 */
	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte_t pte;
		pgprot_t prot;

		/* Read-only protection, limited to bits the kernel supports. */
		prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
		pgprot_val(prot) &= __default_kernel_pte_mask;

		pte = __pte(__pa(kasan_early_shadow_page) | pgprot_val(prot));
		set_pte(&kasan_early_shadow_pte[i], pte);
	}
	/* Flush TLBs again to be sure that write protection is applied. */
	__flush_tlb_all();

	init_task.kasan_depth = 0;
	pr_info("KernelAddressSanitizer initialized\n");
}
459