xref: /kernel/linux/linux-6.6/arch/riscv/mm/init.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Regents of the University of California
4 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
5 * Copyright (C) 2020 FORTH-ICS/CARV
6 *  Nick Kossifidis <mick@ics.forth.gr>
7 */
8
9#include <linux/init.h>
10#include <linux/mm.h>
11#include <linux/memblock.h>
12#include <linux/initrd.h>
13#include <linux/swap.h>
14#include <linux/swiotlb.h>
15#include <linux/sizes.h>
16#include <linux/of_fdt.h>
17#include <linux/of_reserved_mem.h>
18#include <linux/libfdt.h>
19#include <linux/set_memory.h>
20#include <linux/dma-map-ops.h>
21#include <linux/crash_dump.h>
22#include <linux/hugetlb.h>
23#ifdef CONFIG_RELOCATABLE
24#include <linux/elf.h>
25#endif
26#include <linux/kfence.h>
27
28#include <asm/fixmap.h>
29#include <asm/io.h>
30#include <asm/numa.h>
31#include <asm/pgtable.h>
32#include <asm/ptdump.h>
33#include <asm/sections.h>
34#include <asm/soc.h>
35#include <asm/tlbflush.h>
36
37#include "../kernel/head.h"
38
39struct kernel_mapping kernel_map __ro_after_init;
40EXPORT_SYMBOL(kernel_map);
41#ifdef CONFIG_XIP_KERNEL
42#define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
43#endif
44
45#ifdef CONFIG_64BIT
46u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
47#else
48u64 satp_mode __ro_after_init = SATP_MODE_32;
49#endif
50EXPORT_SYMBOL(satp_mode);
51
52bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
53bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
54EXPORT_SYMBOL(pgtable_l4_enabled);
55EXPORT_SYMBOL(pgtable_l5_enabled);
56
57phys_addr_t phys_ram_base __ro_after_init;
58EXPORT_SYMBOL(phys_ram_base);
59
60unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
61							__page_aligned_bss;
62EXPORT_SYMBOL(empty_zero_page);
63
64extern char _start[];
65void *_dtb_early_va __initdata;
66uintptr_t _dtb_early_pa __initdata;
67
68static phys_addr_t dma32_phys_limit __initdata;
69
70static void __init zone_sizes_init(void)
71{
72	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
73
74#ifdef CONFIG_ZONE_DMA32
75	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
76#endif
77	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
78
79	free_area_init(max_zone_pfns);
80}
81
82#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
83
84#define LOG2_SZ_1K  ilog2(SZ_1K)
85#define LOG2_SZ_1M  ilog2(SZ_1M)
86#define LOG2_SZ_1G  ilog2(SZ_1G)
87#define LOG2_SZ_1T  ilog2(SZ_1T)
88
89static inline void print_mlk(char *name, unsigned long b, unsigned long t)
90{
91	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
92		  (((t) - (b)) >> LOG2_SZ_1K));
93}
94
95static inline void print_mlm(char *name, unsigned long b, unsigned long t)
96{
97	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
98		  (((t) - (b)) >> LOG2_SZ_1M));
99}
100
101static inline void print_mlg(char *name, unsigned long b, unsigned long t)
102{
103	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld GB)\n", name, b, t,
104		   (((t) - (b)) >> LOG2_SZ_1G));
105}
106
107#ifdef CONFIG_64BIT
108static inline void print_mlt(char *name, unsigned long b, unsigned long t)
109{
110	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld TB)\n", name, b, t,
111		   (((t) - (b)) >> LOG2_SZ_1T));
112}
113#else
114#define print_mlt(n, b, t) do {} while (0)
115#endif
116
117static inline void print_ml(char *name, unsigned long b, unsigned long t)
118{
119	unsigned long diff = t - b;
120
121	if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
122		print_mlt(name, b, t);
123	else if ((diff >> LOG2_SZ_1G) >= 10)
124		print_mlg(name, b, t);
125	else if ((diff >> LOG2_SZ_1M) >= 10)
126		print_mlm(name, b, t);
127	else
128		print_mlk(name, b, t);
129}
130
131static void __init print_vm_layout(void)
132{
133	pr_notice("Virtual kernel memory layout:\n");
134	print_ml("fixmap", (unsigned long)FIXADDR_START,
135		(unsigned long)FIXADDR_TOP);
136	print_ml("pci io", (unsigned long)PCI_IO_START,
137		(unsigned long)PCI_IO_END);
138	print_ml("vmemmap", (unsigned long)VMEMMAP_START,
139		(unsigned long)VMEMMAP_END);
140	print_ml("vmalloc", (unsigned long)VMALLOC_START,
141		(unsigned long)VMALLOC_END);
142#ifdef CONFIG_64BIT
143	print_ml("modules", (unsigned long)MODULES_VADDR,
144		(unsigned long)MODULES_END);
145#endif
146	print_ml("lowmem", (unsigned long)PAGE_OFFSET,
147		(unsigned long)high_memory);
148	if (IS_ENABLED(CONFIG_64BIT)) {
149#ifdef CONFIG_KASAN
150		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
151#endif
152
153		print_ml("kernel", (unsigned long)kernel_map.virt_addr,
154			 (unsigned long)ADDRESS_SPACE_END);
155	}
156}
157#else
158static void print_vm_layout(void) { }
159#endif /* CONFIG_DEBUG_VM */
160
161void __init mem_init(void)
162{
163#ifdef CONFIG_FLATMEM
164	BUG_ON(!mem_map);
165#endif /* CONFIG_FLATMEM */
166
167	swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
168	memblock_free_all();
169
170	print_vm_layout();
171}
172
173/* Limit the memory size via the "mem=" kernel command-line parameter. */
174static phys_addr_t memory_limit;
175#ifdef CONFIG_XIP_KERNEL
176#define memory_limit	(*(phys_addr_t *)XIP_FIXUP(&memory_limit))
177#endif /* CONFIG_XIP_KERNEL */
178
179static int __init early_mem(char *p)
180{
181	u64 size;
182
183	if (!p)
184		return 1;
185
186	size = memparse(p, &p) & PAGE_MASK;
187	memory_limit = min_t(u64, size, memory_limit);
188
189	pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);
190
191	return 0;
192}
193early_param("mem", early_mem);
194
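/*
 * Reserve the kernel image in memblock, establish phys_ram_base and the
 * min/max pfn limits, and reserve the initrd, the regions described in the
 * device tree and the DTB itself.
 */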
195static void __init setup_bootmem(void)
196{
197	phys_addr_t vmlinux_end = __pa_symbol(&_end);
198	phys_addr_t max_mapped_addr;
199	phys_addr_t phys_ram_end, vmlinux_start;
200
201	if (IS_ENABLED(CONFIG_XIP_KERNEL))
202		vmlinux_start = __pa_symbol(&_sdata);
203	else
204		vmlinux_start = __pa_symbol(&_start);
205
206	memblock_enforce_memory_limit(memory_limit);
207
208	/*
209	 * Make sure we align the reservation on PMD_SIZE since we will
210	 * map the kernel in the linear mapping as read-only: we do not want
211	 * any allocation to happen between _end and the next pmd aligned page.
212	 */
213	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
214		vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
215	/*
216	 * Reserve from the start of the kernel to the end of the kernel
217	 */
218	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
219
220	phys_ram_end = memblock_end_of_DRAM();
221
222	/*
223	 * Make sure we align the start of the memory on a PMD boundary so that
224	 * at worst, we map the linear mapping with PMD mappings.
225	 */
226	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
227		phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
228
229	/*
230	 * In 64-bit, any use of __va/__pa before this point is wrong since the
231	 * start of DRAM was not known until now.
232	 */
233	if (IS_ENABLED(CONFIG_64BIT))
234		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
235
236	/*
237	 * The memblock allocator is not aware that the last 4K bytes of
238	 * addressable memory cannot be mapped because of the IS_ERR_VALUE
239	 * macro. Make sure that the last 4K bytes are not usable by memblock
240	 * if the end of DRAM is equal to the maximum addressable memory. For
241	 * the 64-bit kernel, this problem cannot happen here as the end of the
242	 * virtual address space is occupied by the kernel mapping; there, the
243	 * check must be done as soon as the kernel mapping base address is determined.
244	 */
245	if (!IS_ENABLED(CONFIG_64BIT)) {
246		max_mapped_addr = __pa(~(ulong)0);
247		if (max_mapped_addr == (phys_ram_end - 1))
248			memblock_set_current_limit(max_mapped_addr - 4096);
249	}
250
251	min_low_pfn = PFN_UP(phys_ram_base);
252	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
253	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
254
255	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
256	set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
257
258	reserve_initrd_mem();
259
260	/*
261	 * No allocation should be done before reserving the memory as defined
262	 * in the device tree, otherwise the allocation could end up in a
263	 * reserved region.
264	 */
265	early_init_fdt_scan_reserved_mem();
266
267	/*
268	 * If the DTB is built in, there is no need to reserve its memblock.
269	 * Otherwise, do reserve it, but avoid using
270	 * early_init_fdt_reserve_self() since __pa() does
271	 * not work for DTB pointers that are fixmap addresses.
272	 */
273	if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
274		memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
275
276	dma_contiguous_reserve(dma32_phys_limit);
277	if (IS_ENABLED(CONFIG_64BIT))
278		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
279}
280
281#ifdef CONFIG_MMU
282struct pt_alloc_ops pt_ops __initdata;
283
284pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
285pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
286static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
287
288pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
289
290#ifdef CONFIG_XIP_KERNEL
291#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
292#define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
293#define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
294#define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
295#endif /* CONFIG_XIP_KERNEL */
296
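/*
 * Translate VM_READ/VM_WRITE/VM_EXEC/VM_SHARED combinations into page
 * protections. Private (non-shared) writable mappings get the copy-on-write
 * PAGE_COPY* protections instead of writable ones.
 */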
297static const pgprot_t protection_map[16] = {
298	[VM_NONE]					= PAGE_NONE,
299	[VM_READ]					= PAGE_READ,
300	[VM_WRITE]					= PAGE_COPY,
301	[VM_WRITE | VM_READ]				= PAGE_COPY,
302	[VM_EXEC]					= PAGE_EXEC,
303	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
304	[VM_EXEC | VM_WRITE]				= PAGE_COPY_EXEC,
305	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_EXEC,
306	[VM_SHARED]					= PAGE_NONE,
307	[VM_SHARED | VM_READ]				= PAGE_READ,
308	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
309	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
310	[VM_SHARED | VM_EXEC]				= PAGE_EXEC,
311	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READ_EXEC,
312	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED_EXEC,
313	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED_EXEC
314};
315DECLARE_VM_GET_PAGE_PROT
316
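/*
 * Install the fixmap PTE for @idx, or clear it when @prot is empty, then
 * flush the corresponding TLB entry.
 */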
317void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
318{
319	unsigned long addr = __fix_to_virt(idx);
320	pte_t *ptep;
321
322	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
323
324	ptep = &fixmap_pte[pte_index(addr)];
325
326	if (pgprot_val(prot))
327		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
328	else
329		pte_clear(&init_mm, addr, ptep);
330	local_flush_tlb_page(addr);
331}
332
333static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
334{
335	return (pte_t *)((uintptr_t)pa);
336}
337
338static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
339{
340	clear_fixmap(FIX_PTE);
341	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
342}
343
344static inline pte_t *__init get_pte_virt_late(phys_addr_t pa)
345{
346	return (pte_t *) __va(pa);
347}
348
349static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
350{
351	/*
352	 * We only create PMD or PGD early mappings so we
353	 * should never reach here with MMU disabled.
354	 */
355	BUG();
356}
357
358static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
359{
360	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
361}
362
363static phys_addr_t __init alloc_pte_late(uintptr_t va)
364{
365	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
366
367	BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc));
368	return __pa((pte_t *)ptdesc_address(ptdesc));
369}
370
371static void __init create_pte_mapping(pte_t *ptep,
372				      uintptr_t va, phys_addr_t pa,
373				      phys_addr_t sz, pgprot_t prot)
374{
375	uintptr_t pte_idx = pte_index(va);
376
377	BUG_ON(sz != PAGE_SIZE);
378
379	if (pte_none(ptep[pte_idx]))
380		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
381}
382
383#ifndef __PAGETABLE_PMD_FOLDED
384
385static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
386static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
387static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
388
389#ifdef CONFIG_XIP_KERNEL
390#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
391#define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
392#define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
393#endif /* CONFIG_XIP_KERNEL */
394
395static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
396static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
397static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
398
399#ifdef CONFIG_XIP_KERNEL
400#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
401#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
402#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
403#endif /* CONFIG_XIP_KERNEL */
404
405static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
406static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
407static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
408
409#ifdef CONFIG_XIP_KERNEL
410#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
411#define fixmap_pud     ((pud_t *)XIP_FIXUP(fixmap_pud))
412#define early_pud      ((pud_t *)XIP_FIXUP(early_pud))
413#endif /* CONFIG_XIP_KERNEL */
414
415static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
416{
417	/* Before MMU is enabled */
418	return (pmd_t *)((uintptr_t)pa);
419}
420
421static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
422{
423	clear_fixmap(FIX_PMD);
424	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
425}
426
427static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
428{
429	return (pmd_t *) __va(pa);
430}
431
432static phys_addr_t __init alloc_pmd_early(uintptr_t va)
433{
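	/* Only one PMD is available for early mapping */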
434	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
435
436	return (uintptr_t)early_pmd;
437}
438
439static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
440{
441	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
442}
443
444static phys_addr_t __init alloc_pmd_late(uintptr_t va)
445{
446	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
447
448	BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc));
449	return __pa((pmd_t *)ptdesc_address(ptdesc));
450}
451
452static void __init create_pmd_mapping(pmd_t *pmdp,
453				      uintptr_t va, phys_addr_t pa,
454				      phys_addr_t sz, pgprot_t prot)
455{
456	pte_t *ptep;
457	phys_addr_t pte_phys;
458	uintptr_t pmd_idx = pmd_index(va);
459
460	if (sz == PMD_SIZE) {
461		if (pmd_none(pmdp[pmd_idx]))
462			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
463		return;
464	}
465
466	if (pmd_none(pmdp[pmd_idx])) {
467		pte_phys = pt_ops.alloc_pte(va);
468		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
469		ptep = pt_ops.get_pte_virt(pte_phys);
470		memset(ptep, 0, PAGE_SIZE);
471	} else {
472		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
473		ptep = pt_ops.get_pte_virt(pte_phys);
474	}
475
476	create_pte_mapping(ptep, va, pa, sz, prot);
477}
478
479static pud_t *__init get_pud_virt_early(phys_addr_t pa)
480{
481	return (pud_t *)((uintptr_t)pa);
482}
483
484static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
485{
486	clear_fixmap(FIX_PUD);
487	return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
488}
489
490static pud_t *__init get_pud_virt_late(phys_addr_t pa)
491{
492	return (pud_t *)__va(pa);
493}
494
495static phys_addr_t __init alloc_pud_early(uintptr_t va)
496{
497	/* Only one PUD is available for early mapping */
498	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
499
500	return (uintptr_t)early_pud;
501}
502
503static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
504{
505	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
506}
507
508static phys_addr_t alloc_pud_late(uintptr_t va)
509{
510	unsigned long vaddr;
511
512	vaddr = __get_free_page(GFP_KERNEL);
513	BUG_ON(!vaddr);
514	return __pa(vaddr);
515}
516
517static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
518{
519	return (p4d_t *)((uintptr_t)pa);
520}
521
522static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
523{
524	clear_fixmap(FIX_P4D);
525	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
526}
527
528static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
529{
530	return (p4d_t *)__va(pa);
531}
532
533static phys_addr_t __init alloc_p4d_early(uintptr_t va)
534{
535	/* Only one P4D is available for early mapping */
536	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
537
538	return (uintptr_t)early_p4d;
539}
540
541static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
542{
543	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
544}
545
546static phys_addr_t alloc_p4d_late(uintptr_t va)
547{
548	unsigned long vaddr;
549
550	vaddr = __get_free_page(GFP_KERNEL);
551	BUG_ON(!vaddr);
552	return __pa(vaddr);
553}
554
555static void __init create_pud_mapping(pud_t *pudp,
556				      uintptr_t va, phys_addr_t pa,
557				      phys_addr_t sz, pgprot_t prot)
558{
559	pmd_t *nextp;
560	phys_addr_t next_phys;
561	uintptr_t pud_index = pud_index(va);
562
563	if (sz == PUD_SIZE) {
564		if (pud_val(pudp[pud_index]) == 0)
565			pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
566		return;
567	}
568
569	if (pud_val(pudp[pud_index]) == 0) {
570		next_phys = pt_ops.alloc_pmd(va);
571		pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
572		nextp = pt_ops.get_pmd_virt(next_phys);
573		memset(nextp, 0, PAGE_SIZE);
574	} else {
575		next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
576		nextp = pt_ops.get_pmd_virt(next_phys);
577	}
578
579	create_pmd_mapping(nextp, va, pa, sz, prot);
580}
581
582static void __init create_p4d_mapping(p4d_t *p4dp,
583				      uintptr_t va, phys_addr_t pa,
584				      phys_addr_t sz, pgprot_t prot)
585{
586	pud_t *nextp;
587	phys_addr_t next_phys;
588	uintptr_t p4d_index = p4d_index(va);
589
590	if (sz == P4D_SIZE) {
591		if (p4d_val(p4dp[p4d_index]) == 0)
592			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
593		return;
594	}
595
596	if (p4d_val(p4dp[p4d_index]) == 0) {
597		next_phys = pt_ops.alloc_pud(va);
598		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
599		nextp = pt_ops.get_pud_virt(next_phys);
600		memset(nextp, 0, PAGE_SIZE);
601	} else {
602		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
603		nextp = pt_ops.get_pud_virt(next_phys);
604	}
605
606	create_pud_mapping(nextp, va, pa, sz, prot);
607}
608
609#define pgd_next_t		p4d_t
610#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
611		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
612		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
613#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
614		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
615		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
616#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
617				(pgtable_l5_enabled ?			\
618		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
619				(pgtable_l4_enabled ?			\
620		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
621		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
622#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
623		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
624		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
625#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
626		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
627		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
628#else
629#define pgd_next_t		pte_t
630#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
631#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
632#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
633	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
634#define fixmap_pgd_next		((uintptr_t)fixmap_pte)
635#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
636#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
637#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
638#endif /* __PAGETABLE_PMD_FOLDED */
639
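/*
 * Install a mapping for @va in @pgdp: a leaf PGD entry if @sz == PGDIR_SIZE,
 * otherwise recurse into the next page-table level, allocating and zeroing
 * the next-level table on demand through pt_ops.
 */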
640void __init create_pgd_mapping(pgd_t *pgdp,
641				      uintptr_t va, phys_addr_t pa,
642				      phys_addr_t sz, pgprot_t prot)
643{
644	pgd_next_t *nextp;
645	phys_addr_t next_phys;
646	uintptr_t pgd_idx = pgd_index(va);
647
648	if (sz == PGDIR_SIZE) {
649		if (pgd_val(pgdp[pgd_idx]) == 0)
650			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
651		return;
652	}
653
654	if (pgd_val(pgdp[pgd_idx]) == 0) {
655		next_phys = alloc_pgd_next(va);
656		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
657		nextp = get_pgd_next_virt(next_phys);
658		memset(nextp, 0, PAGE_SIZE);
659	} else {
660		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
661		nextp = get_pgd_next_virt(next_phys);
662	}
663
664	create_pgd_next_mapping(nextp, va, pa, sz, prot);
665}
666
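/*
 * Return the largest mapping size that both @pa and @va are aligned to and
 * that does not exceed @size, falling back to PAGE_SIZE.
 */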
667static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
668				      phys_addr_t size)
669{
670	if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
671		return PGDIR_SIZE;
672
673	if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
674		return P4D_SIZE;
675
676	if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
677		return PUD_SIZE;
678
679	if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
680		return PMD_SIZE;
681
682	return PAGE_SIZE;
683}
684
685#ifdef CONFIG_XIP_KERNEL
686#define phys_ram_base  (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
687extern char _xiprom[], _exiprom[], __data_loc;
688
689/* called from head.S with MMU off */
690asmlinkage void __init __copy_data(void)
691{
692	void *from = (void *)(&__data_loc);
693	void *to = (void *)CONFIG_PHYS_RAM_BASE;
694	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
695
696	memcpy(to, from, sz);
697}
698#endif
699
700#ifdef CONFIG_STRICT_KERNEL_RWX
701static __init pgprot_t pgprot_from_va(uintptr_t va)
702{
703	if (is_va_kernel_text(va))
704		return PAGE_KERNEL_READ_EXEC;
705
706	/*
707	 * In the 64-bit kernel, the kernel mapping is outside the linear mapping,
708	 * so we must protect its linear mapping alias from being executed and
709	 * written.
710	 * The rodata section is marked read-only in mark_rodata_ro().
711	 */
712	if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
713		return PAGE_KERNEL_READ;
714
715	return PAGE_KERNEL;
716}
717
718void mark_rodata_ro(void)
719{
720	set_kernel_memory(__start_rodata, _data, set_memory_ro);
721	if (IS_ENABLED(CONFIG_64BIT))
722		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
723				  set_memory_ro);
724
725	debug_checkwx();
726}
727#else
728static __init pgprot_t pgprot_from_va(uintptr_t va)
729{
730	if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
731		return PAGE_KERNEL;
732
733	return PAGE_KERNEL_EXEC;
734}
735#endif /* CONFIG_STRICT_KERNEL_RWX */
736
737#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
738u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);
739
740static void __init disable_pgtable_l5(void)
741{
742	pgtable_l5_enabled = false;
743	kernel_map.page_offset = PAGE_OFFSET_L4;
744	satp_mode = SATP_MODE_48;
745}
746
747static void __init disable_pgtable_l4(void)
748{
749	pgtable_l4_enabled = false;
750	kernel_map.page_offset = PAGE_OFFSET_L3;
751	satp_mode = SATP_MODE_39;
752}
753
754static int __init print_no4lvl(char *p)
755{
756	pr_info("Disabled 4-level and 5-level paging");
757	return 0;
758}
759early_param("no4lvl", print_no4lvl);
760
761static int __init print_no5lvl(char *p)
762{
763	pr_info("Disabled 5-level paging");
764	return 0;
765}
766early_param("no5lvl", print_no5lvl);
767
768/*
769 * There is a simple way to determine whether 4-level paging is supported by
770 * the underlying hardware: establish a 1:1 mapping in 4-level page table mode,
771 * then read SATP to see whether the configuration was taken into account,
772 * meaning sv48 is supported.
773 */
774static __init void set_satp_mode(uintptr_t dtb_pa)
775{
776	u64 identity_satp, hw_satp;
777	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
778	u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
779
780	if (satp_mode_cmdline == SATP_MODE_57) {
781		disable_pgtable_l5();
782	} else if (satp_mode_cmdline == SATP_MODE_48) {
783		disable_pgtable_l5();
784		disable_pgtable_l4();
785		return;
786	}
787
788	create_p4d_mapping(early_p4d,
789			set_satp_mode_pmd, (uintptr_t)early_pud,
790			P4D_SIZE, PAGE_TABLE);
791	create_pud_mapping(early_pud,
792			   set_satp_mode_pmd, (uintptr_t)early_pmd,
793			   PUD_SIZE, PAGE_TABLE);
794	/* Handle the case where set_satp_mode straddles 2 PMDs */
795	create_pmd_mapping(early_pmd,
796			   set_satp_mode_pmd, set_satp_mode_pmd,
797			   PMD_SIZE, PAGE_KERNEL_EXEC);
798	create_pmd_mapping(early_pmd,
799			   set_satp_mode_pmd + PMD_SIZE,
800			   set_satp_mode_pmd + PMD_SIZE,
801			   PMD_SIZE, PAGE_KERNEL_EXEC);
802retry:
803	create_pgd_mapping(early_pg_dir,
804			   set_satp_mode_pmd,
805			   pgtable_l5_enabled ?
806				(uintptr_t)early_p4d : (uintptr_t)early_pud,
807			   PGDIR_SIZE, PAGE_TABLE);
808
809	identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
810
811	local_flush_tlb_all();
812	csr_write(CSR_SATP, identity_satp);
813	hw_satp = csr_swap(CSR_SATP, 0ULL);
814	local_flush_tlb_all();
815
816	if (hw_satp != identity_satp) {
817		if (pgtable_l5_enabled) {
818			disable_pgtable_l5();
819			memset(early_pg_dir, 0, PAGE_SIZE);
820			goto retry;
821		}
822		disable_pgtable_l4();
823	}
824
825	memset(early_pg_dir, 0, PAGE_SIZE);
826	memset(early_p4d, 0, PAGE_SIZE);
827	memset(early_pud, 0, PAGE_SIZE);
828	memset(early_pmd, 0, PAGE_SIZE);
829}
830#endif
831
832/*
833 * setup_vm() is called from head.S with MMU-off.
834 *
835 * The following requirements should be honoured for setup_vm() to work
836 * correctly:
837 * 1) It should use PC-relative addressing for accessing kernel symbols.
838 *    To achieve this we always use GCC cmodel=medany.
839 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
840 *    so disable compiler instrumentation when FTRACE is enabled.
841 *
842 * Currently, the above requirements are honoured by using custom CFLAGS
843 * for init.o in mm/Makefile.
844 */
845
846#ifndef __riscv_cmodel_medany
847#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
848#endif
849
850#ifdef CONFIG_RELOCATABLE
851extern unsigned long __rela_dyn_start, __rela_dyn_end;
852
853static void __init relocate_kernel(void)
854{
855	Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
856	/*
857	 * This holds the offset between the linked virtual address and the
858	 * relocated virtual address.
859	 */
860	uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
861	/*
862	 * This holds the offset between kernel linked virtual address and
863	 * physical address.
864	 */
865	uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
866
867	for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
868		Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
869		Elf64_Addr relocated_addr = rela->r_addend;
870
871		if (rela->r_info != R_RISCV_RELATIVE)
872			continue;
873
874		/*
875		 * Make sure not to relocate vdso symbols like rt_sigreturn,
876		 * which are linked from address 0 in vmlinux, since vdso
877		 * symbol addresses are actually used as an offset from
878		 * mm->context.vdso in the VDSO_OFFSET macro.
879		 */
880		if (relocated_addr >= KERNEL_LINK_ADDR)
881			relocated_addr += reloc_offset;
882
883		*(Elf64_Addr *)addr = relocated_addr;
884	}
885}
886#endif /* CONFIG_RELOCATABLE */
887
888#ifdef CONFIG_XIP_KERNEL
889static void __init create_kernel_page_table(pgd_t *pgdir,
890					    __always_unused bool early)
891{
892	uintptr_t va, end_va;
893
894	/* Map the flash resident part */
895	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
896	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
897		create_pgd_mapping(pgdir, va,
898				   kernel_map.xiprom + (va - kernel_map.virt_addr),
899				   PMD_SIZE, PAGE_KERNEL_EXEC);
900
901	/* Map the data in RAM */
902	end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
903	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
904		create_pgd_mapping(pgdir, va,
905				   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
906				   PMD_SIZE, PAGE_KERNEL);
907}
908#else
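/*
 * Map the whole kernel image with PMD mappings: executable during early boot,
 * then with the per-region permissions returned by pgprot_from_va() when the
 * final (swapper) page table is built.
 */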
909static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
910{
911	uintptr_t va, end_va;
912
913	end_va = kernel_map.virt_addr + kernel_map.size;
914	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
915		create_pgd_mapping(pgdir, va,
916				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
917				   PMD_SIZE,
918				   early ?
919					PAGE_KERNEL_EXEC : pgprot_from_va(va));
920}
921#endif
922
923/*
924 * Set up a 4MB mapping that encompasses the device tree: for the 64-bit
925 * kernel, this means 2 PMD entries, whereas for the 32-bit kernel, this is
926 * only 1 PGDIR entry.
927 */
928static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
929					       uintptr_t dtb_pa)
930{
931#ifndef CONFIG_BUILTIN_DTB
932	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
933
934	/* Make sure the fdt fixmap address is always aligned on PMD size */
935	BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
936
937	/* In 32-bit only, the fdt lies in its own PGD */
938	if (!IS_ENABLED(CONFIG_64BIT)) {
939		create_pgd_mapping(early_pg_dir, fix_fdt_va,
940				   pa, MAX_FDT_SIZE, PAGE_KERNEL);
941	} else {
942		create_pmd_mapping(fixmap_pmd, fix_fdt_va,
943				   pa, PMD_SIZE, PAGE_KERNEL);
944		create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
945				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
946	}
947
948	dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
949#else
950	/*
951	 * For the 64-bit kernel, __va can't be used since it would return a
952	 * linear mapping address, whereas dtb_early_va will be used before
953	 * setup_vm_final installs the linear mapping. For the 32-bit kernel,
954	 * as the kernel is mapped in the linear mapping, this makes no difference.
955	 */
956	dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
957#endif
958
959	dtb_early_pa = dtb_pa;
960}
961
962/*
963 * The MMU is not enabled yet: the page tables are allocated directly using
964 * early_pmd/pud/p4d and the address returned is the physical one.
965 */
966static void __init pt_ops_set_early(void)
967{
968	pt_ops.alloc_pte = alloc_pte_early;
969	pt_ops.get_pte_virt = get_pte_virt_early;
970#ifndef __PAGETABLE_PMD_FOLDED
971	pt_ops.alloc_pmd = alloc_pmd_early;
972	pt_ops.get_pmd_virt = get_pmd_virt_early;
973	pt_ops.alloc_pud = alloc_pud_early;
974	pt_ops.get_pud_virt = get_pud_virt_early;
975	pt_ops.alloc_p4d = alloc_p4d_early;
976	pt_ops.get_p4d_virt = get_p4d_virt_early;
977#endif
978}
979
980/*
981 * The MMU is enabled but page table setup is not complete yet.
982 * The fixmap page table alloc functions must be used to temporarily
983 * map the allocated physical pages since the linear mapping does not exist yet.
984 *
985 * Note that this is called with the MMU disabled, hence the use of
986 * kernel_mapping_pa_to_va, but pt_ops will be used as described above.
987 */
988static void __init pt_ops_set_fixmap(void)
989{
990	pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap);
991	pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap);
992#ifndef __PAGETABLE_PMD_FOLDED
993	pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap);
994	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap);
995	pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap);
996	pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap);
997	pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap);
998	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap);
999#endif
1000}
1001
1002/*
1003 * The MMU is enabled and page table setup is complete, so from now on we can
1004 * use the generic page allocation functions to set up the page tables.
1005 */
1006static void __init pt_ops_set_late(void)
1007{
1008	pt_ops.alloc_pte = alloc_pte_late;
1009	pt_ops.get_pte_virt = get_pte_virt_late;
1010#ifndef __PAGETABLE_PMD_FOLDED
1011	pt_ops.alloc_pmd = alloc_pmd_late;
1012	pt_ops.get_pmd_virt = get_pmd_virt_late;
1013	pt_ops.alloc_pud = alloc_pud_late;
1014	pt_ops.get_pud_virt = get_pud_virt_late;
1015	pt_ops.alloc_p4d = alloc_p4d_late;
1016	pt_ops.get_p4d_virt = get_p4d_virt_late;
1017#endif
1018}
1019
1020#ifdef CONFIG_RANDOMIZE_BASE
1021extern bool __init __pi_set_nokaslr_from_cmdline(uintptr_t dtb_pa);
1022extern u64 __init __pi_get_kaslr_seed(uintptr_t dtb_pa);
1023
1024static int __init print_nokaslr(char *p)
1025{
1026	pr_info("Disabled KASLR");
1027	return 0;
1028}
1029early_param("nokaslr", print_nokaslr);
1030
1031unsigned long kaslr_offset(void)
1032{
1033	return kernel_map.virt_offset;
1034}
1035#endif
1036
1037asmlinkage void __init setup_vm(uintptr_t dtb_pa)
1038{
1039	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
1040
1041#ifdef CONFIG_RANDOMIZE_BASE
1042	if (!__pi_set_nokaslr_from_cmdline(dtb_pa)) {
1043		u64 kaslr_seed = __pi_get_kaslr_seed(dtb_pa);
1044		u32 kernel_size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
1045		u32 nr_pos;
1046
1047		/*
1048		 * Compute the number of positions available: we are limited
1049		 * by the early page table that only has one PUD and we must
1050		 * be aligned on PMD_SIZE.
1051		 */
1052		nr_pos = (PUD_SIZE - kernel_size) / PMD_SIZE;
1053
1054		kernel_map.virt_offset = (kaslr_seed % nr_pos) * PMD_SIZE;
1055	}
1056#endif
1057
1058	kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
1059
1060#ifdef CONFIG_XIP_KERNEL
1061#ifdef CONFIG_64BIT
1062	kernel_map.page_offset = PAGE_OFFSET_L3;
1063#else
1064	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
1065#endif
1066	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
1067	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
1068
1069	phys_ram_base = CONFIG_PHYS_RAM_BASE;
1070	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
1071	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
1072
1073	kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
1074#else
1075	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
1076	kernel_map.phys_addr = (uintptr_t)(&_start);
1077	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
1078#endif
1079
1080#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
1081	set_satp_mode(dtb_pa);
1082#endif
1083
1084	/*
1085	 * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
1086	 * where we have the system memory layout: this allows us to align
1087	 * the physical and virtual mappings and then make use of PUD/P4D/PGD
1088	 * for the linear mapping. This is only possible because the kernel
1089	 * mapping lies outside the linear mapping.
1090	 * In 32-bit however, as the kernel resides in the linear mapping,
1091	 * setup_vm_final can not change the mapping established here,
1092	 * otherwise the same kernel addresses would get mapped to different
1093	 * physical addresses (if the start of dram is different from the
1094	 * kernel physical address start).
1095	 */
1096	kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
1097				0UL : PAGE_OFFSET - kernel_map.phys_addr;
1098	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
1099
1100	/*
1101	 * The default maximal physical memory size is KERN_VIRT_SIZE for the
1102	 * 32-bit kernel, whereas for the 64-bit kernel the end of the virtual
1103	 * address space is occupied by the modules/BPF/kernel mappings, which
1104	 * reduces the available size of the linear mapping.
1105	 */
1106	memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
1107
1108	/* Sanity check alignment and size */
1109	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
1110	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
1111
1112#ifdef CONFIG_64BIT
1113	/*
1114	 * The last 4K bytes of the addressable memory cannot be mapped because
1115	 * of the IS_ERR_VALUE macro.
1116	 */
1117	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
1118#endif
1119
1120#ifdef CONFIG_RELOCATABLE
1121	/*
1122	 * The early page table uses only one PUD, which makes it possible
1123	 * to map at most PUD_SIZE, aligned on PUD_SIZE: if the relocation
1124	 * offset makes the kernel cross a PUD_SIZE boundary, raise a BUG()
1125	 * since part of the kernel would not get mapped.
1126	 */
1127	BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
1128	relocate_kernel();
1129#endif
1130
1131	apply_early_boot_alternatives();
1132	pt_ops_set_early();
1133
1134	/* Setup early PGD for fixmap */
1135	create_pgd_mapping(early_pg_dir, FIXADDR_START,
1136			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
1137
1138#ifndef __PAGETABLE_PMD_FOLDED
1139	/* Setup fixmap P4D and PUD */
1140	if (pgtable_l5_enabled)
1141		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
1142				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
1143	/* Setup fixmap PUD and PMD */
1144	if (pgtable_l4_enabled)
1145		create_pud_mapping(fixmap_pud, FIXADDR_START,
1146				   (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
1147	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
1148			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
1149	/* Setup trampoline PGD and PMD */
1150	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
1151			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
1152	if (pgtable_l5_enabled)
1153		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
1154				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
1155	if (pgtable_l4_enabled)
1156		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
1157				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
1158#ifdef CONFIG_XIP_KERNEL
1159	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
1160			   kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
1161#else
1162	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
1163			   kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
1164#endif
1165#else
1166	/* Setup trampoline PGD */
1167	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
1168			   kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
1169#endif
1170
1171	/*
1172	 * Set up an early PGD covering the entire kernel, which will allow
1173	 * us to reach paging_init(). We map all memory banks later
1174	 * in setup_vm_final() below.
1175	 */
1176	create_kernel_page_table(early_pg_dir, true);
1177
1178	/* Setup early mapping for FDT early scan */
1179	create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa);
1180
1181	/*
1182	 * The boot-time fixmap can only handle PMD_SIZE mappings. Thus, the
1183	 * boot-time ioremap range cannot span multiple PMDs.
1184	 */
1185	BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
1186		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
1187
1188#ifndef __PAGETABLE_PMD_FOLDED
1189	/*
1190	 * The early ioremap fixmap is already created as it lies within the first
1191	 * 2MB of the fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
1192	 * and FIX_BTMAP_BEGIN should lie in the same PMD. Verify that and warn
1193	 * the user if not.
1194	 */
1195	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
1196	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
1197	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
1198		WARN_ON(1);
1199		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
1200			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
1201		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
1202			fix_to_virt(FIX_BTMAP_BEGIN));
1203		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
1204			fix_to_virt(FIX_BTMAP_END));
1205
1206		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
1207		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
1208	}
1209#endif
1210
1211	pt_ops_set_fixmap();
1212}
1213
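/*
 * Map the physical range [start, end) into the linear mapping, using either
 * the given fixed_map_size or the best (largest) mapping size for each chunk.
 */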
1214static void __init create_linear_mapping_range(phys_addr_t start,
1215					       phys_addr_t end,
1216					       uintptr_t fixed_map_size)
1217{
1218	phys_addr_t pa;
1219	uintptr_t va, map_size;
1220
1221	for (pa = start; pa < end; pa += map_size) {
1222		va = (uintptr_t)__va(pa);
1223		map_size = fixed_map_size ? fixed_map_size :
1224					    best_map_size(pa, va, end - pa);
1225
1226		create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
1227				   pgprot_from_va(va));
1228	}
1229}
1230
1231static void __init create_linear_mapping_page_table(void)
1232{
1233	phys_addr_t start, end;
1234	phys_addr_t kfence_pool __maybe_unused;
1235	u64 i;
1236
1237#ifdef CONFIG_STRICT_KERNEL_RWX
1238	phys_addr_t ktext_start = __pa_symbol(_start);
1239	phys_addr_t ktext_size = __init_data_begin - _start;
1240	phys_addr_t krodata_start = __pa_symbol(__start_rodata);
1241	phys_addr_t krodata_size = _data - __start_rodata;
1242
1243	/* Isolate kernel text and rodata so they don't get mapped with a PUD */
1244	memblock_mark_nomap(ktext_start,  ktext_size);
1245	memblock_mark_nomap(krodata_start, krodata_size);
1246#endif
1247
1248#ifdef CONFIG_KFENCE
1249	/*
1250	 * The kfence pool must be backed by PAGE_SIZE mappings, so allocate it
1251	 * before we set up the linear mapping to avoid using hugepages
1252	 * for this region.
1253	 */
1254	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
1255	BUG_ON(!kfence_pool);
1256
1257	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
1258	__kfence_pool = __va(kfence_pool);
1259#endif
1260
1261	/* Map all memory banks in the linear mapping */
1262	for_each_mem_range(i, &start, &end) {
1263		if (start >= end)
1264			break;
1265		if (start <= __pa(PAGE_OFFSET) &&
1266		    __pa(PAGE_OFFSET) < end)
1267			start = __pa(PAGE_OFFSET);
1268		if (end >= __pa(PAGE_OFFSET) + memory_limit)
1269			end = __pa(PAGE_OFFSET) + memory_limit;
1270
1271		create_linear_mapping_range(start, end, 0);
1272	}
1273
1274#ifdef CONFIG_STRICT_KERNEL_RWX
1275	create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0);
1276	create_linear_mapping_range(krodata_start,
1277				    krodata_start + krodata_size, 0);
1278
1279	memblock_clear_nomap(ktext_start,  ktext_size);
1280	memblock_clear_nomap(krodata_start, krodata_size);
1281#endif
1282
1283#ifdef CONFIG_KFENCE
1284	create_linear_mapping_range(kfence_pool,
1285				    kfence_pool + KFENCE_POOL_SIZE,
1286				    PAGE_SIZE);
1287
1288	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
1289#endif
1290}
1291
1292static void __init setup_vm_final(void)
1293{
1294	/* Setup swapper PGD for fixmap */
1295#if !defined(CONFIG_64BIT)
1296	/*
1297	 * In 32-bit, the device tree lies in a pgd entry, so it must be copied
1298	 * directly into swapper_pg_dir in addition to the pgd entry that points
1299	 * to fixmap_pte.
1300	 */
1301	unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));
1302
1303	set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
1304#endif
1305	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
1306			   __pa_symbol(fixmap_pgd_next),
1307			   PGDIR_SIZE, PAGE_TABLE);
1308
1309	/* Map the linear mapping */
1310	create_linear_mapping_page_table();
1311
1312	/* Map the kernel */
1313	if (IS_ENABLED(CONFIG_64BIT))
1314		create_kernel_page_table(swapper_pg_dir, false);
1315
1316#ifdef CONFIG_KASAN
1317	kasan_swapper_init();
1318#endif
1319
1320	/* Clear fixmap PTE and PMD mappings */
1321	clear_fixmap(FIX_PTE);
1322	clear_fixmap(FIX_PMD);
1323	clear_fixmap(FIX_PUD);
1324	clear_fixmap(FIX_P4D);
1325
1326	/* Move to swapper page table */
1327	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
1328	local_flush_tlb_all();
1329
1330	pt_ops_set_late();
1331}
1332#else
1333asmlinkage void __init setup_vm(uintptr_t dtb_pa)
1334{
1335	dtb_early_va = (void *)dtb_pa;
1336	dtb_early_pa = dtb_pa;
1337}
1338
1339static inline void setup_vm_final(void)
1340{
1341}
1342#endif /* CONFIG_MMU */
1343
1344/* Reserve 128M of low memory by default for the swiotlb buffer */
1345#define DEFAULT_CRASH_KERNEL_LOW_SIZE	(128UL << 20)
1346
1347static int __init reserve_crashkernel_low(unsigned long long low_size)
1348{
1349	unsigned long long low_base;
1350
1351	low_base = memblock_phys_alloc_range(low_size, PMD_SIZE, 0, dma32_phys_limit);
1352	if (!low_base) {
1353		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
1354		return -ENOMEM;
1355	}
1356
1357	pr_info("crashkernel low memory reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
1358		low_base, low_base + low_size, low_size >> 20);
1359
1360	crashk_low_res.start = low_base;
1361	crashk_low_res.end = low_base + low_size - 1;
1362
1363	return 0;
1364}
1365
1366/*
1367 * reserve_crashkernel() - reserves memory for the crash kernel
1368 *
1369 * This function reserves the memory area given by the "crashkernel=" kernel
1370 * command line parameter. The reserved memory is used by the dump capture
1371 * kernel when the primary kernel crashes.
1372 */
1373static void __init reserve_crashkernel(void)
1374{
1375	unsigned long long crash_base = 0;
1376	unsigned long long crash_size = 0;
1377	unsigned long long crash_low_size = 0;
1378	unsigned long search_start = memblock_start_of_DRAM();
1379	unsigned long search_end = (unsigned long)dma32_phys_limit;
1380	char *cmdline = boot_command_line;
1381	bool fixed_base = false;
1382	bool high = false;
1383
1384	int ret = 0;
1385
1386	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
1387		return;
1388	/*
1389	 * Don't reserve a region for a crash kernel on a crash kernel
1390	 * since it doesn't make much sense and we have limited memory
1391	 * resources.
1392	 */
1393	if (is_kdump_kernel()) {
1394		pr_info("crashkernel: ignoring reservation request\n");
1395		return;
1396	}
1397
1398	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
1399				&crash_size, &crash_base);
1400	if (ret == -ENOENT) {
1401		/* Fallback to crashkernel=X,[high,low] */
1402		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
1403		if (ret || !crash_size)
1404			return;
1405
1406		/*
1407		 * crashkernel=Y,low is valid only when crashkernel=X,high
1408		 * is passed.
1409		 */
1410		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
1411		if (ret == -ENOENT)
1412			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
1413		else if (ret)
1414			return;
1415
1416		search_start = (unsigned long)dma32_phys_limit;
1417		search_end = memblock_end_of_DRAM();
1418		high = true;
1419	} else if (ret || !crash_size) {
1420		/* Invalid argument value specified */
1421		return;
1422	}
1423
1424	crash_size = PAGE_ALIGN(crash_size);
1425
1426	if (crash_base) {
1427		fixed_base = true;
1428		search_start = crash_base;
1429		search_end = crash_base + crash_size;
1430	}
1431
1432	/*
1433	 * The current RISC-V boot protocol requires 2MB alignment for
1434	 * RV64 and 4MB alignment for RV32 (hugepage size).
1435	 *
1436	 * Try to allocate from 32-bit addressable physical memory so that
1437	 * swiotlb can work on the crash kernel.
1438	 */
1439	crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
1440					       search_start, search_end);
1441	if (crash_base == 0) {
1442		/*
1443		 * For crashkernel=size[KMG]@offset[KMG], print out a failure
1444		 * message if we can't reserve the specified region.
1445		 */
1446		if (fixed_base) {
1447			pr_warn("crashkernel: allocating failed with given size@offset\n");
1448			return;
1449		}
1450
1451		if (high) {
1452			/*
1453			 * For crashkernel=size[KMG],high, if the first attempt was
1454			 * for high memory, fall back to low memory.
1455			 */
1456			search_start = memblock_start_of_DRAM();
1457			search_end = (unsigned long)dma32_phys_limit;
1458		} else {
1459			/*
1460			 * For crashkernel=size[KMG], if the first attempt was for
1461			 * low memory, fall back to high memory, the minimum required
1462			 * low memory will be reserved later.
1463			 */
1464			search_start = (unsigned long)dma32_phys_limit;
1465			search_end = memblock_end_of_DRAM();
1466			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
1467		}
1468
1469		crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
1470						       search_start, search_end);
1471		if (crash_base == 0) {
1472			pr_warn("crashkernel: couldn't allocate %lldKB\n",
1473				crash_size >> 10);
1474			return;
1475		}
1476	}
1477
1478	if ((crash_base >= dma32_phys_limit) && crash_low_size &&
1479	     reserve_crashkernel_low(crash_low_size)) {
1480		memblock_phys_free(crash_base, crash_size);
1481		return;
1482	}
1483
1484	pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
1485		crash_base, crash_base + crash_size, crash_size >> 20);
1486
1487	crashk_res.start = crash_base;
1488	crashk_res.end = crash_base + crash_size - 1;
1489}
1490
1491void __init paging_init(void)
1492{
1493	setup_bootmem();
1494	setup_vm_final();
1495
1496	/* Depends on the linear mapping being ready */
1497	memblock_allow_resize();
1498}
1499
1500void __init misc_mem_init(void)
1501{
1502	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
1503	arch_numa_init();
1504	sparse_init();
1505#ifdef CONFIG_SPARSEMEM_VMEMMAP
1506	/* The entire VMEMMAP region has been populated. Flush TLB for this region */
1507	local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
1508#endif
1509	zone_sizes_init();
1510	reserve_crashkernel();
1511	memblock_dump_all();
1512}
1513
1514#ifdef CONFIG_SPARSEMEM_VMEMMAP
1515int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
1516			       struct vmem_altmap *altmap)
1517{
1518	return vmemmap_populate_basepages(start, end, node, NULL);
1519}
1520#endif
1521
1522#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
1523/*
1524 * Pre-allocates page-table pages for a specific area in the kernel
1525 * page-table. Only the level which needs to be synchronized between
1526 * all page-tables is allocated because the synchronization can be
1527 * expensive.
1528 */
1529static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
1530					       const char *area)
1531{
1532	unsigned long addr;
1533	const char *lvl;
1534
1535	for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
1536		pgd_t *pgd = pgd_offset_k(addr);
1537		p4d_t *p4d;
1538		pud_t *pud;
1539		pmd_t *pmd;
1540
1541		lvl = "p4d";
1542		p4d = p4d_alloc(&init_mm, pgd, addr);
1543		if (!p4d)
1544			goto failed;
1545
1546		if (pgtable_l5_enabled)
1547			continue;
1548
1549		lvl = "pud";
1550		pud = pud_alloc(&init_mm, p4d, addr);
1551		if (!pud)
1552			goto failed;
1553
1554		if (pgtable_l4_enabled)
1555			continue;
1556
1557		lvl = "pmd";
1558		pmd = pmd_alloc(&init_mm, pud, addr);
1559		if (!pmd)
1560			goto failed;
1561	}
1562	return;
1563
1564failed:
1565	/*
1566	 * The pages have to be there now or they will be missing in
1567	 * process page-tables later.
1568	 */
1569	panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
1570}
1571
1572void __init pgtable_cache_init(void)
1573{
1574	preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
1575	if (IS_ENABLED(CONFIG_MODULES))
1576		preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
1577}
1578#endif
1579