162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci/* 462306a36Sopenharmony_ci * Transitional page tables for kexec and hibernate 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file derived from: arch/arm64/kernel/hibernate.c 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Copyright (c) 2021, Microsoft Corporation. 962306a36Sopenharmony_ci * Pasha Tatashin <pasha.tatashin@soleen.com> 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci/* 1462306a36Sopenharmony_ci * Transitional tables are used during system transferring from one world to 1562306a36Sopenharmony_ci * another: such as during hibernate restore, and kexec reboots. During these 1662306a36Sopenharmony_ci * phases one cannot rely on page table not being overwritten. This is because 1762306a36Sopenharmony_ci * hibernate and kexec can overwrite the current page tables during transition. 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <asm/trans_pgd.h> 2162306a36Sopenharmony_ci#include <asm/pgalloc.h> 2262306a36Sopenharmony_ci#include <asm/pgtable.h> 2362306a36Sopenharmony_ci#include <linux/suspend.h> 2462306a36Sopenharmony_ci#include <linux/bug.h> 2562306a36Sopenharmony_ci#include <linux/mm.h> 2662306a36Sopenharmony_ci#include <linux/mmzone.h> 2762306a36Sopenharmony_ci#include <linux/kfence.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic void *trans_alloc(struct trans_pgd_info *info) 3062306a36Sopenharmony_ci{ 3162306a36Sopenharmony_ci return info->trans_alloc_page(info->trans_alloc_arg); 3262306a36Sopenharmony_ci} 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_cistatic void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci pte_t pte = READ_ONCE(*src_ptep); 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci if (pte_valid(pte)) { 3962306a36Sopenharmony_ci /* 4062306a36Sopenharmony_ci * Resume will overwrite areas that may be marked 4162306a36Sopenharmony_ci * read only (code, rodata). Clear the RDONLY bit from 4262306a36Sopenharmony_ci * the temporary mappings we use during restore. 4362306a36Sopenharmony_ci */ 4462306a36Sopenharmony_ci set_pte(dst_ptep, pte_mkwrite_novma(pte)); 4562306a36Sopenharmony_ci } else if ((debug_pagealloc_enabled() || 4662306a36Sopenharmony_ci is_kfence_address((void *)addr)) && !pte_none(pte)) { 4762306a36Sopenharmony_ci /* 4862306a36Sopenharmony_ci * debug_pagealloc will removed the PTE_VALID bit if 4962306a36Sopenharmony_ci * the page isn't in use by the resume kernel. It may have 5062306a36Sopenharmony_ci * been in use by the original kernel, in which case we need 5162306a36Sopenharmony_ci * to put it back in our copy to do the restore. 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * Before marking this entry valid, check the pfn should 5462306a36Sopenharmony_ci * be mapped. 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ci BUG_ON(!pfn_valid(pte_pfn(pte))); 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte))); 5962306a36Sopenharmony_ci } 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, 6362306a36Sopenharmony_ci pmd_t *src_pmdp, unsigned long start, unsigned long end) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci pte_t *src_ptep; 6662306a36Sopenharmony_ci pte_t *dst_ptep; 6762306a36Sopenharmony_ci unsigned long addr = start; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci dst_ptep = trans_alloc(info); 7062306a36Sopenharmony_ci if (!dst_ptep) 7162306a36Sopenharmony_ci return -ENOMEM; 7262306a36Sopenharmony_ci pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); 7362306a36Sopenharmony_ci dst_ptep = pte_offset_kernel(dst_pmdp, start); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci src_ptep = pte_offset_kernel(src_pmdp, start); 7662306a36Sopenharmony_ci do { 7762306a36Sopenharmony_ci _copy_pte(dst_ptep, src_ptep, addr); 7862306a36Sopenharmony_ci } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci return 0; 8162306a36Sopenharmony_ci} 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_cistatic int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, 8462306a36Sopenharmony_ci pud_t *src_pudp, unsigned long start, unsigned long end) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci pmd_t *src_pmdp; 8762306a36Sopenharmony_ci pmd_t *dst_pmdp; 8862306a36Sopenharmony_ci unsigned long next; 8962306a36Sopenharmony_ci unsigned long addr = start; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci if (pud_none(READ_ONCE(*dst_pudp))) { 9262306a36Sopenharmony_ci dst_pmdp = trans_alloc(info); 9362306a36Sopenharmony_ci if (!dst_pmdp) 9462306a36Sopenharmony_ci return -ENOMEM; 9562306a36Sopenharmony_ci pud_populate(NULL, dst_pudp, dst_pmdp); 9662306a36Sopenharmony_ci } 9762306a36Sopenharmony_ci dst_pmdp = pmd_offset(dst_pudp, start); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci src_pmdp = pmd_offset(src_pudp, start); 10062306a36Sopenharmony_ci do { 10162306a36Sopenharmony_ci pmd_t pmd = READ_ONCE(*src_pmdp); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci next = pmd_addr_end(addr, end); 10462306a36Sopenharmony_ci if (pmd_none(pmd)) 10562306a36Sopenharmony_ci continue; 10662306a36Sopenharmony_ci if (pmd_table(pmd)) { 10762306a36Sopenharmony_ci if (copy_pte(info, dst_pmdp, src_pmdp, addr, next)) 10862306a36Sopenharmony_ci return -ENOMEM; 10962306a36Sopenharmony_ci } else { 11062306a36Sopenharmony_ci set_pmd(dst_pmdp, 11162306a36Sopenharmony_ci __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); 11262306a36Sopenharmony_ci } 11362306a36Sopenharmony_ci } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci return 0; 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, 11962306a36Sopenharmony_ci p4d_t *src_p4dp, unsigned long start, 12062306a36Sopenharmony_ci unsigned long end) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci pud_t *dst_pudp; 12362306a36Sopenharmony_ci pud_t *src_pudp; 12462306a36Sopenharmony_ci unsigned long next; 12562306a36Sopenharmony_ci unsigned long addr = start; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (p4d_none(READ_ONCE(*dst_p4dp))) { 12862306a36Sopenharmony_ci dst_pudp = trans_alloc(info); 12962306a36Sopenharmony_ci if (!dst_pudp) 13062306a36Sopenharmony_ci return -ENOMEM; 13162306a36Sopenharmony_ci p4d_populate(NULL, dst_p4dp, dst_pudp); 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci dst_pudp = pud_offset(dst_p4dp, start); 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci src_pudp = pud_offset(src_p4dp, start); 13662306a36Sopenharmony_ci do { 13762306a36Sopenharmony_ci pud_t pud = READ_ONCE(*src_pudp); 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci next = pud_addr_end(addr, end); 14062306a36Sopenharmony_ci if (pud_none(pud)) 14162306a36Sopenharmony_ci continue; 14262306a36Sopenharmony_ci if (pud_table(pud)) { 14362306a36Sopenharmony_ci if (copy_pmd(info, dst_pudp, src_pudp, addr, next)) 14462306a36Sopenharmony_ci return -ENOMEM; 14562306a36Sopenharmony_ci } else { 14662306a36Sopenharmony_ci set_pud(dst_pudp, 14762306a36Sopenharmony_ci __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci } while (dst_pudp++, src_pudp++, addr = next, addr != end); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci return 0; 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, 15562306a36Sopenharmony_ci pgd_t *src_pgdp, unsigned long start, 15662306a36Sopenharmony_ci unsigned long end) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci p4d_t *dst_p4dp; 15962306a36Sopenharmony_ci p4d_t *src_p4dp; 16062306a36Sopenharmony_ci unsigned long next; 16162306a36Sopenharmony_ci unsigned long addr = start; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci dst_p4dp = p4d_offset(dst_pgdp, start); 16462306a36Sopenharmony_ci src_p4dp = p4d_offset(src_pgdp, start); 16562306a36Sopenharmony_ci do { 16662306a36Sopenharmony_ci next = p4d_addr_end(addr, end); 16762306a36Sopenharmony_ci if (p4d_none(READ_ONCE(*src_p4dp))) 16862306a36Sopenharmony_ci continue; 16962306a36Sopenharmony_ci if (copy_pud(info, dst_p4dp, src_p4dp, addr, next)) 17062306a36Sopenharmony_ci return -ENOMEM; 17162306a36Sopenharmony_ci } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistatic int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp, 17762306a36Sopenharmony_ci unsigned long start, unsigned long end) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci unsigned long next; 18062306a36Sopenharmony_ci unsigned long addr = start; 18162306a36Sopenharmony_ci pgd_t *src_pgdp = pgd_offset_k(start); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci dst_pgdp = pgd_offset_pgd(dst_pgdp, start); 18462306a36Sopenharmony_ci do { 18562306a36Sopenharmony_ci next = pgd_addr_end(addr, end); 18662306a36Sopenharmony_ci if (pgd_none(READ_ONCE(*src_pgdp))) 18762306a36Sopenharmony_ci continue; 18862306a36Sopenharmony_ci if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next)) 18962306a36Sopenharmony_ci return -ENOMEM; 19062306a36Sopenharmony_ci } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci return 0; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci/* 19662306a36Sopenharmony_ci * Create trans_pgd and copy linear map. 19762306a36Sopenharmony_ci * info: contains allocator and its argument 19862306a36Sopenharmony_ci * dst_pgdp: new page table that is created, and to which map is copied. 19962306a36Sopenharmony_ci * start: Start of the interval (inclusive). 20062306a36Sopenharmony_ci * end: End of the interval (exclusive). 20162306a36Sopenharmony_ci * 20262306a36Sopenharmony_ci * Returns 0 on success, and -ENOMEM on failure. 20362306a36Sopenharmony_ci */ 20462306a36Sopenharmony_ciint trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, 20562306a36Sopenharmony_ci unsigned long start, unsigned long end) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci int rc; 20862306a36Sopenharmony_ci pgd_t *trans_pgd = trans_alloc(info); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci if (!trans_pgd) { 21162306a36Sopenharmony_ci pr_err("Failed to allocate memory for temporary page tables.\n"); 21262306a36Sopenharmony_ci return -ENOMEM; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci rc = copy_page_tables(info, trans_pgd, start, end); 21662306a36Sopenharmony_ci if (!rc) 21762306a36Sopenharmony_ci *dst_pgdp = trans_pgd; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci return rc; 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci/* 22362306a36Sopenharmony_ci * The page we want to idmap may be outside the range covered by VA_BITS that 22462306a36Sopenharmony_ci * can be built using the kernel's p?d_populate() helpers. As a one off, for a 22562306a36Sopenharmony_ci * single page, we build these page tables bottom up and just assume that will 22662306a36Sopenharmony_ci * need the maximum T0SZ. 22762306a36Sopenharmony_ci * 22862306a36Sopenharmony_ci * Returns 0 on success, and -ENOMEM on failure. 22962306a36Sopenharmony_ci * On success trans_ttbr0 contains page table with idmapped page, t0sz is set to 23062306a36Sopenharmony_ci * maximum T0SZ for this page. 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ciint trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, 23362306a36Sopenharmony_ci unsigned long *t0sz, void *page) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci phys_addr_t dst_addr = virt_to_phys(page); 23662306a36Sopenharmony_ci unsigned long pfn = __phys_to_pfn(dst_addr); 23762306a36Sopenharmony_ci int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47; 23862306a36Sopenharmony_ci int bits_mapped = PAGE_SHIFT - 4; 23962306a36Sopenharmony_ci unsigned long level_mask, prev_level_entry, *levels[4]; 24062306a36Sopenharmony_ci int this_level, index, level_lsb, level_msb; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci dst_addr &= PAGE_MASK; 24362306a36Sopenharmony_ci prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX)); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci for (this_level = 3; this_level >= 0; this_level--) { 24662306a36Sopenharmony_ci levels[this_level] = trans_alloc(info); 24762306a36Sopenharmony_ci if (!levels[this_level]) 24862306a36Sopenharmony_ci return -ENOMEM; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); 25162306a36Sopenharmony_ci level_msb = min(level_lsb + bits_mapped, max_msb); 25262306a36Sopenharmony_ci level_mask = GENMASK_ULL(level_msb, level_lsb); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci index = (dst_addr & level_mask) >> level_lsb; 25562306a36Sopenharmony_ci *(levels[this_level] + index) = prev_level_entry; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci pfn = virt_to_pfn(levels[this_level]); 25862306a36Sopenharmony_ci prev_level_entry = pte_val(pfn_pte(pfn, 25962306a36Sopenharmony_ci __pgprot(PMD_TYPE_TABLE))); 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci if (level_msb == max_msb) 26262306a36Sopenharmony_ci break; 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn)); 26662306a36Sopenharmony_ci *t0sz = TCR_T0SZ(max_msb + 1); 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci return 0; 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci/* 27262306a36Sopenharmony_ci * Create a copy of the vector table so we can call HVC_SET_VECTORS or 27362306a36Sopenharmony_ci * HVC_SOFT_RESTART from contexts where the table may be overwritten. 27462306a36Sopenharmony_ci */ 27562306a36Sopenharmony_ciint trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, 27662306a36Sopenharmony_ci phys_addr_t *el2_vectors) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci void *hyp_stub = trans_alloc(info); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci if (!hyp_stub) 28162306a36Sopenharmony_ci return -ENOMEM; 28262306a36Sopenharmony_ci *el2_vectors = virt_to_phys(hyp_stub); 28362306a36Sopenharmony_ci memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); 28462306a36Sopenharmony_ci caches_clean_inval_pou((unsigned long)hyp_stub, 28562306a36Sopenharmony_ci (unsigned long)hyp_stub + 28662306a36Sopenharmony_ci ARM64_VECTOR_TABLE_LEN); 28762306a36Sopenharmony_ci dcache_clean_inval_poc((unsigned long)hyp_stub, 28862306a36Sopenharmony_ci (unsigned long)hyp_stub + 28962306a36Sopenharmony_ci ARM64_VECTOR_TABLE_LEN); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci return 0; 29262306a36Sopenharmony_ci} 293