// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;
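
/*
 * io_map_base above is the cursor of a simple top-down allocator for
 * private HYP VAs: __create_hyp_private_mapping() below carves each
 * mapping out of the space under the idmap by subtracting the
 * page-aligned size from it.
 */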

/*
 * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
 * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
 * CONFIG_LOCKUP_DETECTOR, CONFIG_LOCKDEP. Additionally, holding the lock too
 * long will also starve other vCPUs. We also have to make sure that the page
 * tables are not freed while we release the lock.
 */
static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_pgd_addr_end(kvm, addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}

#define stage2_apply_range_resched(kvm, addr, end, fn)			\
	stage2_apply_range(kvm, addr, end, fn, true)
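
/*
 * Illustrative usage sketch only: a typical caller walks a whole IPA
 * range under mmu_lock and lets the helper drop the lock between
 * PGD-sized chunks, e.g.
 *
 *	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
 *
 * as stage2_flush_memslot() below does.
 */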

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we flush to make sure the IO subsystem will
 * never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @mmu:   The KVM stage-2 MMU pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 * @may_block: Whether or not we are permitted to block
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = mmu->kvm;
	phys_addr_t end = start + size;

	assert_spin_locked(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
}
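
/*
 * Worked example (hypothetical numbers): with 4K pages, a memslot with
 * base_gfn = 0x10000 and npages = 0x800 flushes the IPA range
 * [0x10000000, 0x10800000): addr = base_gfn << PAGE_SHIFT and
 * end = addr + PAGE_SIZE * npages.
 */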

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 */
void free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static int __create_hyp_mappings(unsigned long start, unsigned long size,
				 unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}
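
/*
 * Note on kvm_kaddr_to_phys(): vmalloc mappings are only virtually
 * contiguous, so they must be translated one page at a time via
 * vmalloc_to_page(), while linear-map addresses can use __pa()
 * directly.
 */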

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}
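
/*
 * Illustrative sketch only (the callers live outside this file):
 * kernel sections are typically shared with HYP like
 *
 *	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
 *				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
 *
 * relying on the kern_hyp_va() translation performed above.
 */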

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * This assumes that we have enough space below the idmap
	 * page to allocate our VAs. If not, the check below will
	 * kick. A potential alternative would be to detect that
	 * overflow and switch to an allocation above the idmap.
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
	 * allocating the new area, as it would indicate we've
	 * overflowed the idmap/IO address range.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	ret = __create_hyp_mappings(base, size, phys_addr, prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);
out:
	return ret;
}
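
/*
 * Worked example (hypothetical numbers): phys_addr = 0x90001234 with
 * size = 0x100 rounds up to a single page, since
 * PAGE_ALIGN(0x100 + 0x234) = 0x1000; io_map_base drops by 0x1000 and
 * *haddr is returned as base + 0x234 so the sub-page offset of the
 * physical address is preserved.
 */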

/**
 * create_hyp_io_mappings - Map IO into both kernel and HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

/**
 * create_hyp_exec_mappings - Map an executable range into HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

/**
 * kvm_init_stage2_mmu - Initialise an S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	int cpu, err;
	struct kvm_pgtable *pgt;

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	if (!pgt)
		return -ENOMEM;

	err = kvm_pgtable_stage2_init(pgt, kvm);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	mmu->kvm = kvm;
	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	mmu->vmid.vmid_gen = 0;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}
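
/*
 * The -1 in last_vcpu_ran is a sentinel meaning "no vCPU has run on
 * this physical CPU yet"; the vCPU load path (outside this file)
 * compares it against the vCPU about to run, so the first run on each
 * CPU is treated like a vCPU switch and gets the corresponding local
 * TLB maintenance.
 */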

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}
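
/*
 * Worked example (hypothetical numbers): for a memslot with
 * userspace_addr = 0x7f0000000000 and addr (IPA) = 0x80000000, a VMA
 * intersection starting at vm_start = 0x7f0000040000 unmaps from
 * gpa = 0x80000000 + 0x40000 = 0x80040000.
 */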

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}

void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = mmu->kvm;
	struct kvm_pgtable *pgt = NULL;

	spin_lock(&kvm->mmu_lock);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	spin_unlock(&kvm->mmu_lock);

	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}
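
/*
 * Note the two-phase teardown above: mmu->pgt and mmu->pgd_phys are
 * cleared under mmu_lock so that concurrent walkers observe a NULL
 * page-table pointer and bail out, while the potentially long
 * kvm_pgtable_stage2_destroy() walk runs after the spinlock has been
 * dropped.
 */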

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:   Whether or not to create a writable mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |
				     (writable ? KVM_PGTABLE_PROT_W : 0);

	size += offset_in_page(guest_ipa);
	guest_ipa &= PAGE_MASK;

	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
		ret = kvm_mmu_topup_memory_cache(&cache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			break;

		spin_lock(&kvm->mmu_lock);
		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
					     &cache);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			break;

		pa += PAGE_SIZE;
	}

	kvm_mmu_free_memory_cache(&cache);
	return ret;
}
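
/*
 * Illustrative sketch only (names are from the VGIC code, not this
 * file): the classic user is VGIC-v2 resource mapping, roughly
 *
 *	kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
 *			      kvm_vgic_global_state.vcpu_base,
 *			      KVM_VGIC_V2_CPU_SIZE, true);
 *
 * which makes the hardware GICV frame appear at the guest's CPU
 * interface address.
 */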

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @mmu:        The KVM stage-2 MMU pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
	struct kvm *kvm = mmu->kvm;
	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after memory region
 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
 * all present PUD, PMD and PTEs are write protected in the memory region.
 * Afterwards read of dirty page log can be called.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start, end;

	if (WARN_ON_ONCE(!memslot))
		return;

	start = memslot->base_gfn << PAGE_SHIFT;
	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(&kvm->arch.mmu, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in mask and write-protects the associated PTEs. The
 * caller must acquire kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(&kvm->arch.mmu, start, end);
}
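
/*
 * Worked example (hypothetical numbers): gfn_offset = 0x40 and
 * mask = 0b00111100 cover GFNs slot->base_gfn + 0x42 .. + 0x45:
 * __ffs(mask) = 2 and __fls(mask) = 5, so the write-protected range is
 * [(base_gfn + 2) << PAGE_SHIFT, (base_gfn + 6) << PAGE_SHIFT) with
 * base_gfn = slot->base_gfn + 0x40.
 */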

/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__clean_dcache_guest_page(pfn, size);
}

static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__invalidate_icache_guest_page(pfn, size);
}

static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}

static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
					       unsigned long hva,
					       unsigned long map_size)
{
	gpa_t gpa_start;
	hva_t uaddr_start, uaddr_end;
	size_t size;

	/* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
	if (map_size == PAGE_SIZE)
		return true;

	size = memslot->npages * PAGE_SIZE;

	gpa_start = memslot->base_gfn << PAGE_SHIFT;

	uaddr_start = memslot->userspace_addr;
	uaddr_end = uaddr_start + size;

	/*
	 * Pages belonging to memslots that don't have the same alignment
	 * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
	 * PMD/PUD entries, because we'll end up mapping the wrong pages.
	 *
	 * Consider a layout like the following:
	 *
	 *    memslot->userspace_addr:
	 *    +-----+--------------------+--------------------+---+
	 *    |abcde|fgh  Stage-1 block  |    Stage-1 block tv|xyz|
	 *    +-----+--------------------+--------------------+---+
	 *
	 *    memslot->base_gfn << PAGE_SHIFT:
	 *      +---+--------------------+--------------------+-----+
	 *      |abc|def  Stage-2 block  |    Stage-2 block   |tvxyz|
	 *      +---+--------------------+--------------------+-----+
	 *
	 * If we create those stage-2 blocks, we'll end up with this incorrect
	 * mapping:
	 *   d -> f
	 *   e -> g
	 *   f -> h
	 */
	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	/*
	 * Next, let's make sure we're not trying to map anything not covered
	 * by the memslot. This means we have to prohibit block size mappings
	 * for the beginning and end of a non-block aligned and non-block sized
	 * memory slot (illustrated by the head and tail parts of the
	 * userspace view above containing pages 'abcde' and 'xyz',
	 * respectively).
	 *
	 * Note that it doesn't matter if we do the check using the
	 * userspace_addr or the base_gfn, as both are equally aligned (per
	 * the check above) and equally sized.
	 */
	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}
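
/*
 * Worked example (hypothetical numbers, PMD_SIZE = 2M): a memslot with
 * userspace_addr = 0x40200000 and base_gfn << PAGE_SHIFT = 0x80200000
 * is PMD-congruent (both are 2M-aligned), so block mappings are
 * allowed; with userspace_addr = 0x40210000 the offsets within 2M
 * differ (0x10000 vs 0) and fault_supports_stage2_huge_mapping()
 * returns false.
 */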

/*
 * Check if the given hva is backed by a transparent huge page (THP) and
 * whether it can be mapped using block mapping in stage2. If so, adjust
 * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
 * supported. This will need to be updated to support other THP sizes.
 *
 * Returns the size of the mapping.
 */
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;

	/*
	 * Make sure the adjustment is done only for THP pages. Also make
	 * sure that the HVA and IPA are sufficiently aligned and that the
	 * block map is contained within the memslot.
	 */
	if (kvm_is_transparent_hugepage(pfn) &&
	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		kvm_get_pfn(pfn);
		*pfnp = pfn;

		return PMD_SIZE;
	}

	/* Use page mapping if we cannot use block mapping. */
	return PAGE_SIZE;
}
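
/*
 * Worked example (hypothetical numbers, 4K pages, PTRS_PER_PMD = 512):
 * faulting on tail pfn 0x12345 of a THP yields
 * pfn & ~(PTRS_PER_PMD - 1) = 0x12200, the head pfn of the 2M block,
 * and *ipap is aligned down with PMD_MASK to match.
 */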

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool exec_fault;
	bool device = false;
	unsigned long mmu_seq;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	short vma_shift;
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	unsigned long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;

	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	mmap_read_lock(current->mm);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		mmap_read_unlock(current->mm);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma))
		vma_shift = huge_page_shift(hstate_vma(vma));
	else
		vma_shift = PAGE_SHIFT;

	if (logging_active ||
	    (vma->vm_flags & VM_PFNMAP)) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	}

	switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
			break;
		fallthrough;
#endif
	case CONT_PMD_SHIFT:
		vma_shift = PMD_SHIFT;
		fallthrough;
	case PMD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
			break;
		fallthrough;
	case CONT_PTE_SHIFT:
		vma_shift = PAGE_SHIFT;
		force_pte = true;
		fallthrough;
	case PAGE_SHIFT:
		break;
	default:
		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
	}

	vma_pagesize = 1UL << vma_shift;
	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
		fault_ipa &= ~(vma_pagesize - 1);

	gfn = fault_ipa >> PAGE_SHIFT;
	mmap_read_unlock(current->mm);

	/*
	 * Permission faults just need to update the existing leaf entry,
	 * and so normally don't require allocations from the memcache. The
	 * only exception to this is when dirty logging is enabled at runtime
	 * and a write fault needs to collapse a block entry into a table.
	 */
	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
		ret = kvm_mmu_topup_memory_cache(memcache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			return ret;
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma_shift);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		device = true;
		force_pte = true;
	} else if (logging_active && !write_fault) {
		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		writable = false;
	}

	if (exec_fault && device)
		return -ENOEXEC;

	spin_lock(&kvm->mmu_lock);
	pgt = vcpu->arch.hw_mmu->pgt;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/*
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !force_pte)
		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
							   &pfn, &fault_ipa);
	if (writable) {
		prot |= KVM_PGTABLE_PROT_W;
		kvm_set_pfn_dirty(pfn);
		mark_page_dirty(kvm, gfn);
	}

	if (fault_status != FSC_PERM && !device)
		clean_dcache_guest_page(pfn, vma_pagesize);

	if (exec_fault) {
		prot |= KVM_PGTABLE_PROT_X;
		invalidate_icache_guest_page(pfn, vma_pagesize);
	}

	if (device)
		prot |= KVM_PGTABLE_PROT_DEVICE;
	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
		prot |= KVM_PGTABLE_PROT_X;

	/*
	 * For a FSC_PERM fault we only need to relax permissions if
	 * vma_pagesize equals fault_granule. Otherwise,
	 * kvm_pgtable_stage2_map() must be called to change the block size.
	 */
	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
	} else {
		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
					     __pfn_to_phys(pfn), prot,
					     memcache);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}
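
/*
 * Summary of the mapping decision in user_mem_abort(): a permission
 * fault whose granule already matches the backing size only needs
 * kvm_pgtable_stage2_relax_perms(); everything else (translation
 * faults, or permission faults that must change the block size) goes
 * through a full kvm_pgtable_stage2_map().
 */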

/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pte_t pte;
	kvm_pte_t kpte;
	struct kvm_s2_mmu *mmu;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);
	mmu = vcpu->arch.hw_mmu;
	kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
	spin_unlock(&vcpu->kvm->mmu_lock);

	pte = __pte(kpte);
	if (pte_valid(pte))
		kvm_set_pfn_accessed(pte_pfn(pte));
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	/* Synchronous External Abort? */
	if (kvm_vcpu_abt_issea(vcpu)) {
		/*
		 * For RAS the host kernel may handle this abort.
		 * There is no need to pass the error into the guest.
		 */
		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
			kvm_inject_vabt(vcpu);

		return 1;
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_esr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		/*
		 * The guest has put either its instructions or its page-tables
		 * somewhere it shouldn't have. Userspace won't be able to do
		 * anything about this (there's no syndrome for a start), so
		 * re-inject the abort back into the guest.
		 */
		if (is_iabt) {
			ret = -ENOEXEC;
			goto out;
		}

		if (kvm_vcpu_abt_iss1tw(vcpu)) {
			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we
		 * ended-up here, we know it is outside of any memory
		 * slot. But we can't find out if that is for a device,
		 * or if the guest is just being stupid. The only thing
		 * we know for sure is that this range cannot be cached.
		 *
		 * So let's assume that the guest is just being
		 * cautious, and skip the instruction.
		 */
		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
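		 *
		 * Worked example (hypothetical numbers): with
		 * fault_ipa = 0x80000 and HFAR = 0xffff1234, the low
		 * 12 bits contribute 0x234 and the combined IPA is
		 * 0x80234.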
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out:
	if (ret == -ENOEXEC) {
		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
		ret = 1;
	}
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, u64 size,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gpa;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
	}

	return ret;
}
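
/*
 * Worked example (hypothetical numbers, 4K pages): for a memslot with
 * userspace_addr = 0x7f0000000000, npages = 0x100 and
 * base_gfn = 0x10000, a caller range [0x7f0000002000, 0x7f0000200000)
 * is clipped to [0x7f0000002000, 0x7f0000100000), and the handler runs
 * on gpa = 0x10002000 with size = 0xfe000.
 */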
int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end, unsigned flags)
{
	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	kvm_pfn_t *pfn = (kvm_pfn_t *)data;

	WARN_ON(size != PAGE_SIZE);

	/*
	 * The MMU notifiers will have unmapped a huge PMD before calling
	 * ->change_pte() (which in turn calls kvm_set_spte_hva()) and
	 * therefore we never need to clear out a huge PMD through this
	 * calling path and a memcache is not required.
	 */
	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, gpa, PAGE_SIZE,
			       __pfn_to_phys(*pfn), KVM_PGTABLE_PROT_R, NULL);
	return 0;
}
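
/*
 * MMU notifier ->change_pte() callback: the host has changed the page
 * backing @hva, so replace the stage-2 mapping with one pointing at the
 * new pfn. The new entry is read-only; a later write by the guest will
 * fault and go through the usual permission handling.
 */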
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	kvm_pfn_t pfn = pte_pfn(pte);

	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_set_spte_hva(hva);

	/*
	 * We've moved a page around, probably through CoW, so let's treat
	 * it just like a translation fault and clean the cache to the PoC.
	 */
	clean_dcache_guest_page(pfn, PAGE_SIZE);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pfn);
	return 0;
}
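
/*
 * Page-aging support for the MMU notifiers: kvm_age_hva() clears the
 * access flag on the stage-2 entries covering the range and reports
 * whether any of them were young, while kvm_test_age_hva() only tests
 * the flag without clearing it. This lets the host sample guest memory
 * accesses for page reclaim.
 */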
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pte_t pte;
	kvm_pte_t kpte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
	kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt, gpa);
	pte = __pte(kpte);
	return pte_valid(pte) && pte_young(pte);
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt, gpa);
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (!kvm->arch.mmu.pgt)
		return 0;
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.mmu.pgt)
		return 0;
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
				 kvm_test_age_hva_handler, NULL);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	return __pa(hyp_pgtable->pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

static int kvm_map_idmap_text(void)
{
	unsigned long size = hyp_idmap_end - hyp_idmap_start;
	int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
					PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}
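
/*
 * kvm_mmu_init() - set up the hypervisor (EL2) page tables.
 *
 * The EL2 VA width is derived from idmap_t0sz: hyp_va_bits = 64 - T0SZ,
 * so a T0SZ of 16, for instance, gives 48-bit hyp VAs. We then check
 * that the idmap page does not collide with the hyp VA range before
 * allocating the hyp page table and mapping the idmap'd HYP init code
 * into it.
 */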
int kvm_mmu_init(void)
{
	int err;
	u32 hyp_va_bits;

	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
	hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
	hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
	kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits);
	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_debug("HYP VA range: %lx:%lx\n",
		  kern_hyp_va(PAGE_OFFSET),
		  kern_hyp_va((unsigned long)high_memory - 1));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start <  kern_hyp_va((unsigned long)high_memory - 1) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		/*
		 * The idmap page is intersecting with the VA space;
		 * it is not safe to continue further.
		 */
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
	if (!hyp_pgtable) {
		kvm_err("Hyp mode page-table not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits);
	if (err)
		goto out_free_pgtable;

	err = kvm_map_idmap_text();
	if (err)
		goto out_destroy_pgtable;

	io_map_base = hyp_idmap_start;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_hyp_destroy(hyp_pgtable);
out_free_pgtable:
	kfree(hyp_pgtable);
	hyp_pgtable = NULL;
out:
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point the memslot has been committed and there is an
	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		/*
		 * With initial-all-set, we don't need to write-protect
		 * any pages because they're all reported as dirty.
		 * Huge pages and normal pages will be write-protected
		 * gradually.
		 */
		if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
			kvm_mmu_wp_memory_region(kvm, mem->slot);
		}
	}
}
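
/*
 * Validate and pre-process a memory region before it is installed: the
 * region must lie within the guest IPA space, and any VM_PFNMAP VMAs
 * backing it (device memory) are mapped at stage 2 right away. Dirty
 * page logging is refused for such I/O regions.
 */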
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the
	 * IPA space addressable by the guest.
	 */
	if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
		return -EFAULT;

	mmap_read_lock(current->mm);
	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region.
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				goto out;
			}

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		goto out;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
out:
	mmap_read_unlock(current->mm);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
}
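
/*
 * Tear down the stage-2 mappings covering a single memslot, e.g. when
 * the slot is deleted or moved. kvm_arch_flush_shadow_all() above is
 * the whole-VM counterpart and frees the entire stage-2 page table.
 */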
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

/*
 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
 *
 * Main problems:
 * - S/W ops are local to a CPU (not broadcast)
 * - We have line migration behind our back (speculation)
 * - System caches don't support S/W at all (damn!)
 *
 * In the face of the above, the best we can do is to try and convert
 * S/W ops to VA ops. Because the guest is not allowed to infer the
 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
 * which is a rather good thing for us.
 *
 * Also, it is only used when turning caches on/off ("The expected
 * usage of the cache maintenance instructions that operate by set/way
 * is associated with the cache maintenance instructions associated
 * with the powerdown and powerup of caches, if this is required by
 * the implementation.").
 *
 * We use the following policy:
 *
 * - If we trap a S/W operation, we enable VM trapping to detect
 *   caches being turned on/off, and do a full clean.
 *
 * - We flush the caches both when they are turned on and when they
 *   are turned off.
 *
 * - Once the caches are enabled, we stop trapping VM ops.
 */
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = *vcpu_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, we need to invalidate the caches.
	 * If switching them off, we need to clean the caches.
	 * Clean + invalidate does the trick in both cases.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		*vcpu_hcr(vcpu) &= ~HCR_TVM;

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}