// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * Macros and functions to access KVM PTEs (also known as SPTEs)
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2020 Red Hat, Inc. and/or its affiliates.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "mmu.h"
#include "mmu_internal.h"
#include "x86.h"
#include "spte.h"

#include <asm/e820/api.h>
#include <asm/memtype.h>
#include <asm/vmx.h>

bool __read_mostly enable_mmio_caching = true;
static bool __ro_after_init allow_mmio_caching;
module_param_named(mmio_caching, enable_mmio_caching, bool, 0444);
EXPORT_SYMBOL_GPL(enable_mmio_caching);

u64 __read_mostly shadow_host_writable_mask;
u64 __read_mostly shadow_mmu_writable_mask;
u64 __read_mostly shadow_nx_mask;
u64 __read_mostly shadow_x_mask; /* mutually exclusive with nx_mask */
u64 __read_mostly shadow_user_mask;
u64 __read_mostly shadow_accessed_mask;
u64 __read_mostly shadow_dirty_mask;
u64 __read_mostly shadow_mmio_value;
u64 __read_mostly shadow_mmio_mask;
u64 __read_mostly shadow_mmio_access_mask;
u64 __read_mostly shadow_present_mask;
u64 __read_mostly shadow_memtype_mask;
u64 __read_mostly shadow_me_value;
u64 __read_mostly shadow_me_mask;
u64 __read_mostly shadow_acc_track_mask;

u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;

u8 __read_mostly shadow_phys_bits;

void __init kvm_mmu_spte_module_init(void)
{
	/*
	 * Snapshot userspace's desire to allow MMIO caching.  Whether or not
	 * KVM can actually enable MMIO caching depends on vendor-specific
	 * hardware capabilities and other module params that can't be resolved
	 * until the vendor module is loaded, i.e. enable_mmio_caching can and
	 * will change when the vendor module is (re)loaded.
	 */
	allow_mmio_caching = enable_mmio_caching;
}

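/*
 * The memslot generation is scattered across two non-contiguous bit ranges of
 * the MMIO SPTE because the bits in between hold the GPA and the MMIO
 * value/access bits (see make_mmio_spte() below).  Illustrative round trip,
 * assuming get_mmio_spte_generation() in spte.h performs the inverse
 * extraction of the mask built here:
 *
 *	u64 spte = generation_mmio_spte_mask(gen);
 *	WARN_ON_ONCE(get_mmio_spte_generation(spte) != gen);
 */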
static u64 generation_mmio_spte_mask(u64 gen)
{
	u64 mask;

	WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);

	mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
	mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
	return mask;
}

u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
{
	u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
	u64 spte = generation_mmio_spte_mask(gen);
	u64 gpa = gfn << PAGE_SHIFT;

	WARN_ON_ONCE(!shadow_mmio_value);

	access &= shadow_mmio_access_mask;
	spte |= shadow_mmio_value | access;
	spte |= gpa | shadow_nonpresent_or_rsvd_mask;
	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;

	return spte;
}

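/*
 * Returns true if the pfn should be treated as MMIO, i.e. is not backed by
 * cacheable RAM: pfns with a struct page are MMIO only if the page is
 * reserved and the host memtype is UC/UC-/WC (or PAT is disabled); pfns
 * without a struct page are MMIO unless the e820 map reports the range as
 * RAM.
 */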
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
{
	if (pfn_valid(pfn))
		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
			/*
			 * Some reserved pages, such as those from NVDIMM
			 * DAX devices, are not for MMIO, and can be mapped
			 * with cached memory type for better performance.
			 * However, the check above misidentifies those pages
			 * as MMIO, causing KVM to map them with the UC memory
			 * type, which hurts performance.  Therefore, also
			 * check the host memory type and only treat UC/UC-/WC
			 * pages as MMIO.
			 */
			(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));

	return !e820__mapped_raw_any(pfn_to_hpa(pfn),
				     pfn_to_hpa(pfn + 1) - 1,
				     E820_TYPE_RAM);
}

/*
 * Returns true if the SPTE has bits that may be set without holding mmu_lock.
 * The caller is responsible for checking if the SPTE is shadow-present, and
 * for determining whether or not the caller cares about non-leaf SPTEs.
 */
bool spte_has_volatile_bits(u64 spte)
{
	/*
	 * An SPTE that can be updated outside of mmu_lock must always be
	 * updated atomically: this ensures the Dirty bit is not lost and
	 * yields a stable is_writable_pte() so that a needed TLB flush is
	 * not missed.
	 */
	if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
		return true;

	if (is_access_track_spte(spte))
		return true;

	if (spte_ad_enabled(spte)) {
		if (!(spte & shadow_accessed_mask) ||
		    (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
			return true;
	}

	return false;
}

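/*
 * Construct a leaf SPTE mapping @gfn to @pfn in shadow page @sp, returning
 * the result via @new_spte.  Returns true if the gfn had to be
 * write-protected because reachable shadow pages couldn't be unsync'd, in
 * which case the writable bits are stripped from the new SPTE.
 */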
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
	       const struct kvm_memory_slot *slot,
	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
	       u64 old_spte, bool prefetch, bool can_unsync,
	       bool host_writable, u64 *new_spte)
{
	int level = sp->role.level;
	u64 spte = SPTE_MMU_PRESENT_MASK;
	bool wrprot = false;

	WARN_ON_ONCE(!pte_access && !shadow_present_mask);

	if (sp->role.ad_disabled)
		spte |= SPTE_TDP_AD_DISABLED;
	else if (kvm_mmu_page_ad_need_write_protect(sp))
		spte |= SPTE_TDP_AD_WRPROT_ONLY;

	/*
	 * For the EPT case, shadow_present_mask is 0 if hardware
	 * supports exec-only page table entries.  In that case,
	 * ACC_USER_MASK and shadow_user_mask are used to represent
	 * read access.  See FNAME(gpte_access) in paging_tmpl.h.
	 */
	spte |= shadow_present_mask;
	if (!prefetch)
		spte |= spte_shadow_accessed_mask(spte);

	/*
	 * For simplicity, enforce the NX huge page mitigation even if not
	 * strictly necessary.  KVM could ignore the mitigation if paging is
	 * disabled in the guest, as the guest doesn't have any page tables to
	 * abuse.  But to safely ignore the mitigation, KVM would have to
	 * ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG
	 * is toggled on, and that's a net negative for performance when TDP is
	 * enabled.  When TDP is disabled, KVM will always switch to a new MMU
	 * when CR0.PG is toggled, but leveraging that to ignore the mitigation
	 * would tie make_spte() further to vCPU/MMU state, and add complexity
	 * just to optimize a mode that is anything but performance critical.
	 */
	if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
	    is_nx_huge_page_enabled(vcpu->kvm)) {
		pte_access &= ~ACC_EXEC_MASK;
	}

	if (pte_access & ACC_EXEC_MASK)
		spte |= shadow_x_mask;
	else
		spte |= shadow_nx_mask;

	if (pte_access & ACC_USER_MASK)
		spte |= shadow_user_mask;

	if (level > PG_LEVEL_4K)
		spte |= PT_PAGE_SIZE_MASK;

	if (shadow_memtype_mask)
		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn,
							 kvm_is_mmio_pfn(pfn));
	if (host_writable)
		spte |= shadow_host_writable_mask;
	else
		pte_access &= ~ACC_WRITE_MASK;

	if (shadow_me_value && !kvm_is_mmio_pfn(pfn))
		spte |= shadow_me_value;

	spte |= (u64)pfn << PAGE_SHIFT;

	if (pte_access & ACC_WRITE_MASK) {
		spte |= PT_WRITABLE_MASK | shadow_mmu_writable_mask;

		/*
		 * Optimization: for pte sync, if spte was writable the hash
		 * lookup is unnecessary (and expensive).  Write protection
		 * is responsibility of kvm_mmu_get_page / kvm_mmu_sync_roots.
		 * Same reasoning can be applied to dirty page accounting.
		 */
		if (is_writable_pte(old_spte))
			goto out;

		/*
		 * Unsync shadow pages that are reachable by the new, writable
		 * SPTE.  Write-protect the SPTE if the page can't be unsync'd,
		 * e.g. it's write-tracked (upper-level SPs) or has one or more
		 * shadow pages and unsync'ing pages is not allowed.
		 */
		if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
			wrprot = true;
			pte_access &= ~ACC_WRITE_MASK;
			spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
		}
	}

	if (pte_access & ACC_WRITE_MASK)
		spte |= spte_shadow_dirty_mask(spte);

out:
	if (prefetch)
		spte = mark_spte_for_access_track(spte);

	WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->shadow_zero_check, spte, level),
		  "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
		  get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));

	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
		/* Enforced by kvm_mmu_hugepage_adjust. */
		WARN_ON_ONCE(level > PG_LEVEL_4K);
		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
	}

	*new_spte = spte;
	return wrprot;
}

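/*
 * Restore execute permission on a SPTE whose NX bit was set, e.g. by the NX
 * huge page mitigation, temporarily thawing an access-tracked SPTE so that
 * its saved RWX bits are updated as well.
 */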
static u64 make_spte_executable(u64 spte)
{
	bool is_access_track = is_access_track_spte(spte);

	if (is_access_track)
		spte = restore_acc_track_spte(spte);

	spte &= ~shadow_nx_mask;
	spte |= shadow_x_mask;

	if (is_access_track)
		spte = mark_spte_for_access_track(spte);

	return spte;
}

/*
 * Construct an SPTE that maps a sub-page of the given huge page SPTE where
 * `index` identifies which sub-page.
 *
 * This is used during huge page splitting to build the SPTEs that make up the
 * new page table.
 */
u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, union kvm_mmu_page_role role,
			      int index)
{
	u64 child_spte;

	if (WARN_ON_ONCE(!is_shadow_present_pte(huge_spte)))
		return 0;

	if (WARN_ON_ONCE(!is_large_pte(huge_spte)))
		return 0;

	child_spte = huge_spte;

	/*
	 * The child_spte already has the base address of the huge page being
	 * split.  So we just have to OR in the offset to the page at the next
	 * lower level for the given index.
	 */
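	/*
	 * Illustrative arithmetic: when splitting a 1GiB SPTE into 2MiB
	 * children, role.level is PG_LEVEL_2M and each step of @index
	 * advances the address by KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) == 512
	 * pages, i.e. 2MiB; when splitting a 2MiB SPTE into 4KiB children,
	 * each step advances by a single 4KiB page.
	 */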
	child_spte |= (index * KVM_PAGES_PER_HPAGE(role.level)) << PAGE_SHIFT;

	if (role.level == PG_LEVEL_4K) {
		child_spte &= ~PT_PAGE_SIZE_MASK;

		/*
		 * When splitting to a 4K page where execution is allowed, mark
		 * the page executable as the NX hugepage mitigation no longer
		 * applies.
		 */
		if ((role.access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(kvm))
			child_spte = make_spte_executable(child_spte);
	}

	return child_spte;
}

/* Construct a non-leaf SPTE that points at the page table @child_pt. */
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
{
	u64 spte = SPTE_MMU_PRESENT_MASK;

	spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
		shadow_user_mask | shadow_x_mask | shadow_me_value;

	if (ad_disabled)
		spte |= SPTE_TDP_AD_DISABLED;
	else
		spte |= shadow_accessed_mask;

	return spte;
}

/*
 * Build the SPTE that remaps an existing mapping to @new_pfn for the
 * mmu_notifier change_pte() callback: the new SPTE starts write-protected
 * and marked for access tracking.
 */
u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
{
	u64 new_spte;

	new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
	new_spte |= (u64)new_pfn << PAGE_SHIFT;

	new_spte &= ~PT_WRITABLE_MASK;
	new_spte &= ~shadow_host_writable_mask;
	new_spte &= ~shadow_mmu_writable_mask;

	new_spte = mark_spte_for_access_track(new_spte);

	return new_spte;
}

/*
 * Mark the SPTE for access tracking: clear the Accessed bit if A/D bits are
 * in use, else stash the protection bits in the "saved bits" area and clear
 * shadow_acc_track_mask so the SPTE faults on the next access.
 */
u64 mark_spte_for_access_track(u64 spte)
{
	if (spte_ad_enabled(spte))
		return spte & ~shadow_accessed_mask;

	if (is_access_track_spte(spte))
		return spte;

	check_spte_writable_invariants(spte);

	WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
			  SHADOW_ACC_TRACK_SAVED_BITS_SHIFT),
		  "Access Tracking saved bit locations are not zero\n");

	spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) <<
		SHADOW_ACC_TRACK_SAVED_BITS_SHIFT;
	spte &= ~shadow_acc_track_mask;

	return spte;
}

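/*
 * Set the value and mask used to identify MMIO SPTEs, along with the mask of
 * access bits preserved in them.  Vendor modules pass hardware-specific
 * values; a @mmio_value of '0', or any value that fails the sanity checks
 * below, disables MMIO caching entirely.
 */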
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
{
	BUG_ON((u64)(unsigned)access_mask != access_mask);
	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);

	/*
	 * Reset to the original module param value to honor userspace's desire
	 * to (dis)allow MMIO caching.  Update the param itself so that
	 * userspace can see whether or not KVM is actually using MMIO caching.
	 */
	enable_mmio_caching = allow_mmio_caching;
	if (!enable_mmio_caching)
		mmio_value = 0;

	/*
	 * The mask must contain only bits that are carved out specifically for
	 * the MMIO SPTE mask, e.g. to ensure there's no overlap with the MMIO
	 * generation.
	 */
	if (WARN_ON(mmio_mask & ~SPTE_MMIO_ALLOWED_MASK))
		mmio_value = 0;

	/*
	 * Disable MMIO caching if the MMIO value collides with the bits that
	 * are used to hold the relocated GFN when the L1TF mitigation is
	 * enabled.  This should never fire as there is no known hardware that
	 * can trigger this condition, e.g. SME/SEV CPUs that require a custom
	 * MMIO value are not susceptible to L1TF.
	 */
	if (WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask <<
				  SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)))
		mmio_value = 0;

	/*
	 * The masked MMIO value must obviously match itself and a removed SPTE
	 * must not get a false positive.  Removed SPTEs and MMIO SPTEs should
	 * never collide as MMIO must set some RWX bits, and removed SPTEs must
	 * not set any RWX bits.
	 */
	if (WARN_ON((mmio_value & mmio_mask) != mmio_value) ||
	    WARN_ON(mmio_value && (REMOVED_SPTE & mmio_mask) == mmio_value))
		mmio_value = 0;

	if (!mmio_value)
		enable_mmio_caching = false;

	shadow_mmio_value = mmio_value;
	shadow_mmio_mask = mmio_mask;
	shadow_mmio_access_mask = access_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);

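/*
 * Set the memory encryption value and mask that KVM embeds in SPTEs, so that
 * SPTEs reference memory with the intended encryption attributes; on SVM
 * this propagates the SME/SEV C-bit.  '0'/'0' disables the behavior.
 */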
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
{
	/* shadow_me_value must be a subset of shadow_me_mask */
	if (WARN_ON(me_value & ~me_mask))
		me_value = me_mask = 0;

	shadow_me_value = me_value;
	shadow_me_mask = me_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);

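/*
 * Configure the SPTE masks for Intel EPT, whose R/W/X and Accessed/Dirty
 * layouts differ from legacy 64-bit paging.  Called by the VMX module during
 * hardware setup when EPT is enabled.
 */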
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
{
	shadow_user_mask	= VMX_EPT_READABLE_MASK;
	shadow_accessed_mask	= has_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull;
	shadow_dirty_mask	= has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull;
	shadow_nx_mask		= 0ull;
	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
	shadow_present_mask	= has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
	/*
	 * EPT overrides the host MTRRs, and so KVM must program the desired
	 * memtype directly into the SPTEs.  Note, this mask is just the mask
	 * of all bits that factor into the memtype, the actual memtype must be
	 * dynamically calculated, e.g. to ensure host MMIO is mapped UC.
	 */
	shadow_memtype_mask	= VMX_EPT_MT_MASK | VMX_EPT_IPAT_BIT;
	shadow_acc_track_mask	= VMX_EPT_RWX_MASK;
	shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
	shadow_mmu_writable_mask  = EPT_SPTE_MMU_WRITABLE;

	/*
	 * EPT Misconfigurations are generated if the value of bits 2:0
	 * of an EPT paging-structure entry is 110b (write/execute).
	 */
	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
				   VMX_EPT_RWX_MASK, 0);
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);

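/*
 * Reset all SPTE masks to the defaults for legacy 64-bit paging, i.e. for
 * shadow paging and NPT.  Vendor-specific setup, e.g. kvm_mmu_set_ept_masks()
 * above, can override these afterwards.
 */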
void kvm_mmu_reset_all_pte_masks(void)
{
	u8 low_phys_bits;
	u64 mask;

	shadow_phys_bits = kvm_get_shadow_phys_bits();

	/*
	 * If the CPU has 46 or less physical address bits, then set an
	 * appropriate mask to guard against L1TF attacks.  Otherwise, it is
	 * assumed that the CPU is not vulnerable to L1TF.
	 *
	 * Some Intel CPUs address the L1 cache using more PA bits than are
	 * reported by CPUID.  Use the PA width of the L1 cache when possible
	 * to achieve more effective mitigation, e.g. if system RAM overlaps
	 * the most significant bits of legal physical address space.
	 */
	shadow_nonpresent_or_rsvd_mask = 0;
	low_phys_bits = boot_cpu_data.x86_phys_bits;
	if (boot_cpu_has_bug(X86_BUG_L1TF) &&
	    !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
			  52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) {
		low_phys_bits = boot_cpu_data.x86_cache_bits
			- SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
		shadow_nonpresent_or_rsvd_mask =
			rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
	}

	shadow_nonpresent_or_rsvd_lower_gfn_mask =
		GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);

	shadow_user_mask	= PT_USER_MASK;
	shadow_accessed_mask	= PT_ACCESSED_MASK;
	shadow_dirty_mask	= PT_DIRTY_MASK;
	shadow_nx_mask		= PT64_NX_MASK;
	shadow_x_mask		= 0;
	shadow_present_mask	= PT_PRESENT_MASK;

	/*
	 * For shadow paging and NPT, KVM uses PAT entry '0' to encode WB
	 * memtype in the SPTEs, i.e. relies on host MTRRs to provide the
	 * correct memtype (WB is the "weakest" memtype).
	 */
	shadow_memtype_mask	= 0;
	shadow_acc_track_mask	= 0;
	shadow_me_mask		= 0;
	shadow_me_value		= 0;

	shadow_host_writable_mask = DEFAULT_SPTE_HOST_WRITABLE;
	shadow_mmu_writable_mask  = DEFAULT_SPTE_MMU_WRITABLE;

	/*
	 * Set a reserved PA bit in MMIO SPTEs to generate page faults with
	 * PFEC.RSVD=1 on MMIO accesses.  64-bit PTEs (PAE, x86-64, and EPT
	 * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
	 * 52-bit physical addresses then there are no reserved PA bits in the
	 * PTEs and so the reserved PA approach must be disabled.
	 */
	if (shadow_phys_bits < 52)
		mask = BIT_ULL(51) | PT_PRESENT_MASK;
	else
		mask = 0;

	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}