// SPDX-License-Identifier: GPL-2.0-only
/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 */

#include <linux/ratelimit.h>

/* Human-readable names for the AUDIT_* points, indexed by kvm->arch.audit_point. */
static const char *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};

#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
		fmt, audit_point_name[kvm->arch.audit_point], ##args)

typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);

/*
 * Apply @fn to every SPTE in @sp and recurse into the child shadow pages
 * referenced by non-leaf present SPTEs.
 */
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		u64 *ent = sp->spt;

		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		    !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = to_shadow_page(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}

/*
 * Walk all SPTEs reachable from the current MMU root(s), handling both a
 * single 4/5-level root and the four PAE roots.
 */
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
		return;

	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
		hpa_t root = vcpu->arch.mmu->root_hpa;

		sp = to_shadow_page(root);
		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
		return;
	}

	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu->pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = to_shadow_page(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}
}

typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);

static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}

/* Verify that a present leaf SPTE maps the host pfn currently backing its gfn. */
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	kvm_pfn_t pfn;
	hpa_t hpa;

	sp = sptep_to_sp(sptep);

	if (sp->unsync && level != PG_LEVEL_4K) {
		audit_printk(vcpu->kvm, "unsync sp: %p level = %d\n",
			     sp, level);
		return;
	}

	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);

	if (is_error_pfn(pfn))
		return;

	hpa = pfn << PAGE_SHIFT;
	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx ent %llx\n",
			     vcpu->arch.mmu->root_level, pfn, hpa, *sptep);
}

/*
 * Check that the gfn mapped by @sptep belongs to a memslot and that the
 * SPTE is reachable through that slot's rmap.
 */
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	struct kvm_rmap_head *rmap_head;
	struct kvm_mmu_page *rev_sp;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;
	gfn_t gfn;

	rev_sp = sptep_to_sp(sptep);
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
	slot = __gfn_to_memslot(slots, gfn);
	if (!slot) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
	if (!rmap_head->val) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}

/* After the roots have been synced, no shadow page should still be unsync. */
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = sptep_to_sp(sptep);

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "unsync sp (%p) after syncing root\n",
			     sp);
}

/* Every present SPTE in a 4K shadow page must have a corresponding rmap entry. */
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	int i;

	if (sp->role.level != PG_LEVEL_4K)
		return;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		inspect_spte_has_rmap(kvm, sp->spt + i);
	}
}

/*
 * A synced, valid, indirect shadow page shadows a guest page table, so the
 * guest frame it shadows must be write-protected: no writable SPTE may map
 * sp->gfn.
 */
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	struct kvm_rmap_head *rmap_head;
	u64 *sptep;
	struct rmap_iterator iter;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;

	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;

	slots = kvm_memslots_for_spte_role(kvm, sp->role);
	slot = __gfn_to_memslot(slots, sp->gfn);
	rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);

	for_each_rmap_spte(rmap_head, &iter, sptep) {
		if (is_writable_pte(*sptep))
			audit_printk(kvm, "shadow page has writable mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
	}
}

static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}

static bool mmu_audit;
static struct static_key mmu_audit_key;

/*
 * Rate-limited full audit: check every active shadow page, then every SPTE
 * reachable from the vCPU's current roots.
 */
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	if (!__ratelimit(&ratelimit_state))
		return;

	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	if (static_key_false(&mmu_audit_key))
		__kvm_mmu_audit(vcpu, point);
}

static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

	static_key_slow_inc(&mmu_audit_key);
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

	static_key_slow_dec(&mmu_audit_key);
	mmu_audit = false;
}

/* Module parameter handler: "1" enables auditing, "0" disables it. */
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	unsigned long enable;

	ret = kstrtoul(val, 10, &enable);
	if (ret < 0)
		return -EINVAL;

	switch (enable) {
	case 0:
		mmu_audit_disable();
		break;
	case 1:
		mmu_audit_enable();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
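
/*
 * Usage sketch: this file is intended to be #included by the MMU code
 * (mmu.c) when CONFIG_KVM_MMU_AUDIT is enabled. The includer is expected
 * to provide the AUDIT_* enum that indexes audit_point_name (constant
 * names such as AUDIT_PRE_PAGE_FAULT below are inferred from that table)
 * and to bracket the audited MMU operations, roughly:
 *
 *	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 *	...handle the page fault...
 *	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 *
 * The call sites above are illustrative, not part of this file. At runtime,
 * auditing is toggled through the writable module parameter registered by
 * arch_param_cb() above, typically exposed as
 * /sys/module/kvm/parameters/mmu_audit (write 1 to enable, 0 to disable).
 */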