1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2012 - ARM Ltd 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 */ 6 7#include <linux/arm-smccc.h> 8#include <linux/preempt.h> 9#include <linux/kvm_host.h> 10#include <linux/uaccess.h> 11#include <linux/wait.h> 12 13#include <asm/cputype.h> 14#include <asm/kvm_emulate.h> 15 16#include <kvm/arm_psci.h> 17#include <kvm/arm_hypercalls.h> 18 19/* 20 * This is an implementation of the Power State Coordination Interface 21 * as described in ARM document number ARM DEN 0022A. 22 */ 23 24#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) 25 26static unsigned long psci_affinity_mask(unsigned long affinity_level) 27{ 28 if (affinity_level <= 3) 29 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); 30 31 return 0; 32} 33 34static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) 35{ 36 /* 37 * NOTE: For simplicity, we make VCPU suspend emulation to be 38 * same-as WFI (Wait-for-interrupt) emulation. 39 * 40 * This means for KVM the wakeup events are interrupts and 41 * this is consistent with intended use of StateID as described 42 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). 43 * 44 * Further, we also treat power-down request to be same as 45 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 46 * specification (ARM DEN 0022A). This means all suspend states 47 * for KVM will preserve the register state. 48 */ 49 kvm_vcpu_block(vcpu); 50 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 51 52 return PSCI_RET_SUCCESS; 53} 54 55static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 56{ 57 vcpu->arch.power_off = true; 58 kvm_make_request(KVM_REQ_SLEEP, vcpu); 59 kvm_vcpu_kick(vcpu); 60} 61 62static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 63{ 64 struct vcpu_reset_state *reset_state; 65 struct kvm *kvm = source_vcpu->kvm; 66 struct kvm_vcpu *vcpu = NULL; 67 unsigned long cpu_id; 68 69 cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; 70 if (vcpu_mode_is_32bit(source_vcpu)) 71 cpu_id &= ~((u32) 0); 72 73 vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); 74 75 /* 76 * Make sure the caller requested a valid CPU and that the CPU is 77 * turned off. 78 */ 79 if (!vcpu) 80 return PSCI_RET_INVALID_PARAMS; 81 if (!vcpu->arch.power_off) { 82 if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) 83 return PSCI_RET_ALREADY_ON; 84 else 85 return PSCI_RET_INVALID_PARAMS; 86 } 87 88 reset_state = &vcpu->arch.reset_state; 89 90 reset_state->pc = smccc_get_arg2(source_vcpu); 91 92 /* Propagate caller endianness */ 93 reset_state->be = kvm_vcpu_is_be(source_vcpu); 94 95 /* 96 * NOTE: We always update r0 (or x0) because for PSCI v0.1 97 * the general purpose registers are undefined upon CPU_ON. 98 */ 99 reset_state->r0 = smccc_get_arg3(source_vcpu); 100 101 WRITE_ONCE(reset_state->reset, true); 102 kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); 103 104 /* 105 * Make sure the reset request is observed if the change to 106 * power_state is observed. 107 */ 108 smp_wmb(); 109 110 vcpu->arch.power_off = false; 111 kvm_vcpu_wake_up(vcpu); 112 113 return PSCI_RET_SUCCESS; 114} 115 116static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) 117{ 118 int i, matching_cpus = 0; 119 unsigned long mpidr; 120 unsigned long target_affinity; 121 unsigned long target_affinity_mask; 122 unsigned long lowest_affinity_level; 123 struct kvm *kvm = vcpu->kvm; 124 struct kvm_vcpu *tmp; 125 126 target_affinity = smccc_get_arg1(vcpu); 127 lowest_affinity_level = smccc_get_arg2(vcpu); 128 129 /* Determine target affinity mask */ 130 target_affinity_mask = psci_affinity_mask(lowest_affinity_level); 131 if (!target_affinity_mask) 132 return PSCI_RET_INVALID_PARAMS; 133 134 /* Ignore other bits of target affinity */ 135 target_affinity &= target_affinity_mask; 136 137 /* 138 * If one or more VCPU matching target affinity are running 139 * then ON else OFF 140 */ 141 kvm_for_each_vcpu(i, tmp, kvm) { 142 mpidr = kvm_vcpu_get_mpidr_aff(tmp); 143 if ((mpidr & target_affinity_mask) == target_affinity) { 144 matching_cpus++; 145 if (!tmp->arch.power_off) 146 return PSCI_0_2_AFFINITY_LEVEL_ON; 147 } 148 } 149 150 if (!matching_cpus) 151 return PSCI_RET_INVALID_PARAMS; 152 153 return PSCI_0_2_AFFINITY_LEVEL_OFF; 154} 155 156static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) 157{ 158 int i; 159 struct kvm_vcpu *tmp; 160 161 /* 162 * The KVM ABI specifies that a system event exit may call KVM_RUN 163 * again and may perform shutdown/reboot at a later time that when the 164 * actual request is made. Since we are implementing PSCI and a 165 * caller of PSCI reboot and shutdown expects that the system shuts 166 * down or reboots immediately, let's make sure that VCPUs are not run 167 * after this call is handled and before the VCPUs have been 168 * re-initialized. 169 */ 170 kvm_for_each_vcpu(i, tmp, vcpu->kvm) 171 tmp->arch.power_off = true; 172 kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); 173 174 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 175 vcpu->run->system_event.type = type; 176 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 177} 178 179static void kvm_psci_system_off(struct kvm_vcpu *vcpu) 180{ 181 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); 182} 183 184static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) 185{ 186 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); 187} 188 189static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) 190{ 191 int i; 192 193 /* 194 * Zero the input registers' upper 32 bits. They will be fully 195 * zeroed on exit, so we're fine changing them in place. 196 */ 197 for (i = 1; i < 4; i++) 198 vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); 199} 200 201static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) 202{ 203 switch(fn) { 204 case PSCI_0_2_FN64_CPU_SUSPEND: 205 case PSCI_0_2_FN64_CPU_ON: 206 case PSCI_0_2_FN64_AFFINITY_INFO: 207 /* Disallow these functions for 32bit guests */ 208 if (vcpu_mode_is_32bit(vcpu)) 209 return PSCI_RET_NOT_SUPPORTED; 210 break; 211 } 212 213 return 0; 214} 215 216static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) 217{ 218 struct kvm *kvm = vcpu->kvm; 219 u32 psci_fn = smccc_get_function(vcpu); 220 unsigned long val; 221 int ret = 1; 222 223 val = kvm_psci_check_allowed_function(vcpu, psci_fn); 224 if (val) 225 goto out; 226 227 switch (psci_fn) { 228 case PSCI_0_2_FN_PSCI_VERSION: 229 /* 230 * Bits[31:16] = Major Version = 0 231 * Bits[15:0] = Minor Version = 2 232 */ 233 val = KVM_ARM_PSCI_0_2; 234 break; 235 case PSCI_0_2_FN_CPU_SUSPEND: 236 case PSCI_0_2_FN64_CPU_SUSPEND: 237 val = kvm_psci_vcpu_suspend(vcpu); 238 break; 239 case PSCI_0_2_FN_CPU_OFF: 240 kvm_psci_vcpu_off(vcpu); 241 val = PSCI_RET_SUCCESS; 242 break; 243 case PSCI_0_2_FN_CPU_ON: 244 kvm_psci_narrow_to_32bit(vcpu); 245 fallthrough; 246 case PSCI_0_2_FN64_CPU_ON: 247 mutex_lock(&kvm->lock); 248 val = kvm_psci_vcpu_on(vcpu); 249 mutex_unlock(&kvm->lock); 250 break; 251 case PSCI_0_2_FN_AFFINITY_INFO: 252 kvm_psci_narrow_to_32bit(vcpu); 253 fallthrough; 254 case PSCI_0_2_FN64_AFFINITY_INFO: 255 val = kvm_psci_vcpu_affinity_info(vcpu); 256 break; 257 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 258 /* 259 * Trusted OS is MP hence does not require migration 260 * or 261 * Trusted OS is not present 262 */ 263 val = PSCI_0_2_TOS_MP; 264 break; 265 case PSCI_0_2_FN_SYSTEM_OFF: 266 kvm_psci_system_off(vcpu); 267 /* 268 * We shouldn't be going back to guest VCPU after 269 * receiving SYSTEM_OFF request. 270 * 271 * If user space accidentally/deliberately resumes 272 * guest VCPU after SYSTEM_OFF request then guest 273 * VCPU should see internal failure from PSCI return 274 * value. To achieve this, we preload r0 (or x0) with 275 * PSCI return value INTERNAL_FAILURE. 276 */ 277 val = PSCI_RET_INTERNAL_FAILURE; 278 ret = 0; 279 break; 280 case PSCI_0_2_FN_SYSTEM_RESET: 281 kvm_psci_system_reset(vcpu); 282 /* 283 * Same reason as SYSTEM_OFF for preloading r0 (or x0) 284 * with PSCI return value INTERNAL_FAILURE. 285 */ 286 val = PSCI_RET_INTERNAL_FAILURE; 287 ret = 0; 288 break; 289 default: 290 val = PSCI_RET_NOT_SUPPORTED; 291 break; 292 } 293 294out: 295 smccc_set_retval(vcpu, val, 0, 0, 0); 296 return ret; 297} 298 299static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) 300{ 301 u32 psci_fn = smccc_get_function(vcpu); 302 u32 feature; 303 unsigned long val; 304 int ret = 1; 305 306 switch(psci_fn) { 307 case PSCI_0_2_FN_PSCI_VERSION: 308 val = KVM_ARM_PSCI_1_0; 309 break; 310 case PSCI_1_0_FN_PSCI_FEATURES: 311 feature = smccc_get_arg1(vcpu); 312 val = kvm_psci_check_allowed_function(vcpu, feature); 313 if (val) 314 break; 315 316 switch(feature) { 317 case PSCI_0_2_FN_PSCI_VERSION: 318 case PSCI_0_2_FN_CPU_SUSPEND: 319 case PSCI_0_2_FN64_CPU_SUSPEND: 320 case PSCI_0_2_FN_CPU_OFF: 321 case PSCI_0_2_FN_CPU_ON: 322 case PSCI_0_2_FN64_CPU_ON: 323 case PSCI_0_2_FN_AFFINITY_INFO: 324 case PSCI_0_2_FN64_AFFINITY_INFO: 325 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 326 case PSCI_0_2_FN_SYSTEM_OFF: 327 case PSCI_0_2_FN_SYSTEM_RESET: 328 case PSCI_1_0_FN_PSCI_FEATURES: 329 case ARM_SMCCC_VERSION_FUNC_ID: 330 val = 0; 331 break; 332 default: 333 val = PSCI_RET_NOT_SUPPORTED; 334 break; 335 } 336 break; 337 default: 338 return kvm_psci_0_2_call(vcpu); 339 } 340 341 smccc_set_retval(vcpu, val, 0, 0, 0); 342 return ret; 343} 344 345static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) 346{ 347 struct kvm *kvm = vcpu->kvm; 348 u32 psci_fn = smccc_get_function(vcpu); 349 unsigned long val; 350 351 switch (psci_fn) { 352 case KVM_PSCI_FN_CPU_OFF: 353 kvm_psci_vcpu_off(vcpu); 354 val = PSCI_RET_SUCCESS; 355 break; 356 case KVM_PSCI_FN_CPU_ON: 357 mutex_lock(&kvm->lock); 358 val = kvm_psci_vcpu_on(vcpu); 359 mutex_unlock(&kvm->lock); 360 break; 361 default: 362 val = PSCI_RET_NOT_SUPPORTED; 363 break; 364 } 365 366 smccc_set_retval(vcpu, val, 0, 0, 0); 367 return 1; 368} 369 370/** 371 * kvm_psci_call - handle PSCI call if r0 value is in range 372 * @vcpu: Pointer to the VCPU struct 373 * 374 * Handle PSCI calls from guests through traps from HVC instructions. 375 * The calling convention is similar to SMC calls to the secure world 376 * where the function number is placed in r0. 377 * 378 * This function returns: > 0 (success), 0 (success but exit to user 379 * space), and < 0 (errors) 380 * 381 * Errors: 382 * -EINVAL: Unrecognized PSCI function 383 */ 384int kvm_psci_call(struct kvm_vcpu *vcpu) 385{ 386 switch (kvm_psci_version(vcpu, vcpu->kvm)) { 387 case KVM_ARM_PSCI_1_0: 388 return kvm_psci_1_0_call(vcpu); 389 case KVM_ARM_PSCI_0_2: 390 return kvm_psci_0_2_call(vcpu); 391 case KVM_ARM_PSCI_0_1: 392 return kvm_psci_0_1_call(vcpu); 393 default: 394 return -EINVAL; 395 }; 396} 397 398int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) 399{ 400 return 4; /* PSCI version and three workaround registers */ 401} 402 403int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) 404{ 405 if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) 406 return -EFAULT; 407 408 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) 409 return -EFAULT; 410 411 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) 412 return -EFAULT; 413 414 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) 415 return -EFAULT; 416 417 return 0; 418} 419 420#define KVM_REG_FEATURE_LEVEL_WIDTH 4 421#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) 422 423/* 424 * Convert the workaround level into an easy-to-compare number, where higher 425 * values mean better protection. 426 */ 427static int get_kernel_wa_level(u64 regid) 428{ 429 switch (regid) { 430 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 431 switch (arm64_get_spectre_v2_state()) { 432 case SPECTRE_VULNERABLE: 433 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 434 case SPECTRE_MITIGATED: 435 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; 436 case SPECTRE_UNAFFECTED: 437 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; 438 } 439 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 440 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 441 switch (arm64_get_spectre_v4_state()) { 442 case SPECTRE_MITIGATED: 443 /* 444 * As for the hypercall discovery, we pretend we 445 * don't have any FW mitigation if SSBS is there at 446 * all times. 447 */ 448 if (cpus_have_final_cap(ARM64_SSBS)) 449 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 450 fallthrough; 451 case SPECTRE_UNAFFECTED: 452 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 453 case SPECTRE_VULNERABLE: 454 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 455 } 456 break; 457 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 458 switch (arm64_get_spectre_bhb_state()) { 459 case SPECTRE_VULNERABLE: 460 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; 461 case SPECTRE_MITIGATED: 462 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; 463 case SPECTRE_UNAFFECTED: 464 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; 465 } 466 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; 467 } 468 469 return -EINVAL; 470} 471 472int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 473{ 474 void __user *uaddr = (void __user *)(long)reg->addr; 475 u64 val; 476 477 switch (reg->id) { 478 case KVM_REG_ARM_PSCI_VERSION: 479 val = kvm_psci_version(vcpu, vcpu->kvm); 480 break; 481 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 482 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 483 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 484 val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; 485 break; 486 default: 487 return -ENOENT; 488 } 489 490 if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) 491 return -EFAULT; 492 493 return 0; 494} 495 496int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 497{ 498 void __user *uaddr = (void __user *)(long)reg->addr; 499 u64 val; 500 int wa_level; 501 502 if (KVM_REG_SIZE(reg->id) != sizeof(val)) 503 return -ENOENT; 504 if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) 505 return -EFAULT; 506 507 switch (reg->id) { 508 case KVM_REG_ARM_PSCI_VERSION: 509 { 510 bool wants_02; 511 512 wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); 513 514 switch (val) { 515 case KVM_ARM_PSCI_0_1: 516 if (wants_02) 517 return -EINVAL; 518 vcpu->kvm->arch.psci_version = val; 519 return 0; 520 case KVM_ARM_PSCI_0_2: 521 case KVM_ARM_PSCI_1_0: 522 if (!wants_02) 523 return -EINVAL; 524 vcpu->kvm->arch.psci_version = val; 525 return 0; 526 } 527 break; 528 } 529 530 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 531 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 532 if (val & ~KVM_REG_FEATURE_LEVEL_MASK) 533 return -EINVAL; 534 535 if (get_kernel_wa_level(reg->id) < val) 536 return -EINVAL; 537 538 return 0; 539 540 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 541 if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | 542 KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) 543 return -EINVAL; 544 545 /* The enabled bit must not be set unless the level is AVAIL. */ 546 if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && 547 (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) 548 return -EINVAL; 549 550 /* 551 * Map all the possible incoming states to the only two we 552 * really want to deal with. 553 */ 554 switch (val & KVM_REG_FEATURE_LEVEL_MASK) { 555 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: 556 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: 557 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 558 break; 559 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: 560 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: 561 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 562 break; 563 default: 564 return -EINVAL; 565 } 566 567 /* 568 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the 569 * other way around. 570 */ 571 if (get_kernel_wa_level(reg->id) < wa_level) 572 return -EINVAL; 573 574 return 0; 575 default: 576 return -ENOENT; 577 } 578 579 return -EINVAL; 580} 581