162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Core of Xen paravirt_ops implementation. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This file contains the xen_paravirt_ops structure itself, and the 662306a36Sopenharmony_ci * implementations for: 762306a36Sopenharmony_ci * - privileged instructions 862306a36Sopenharmony_ci * - interrupt flags 962306a36Sopenharmony_ci * - segment operations 1062306a36Sopenharmony_ci * - booting and setup 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 1362306a36Sopenharmony_ci */ 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include <linux/cpu.h> 1662306a36Sopenharmony_ci#include <linux/kernel.h> 1762306a36Sopenharmony_ci#include <linux/init.h> 1862306a36Sopenharmony_ci#include <linux/smp.h> 1962306a36Sopenharmony_ci#include <linux/preempt.h> 2062306a36Sopenharmony_ci#include <linux/hardirq.h> 2162306a36Sopenharmony_ci#include <linux/percpu.h> 2262306a36Sopenharmony_ci#include <linux/delay.h> 2362306a36Sopenharmony_ci#include <linux/start_kernel.h> 2462306a36Sopenharmony_ci#include <linux/sched.h> 2562306a36Sopenharmony_ci#include <linux/kprobes.h> 2662306a36Sopenharmony_ci#include <linux/kstrtox.h> 2762306a36Sopenharmony_ci#include <linux/memblock.h> 2862306a36Sopenharmony_ci#include <linux/export.h> 2962306a36Sopenharmony_ci#include <linux/mm.h> 3062306a36Sopenharmony_ci#include <linux/page-flags.h> 3162306a36Sopenharmony_ci#include <linux/pci.h> 3262306a36Sopenharmony_ci#include <linux/gfp.h> 3362306a36Sopenharmony_ci#include <linux/edd.h> 3462306a36Sopenharmony_ci#include <linux/reboot.h> 3562306a36Sopenharmony_ci#include <linux/virtio_anchor.h> 3662306a36Sopenharmony_ci#include <linux/stackprotector.h> 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#include <xen/xen.h> 3962306a36Sopenharmony_ci#include <xen/events.h> 4062306a36Sopenharmony_ci#include <xen/interface/xen.h> 4162306a36Sopenharmony_ci#include <xen/interface/version.h> 4262306a36Sopenharmony_ci#include <xen/interface/physdev.h> 4362306a36Sopenharmony_ci#include <xen/interface/vcpu.h> 4462306a36Sopenharmony_ci#include <xen/interface/memory.h> 4562306a36Sopenharmony_ci#include <xen/interface/nmi.h> 4662306a36Sopenharmony_ci#include <xen/interface/xen-mca.h> 4762306a36Sopenharmony_ci#include <xen/features.h> 4862306a36Sopenharmony_ci#include <xen/page.h> 4962306a36Sopenharmony_ci#include <xen/hvc-console.h> 5062306a36Sopenharmony_ci#include <xen/acpi.h> 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#include <asm/paravirt.h> 5362306a36Sopenharmony_ci#include <asm/apic.h> 5462306a36Sopenharmony_ci#include <asm/page.h> 5562306a36Sopenharmony_ci#include <asm/xen/pci.h> 5662306a36Sopenharmony_ci#include <asm/xen/hypercall.h> 5762306a36Sopenharmony_ci#include <asm/xen/hypervisor.h> 5862306a36Sopenharmony_ci#include <asm/xen/cpuid.h> 5962306a36Sopenharmony_ci#include <asm/fixmap.h> 6062306a36Sopenharmony_ci#include <asm/processor.h> 6162306a36Sopenharmony_ci#include <asm/proto.h> 6262306a36Sopenharmony_ci#include <asm/msr-index.h> 6362306a36Sopenharmony_ci#include <asm/traps.h> 6462306a36Sopenharmony_ci#include <asm/setup.h> 6562306a36Sopenharmony_ci#include <asm/desc.h> 6662306a36Sopenharmony_ci#include <asm/pgalloc.h> 6762306a36Sopenharmony_ci#include <asm/tlbflush.h> 6862306a36Sopenharmony_ci#include <asm/reboot.h> 6962306a36Sopenharmony_ci#include <asm/hypervisor.h> 7062306a36Sopenharmony_ci#include <asm/mach_traps.h> 7162306a36Sopenharmony_ci#include <asm/mtrr.h> 7262306a36Sopenharmony_ci#include <asm/mwait.h> 7362306a36Sopenharmony_ci#include <asm/pci_x86.h> 7462306a36Sopenharmony_ci#include <asm/cpu.h> 7562306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM 7662306a36Sopenharmony_ci#include <asm/io_bitmap.h> 7762306a36Sopenharmony_ci#endif 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci#ifdef CONFIG_ACPI 8062306a36Sopenharmony_ci#include <linux/acpi.h> 8162306a36Sopenharmony_ci#include <asm/acpi.h> 8262306a36Sopenharmony_ci#include <acpi/proc_cap_intel.h> 8362306a36Sopenharmony_ci#include <acpi/processor.h> 8462306a36Sopenharmony_ci#include <xen/interface/platform.h> 8562306a36Sopenharmony_ci#endif 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci#include "xen-ops.h" 8862306a36Sopenharmony_ci#include "mmu.h" 8962306a36Sopenharmony_ci#include "smp.h" 9062306a36Sopenharmony_ci#include "multicalls.h" 9162306a36Sopenharmony_ci#include "pmu.h" 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_civoid *xen_initial_gdt; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_cistatic int xen_cpu_up_prepare_pv(unsigned int cpu); 9862306a36Sopenharmony_cistatic int xen_cpu_dead_pv(unsigned int cpu); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistruct tls_descs { 10162306a36Sopenharmony_ci struct desc_struct desc[3]; 10262306a36Sopenharmony_ci}; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ciDEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; 10562306a36Sopenharmony_ciDEFINE_PER_CPU(unsigned int, xen_lazy_nesting); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_cienum xen_lazy_mode xen_get_lazy_mode(void) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci if (in_interrupt()) 11062306a36Sopenharmony_ci return XEN_LAZY_NONE; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci return this_cpu_read(xen_lazy_mode); 11362306a36Sopenharmony_ci} 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci/* 11662306a36Sopenharmony_ci * Updating the 3 TLS descriptors in the GDT on every task switch is 11762306a36Sopenharmony_ci * surprisingly expensive so we avoid updating them if they haven't 11862306a36Sopenharmony_ci * changed. Since Xen writes different descriptors than the one 11962306a36Sopenharmony_ci * passed in the update_descriptor hypercall we keep shadow copies to 12062306a36Sopenharmony_ci * compare against. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_cistatic __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic int __init parse_xen_msr_safe(char *str) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci if (str) 12962306a36Sopenharmony_ci return kstrtobool(str, &xen_msr_safe); 13062306a36Sopenharmony_ci return -EINVAL; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ciearly_param("xen_msr_safe", parse_xen_msr_safe); 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci/* Get MTRR settings from Xen and put them into mtrr_state. */ 13562306a36Sopenharmony_cistatic void __init xen_set_mtrr_data(void) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci#ifdef CONFIG_MTRR 13862306a36Sopenharmony_ci struct xen_platform_op op = { 13962306a36Sopenharmony_ci .cmd = XENPF_read_memtype, 14062306a36Sopenharmony_ci .interface_version = XENPF_INTERFACE_VERSION, 14162306a36Sopenharmony_ci }; 14262306a36Sopenharmony_ci unsigned int reg; 14362306a36Sopenharmony_ci unsigned long mask; 14462306a36Sopenharmony_ci uint32_t eax, width; 14562306a36Sopenharmony_ci static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* Get physical address width (only 64-bit cpus supported). */ 14862306a36Sopenharmony_ci width = 36; 14962306a36Sopenharmony_ci eax = cpuid_eax(0x80000000); 15062306a36Sopenharmony_ci if ((eax >> 16) == 0x8000 && eax >= 0x80000008) { 15162306a36Sopenharmony_ci eax = cpuid_eax(0x80000008); 15262306a36Sopenharmony_ci width = eax & 0xff; 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) { 15662306a36Sopenharmony_ci op.u.read_memtype.reg = reg; 15762306a36Sopenharmony_ci if (HYPERVISOR_platform_op(&op)) 15862306a36Sopenharmony_ci break; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci /* 16162306a36Sopenharmony_ci * Only called in dom0, which has all RAM PFNs mapped at 16262306a36Sopenharmony_ci * RAM MFNs, and all PCI space etc. is identity mapped. 16362306a36Sopenharmony_ci * This means we can treat MFN == PFN regarding MTRR settings. 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_ci var[reg].base_lo = op.u.read_memtype.type; 16662306a36Sopenharmony_ci var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT; 16762306a36Sopenharmony_ci var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT); 16862306a36Sopenharmony_ci mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1); 16962306a36Sopenharmony_ci mask &= (1UL << width) - 1; 17062306a36Sopenharmony_ci if (mask) 17162306a36Sopenharmony_ci mask |= MTRR_PHYSMASK_V; 17262306a36Sopenharmony_ci var[reg].mask_lo = mask; 17362306a36Sopenharmony_ci var[reg].mask_hi = mask >> 32; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci /* Only overwrite MTRR state if any MTRR could be got from Xen. */ 17762306a36Sopenharmony_ci if (reg) 17862306a36Sopenharmony_ci mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE); 17962306a36Sopenharmony_ci#endif 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_cistatic void __init xen_pv_init_platform(void) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci /* PV guests can't operate virtio devices without grants. */ 18562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_XEN_VIRTIO)) 18662306a36Sopenharmony_ci virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); 19162306a36Sopenharmony_ci HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */ 19462306a36Sopenharmony_ci xen_vcpu_info_reset(0); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci /* pvclock is in shared info area */ 19762306a36Sopenharmony_ci xen_init_time_ops(); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci if (xen_initial_domain()) 20062306a36Sopenharmony_ci xen_set_mtrr_data(); 20162306a36Sopenharmony_ci else 20262306a36Sopenharmony_ci mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_cistatic void __init xen_pv_guest_late_init(void) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci#ifndef CONFIG_SMP 20862306a36Sopenharmony_ci /* Setup shared vcpu info for non-smp configurations */ 20962306a36Sopenharmony_ci xen_setup_vcpu_info_placement(); 21062306a36Sopenharmony_ci#endif 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_cistatic __read_mostly unsigned int cpuid_leaf5_ecx_val; 21462306a36Sopenharmony_cistatic __read_mostly unsigned int cpuid_leaf5_edx_val; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistatic void xen_cpuid(unsigned int *ax, unsigned int *bx, 21762306a36Sopenharmony_ci unsigned int *cx, unsigned int *dx) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci unsigned maskebx = ~0; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci /* 22262306a36Sopenharmony_ci * Mask out inconvenient features, to try and disable as many 22362306a36Sopenharmony_ci * unsupported kernel subsystems as possible. 22462306a36Sopenharmony_ci */ 22562306a36Sopenharmony_ci switch (*ax) { 22662306a36Sopenharmony_ci case CPUID_MWAIT_LEAF: 22762306a36Sopenharmony_ci /* Synthesize the values.. */ 22862306a36Sopenharmony_ci *ax = 0; 22962306a36Sopenharmony_ci *bx = 0; 23062306a36Sopenharmony_ci *cx = cpuid_leaf5_ecx_val; 23162306a36Sopenharmony_ci *dx = cpuid_leaf5_edx_val; 23262306a36Sopenharmony_ci return; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci case 0xb: 23562306a36Sopenharmony_ci /* Suppress extended topology stuff */ 23662306a36Sopenharmony_ci maskebx = 0; 23762306a36Sopenharmony_ci break; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci asm(XEN_EMULATE_PREFIX "cpuid" 24162306a36Sopenharmony_ci : "=a" (*ax), 24262306a36Sopenharmony_ci "=b" (*bx), 24362306a36Sopenharmony_ci "=c" (*cx), 24462306a36Sopenharmony_ci "=d" (*dx) 24562306a36Sopenharmony_ci : "0" (*ax), "2" (*cx)); 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci *bx &= maskebx; 24862306a36Sopenharmony_ci} 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_cistatic bool __init xen_check_mwait(void) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci#ifdef CONFIG_ACPI 25362306a36Sopenharmony_ci struct xen_platform_op op = { 25462306a36Sopenharmony_ci .cmd = XENPF_set_processor_pminfo, 25562306a36Sopenharmony_ci .u.set_pminfo.id = -1, 25662306a36Sopenharmony_ci .u.set_pminfo.type = XEN_PM_PDC, 25762306a36Sopenharmony_ci }; 25862306a36Sopenharmony_ci uint32_t buf[3]; 25962306a36Sopenharmony_ci unsigned int ax, bx, cx, dx; 26062306a36Sopenharmony_ci unsigned int mwait_mask; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci /* We need to determine whether it is OK to expose the MWAIT 26362306a36Sopenharmony_ci * capability to the kernel to harvest deeper than C3 states from ACPI 26462306a36Sopenharmony_ci * _CST using the processor_harvest_xen.c module. For this to work, we 26562306a36Sopenharmony_ci * need to gather the MWAIT_LEAF values (which the cstate.c code 26662306a36Sopenharmony_ci * checks against). The hypervisor won't expose the MWAIT flag because 26762306a36Sopenharmony_ci * it would break backwards compatibility; so we will find out directly 26862306a36Sopenharmony_ci * from the hardware and hypercall. 26962306a36Sopenharmony_ci */ 27062306a36Sopenharmony_ci if (!xen_initial_domain()) 27162306a36Sopenharmony_ci return false; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci /* 27462306a36Sopenharmony_ci * When running under platform earlier than Xen4.2, do not expose 27562306a36Sopenharmony_ci * mwait, to avoid the risk of loading native acpi pad driver 27662306a36Sopenharmony_ci */ 27762306a36Sopenharmony_ci if (!xen_running_on_version_or_later(4, 2)) 27862306a36Sopenharmony_ci return false; 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci ax = 1; 28162306a36Sopenharmony_ci cx = 0; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci native_cpuid(&ax, &bx, &cx, &dx); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci mwait_mask = (1 << (X86_FEATURE_EST % 32)) | 28662306a36Sopenharmony_ci (1 << (X86_FEATURE_MWAIT % 32)); 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci if ((cx & mwait_mask) != mwait_mask) 28962306a36Sopenharmony_ci return false; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci /* We need to emulate the MWAIT_LEAF and for that we need both 29262306a36Sopenharmony_ci * ecx and edx. The hypercall provides only partial information. 29362306a36Sopenharmony_ci */ 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci ax = CPUID_MWAIT_LEAF; 29662306a36Sopenharmony_ci bx = 0; 29762306a36Sopenharmony_ci cx = 0; 29862306a36Sopenharmony_ci dx = 0; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci native_cpuid(&ax, &bx, &cx, &dx); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci /* Ask the Hypervisor whether to clear ACPI_PROC_CAP_C_C2C3_FFH. If so, 30362306a36Sopenharmony_ci * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. 30462306a36Sopenharmony_ci */ 30562306a36Sopenharmony_ci buf[0] = ACPI_PDC_REVISION_ID; 30662306a36Sopenharmony_ci buf[1] = 1; 30762306a36Sopenharmony_ci buf[2] = (ACPI_PROC_CAP_C_CAPABILITY_SMP | ACPI_PROC_CAP_EST_CAPABILITY_SWSMP); 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci set_xen_guest_handle(op.u.set_pminfo.pdc, buf); 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci if ((HYPERVISOR_platform_op(&op) == 0) && 31262306a36Sopenharmony_ci (buf[2] & (ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH))) { 31362306a36Sopenharmony_ci cpuid_leaf5_ecx_val = cx; 31462306a36Sopenharmony_ci cpuid_leaf5_edx_val = dx; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci return true; 31762306a36Sopenharmony_ci#else 31862306a36Sopenharmony_ci return false; 31962306a36Sopenharmony_ci#endif 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_cistatic bool __init xen_check_xsave(void) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci unsigned int cx, xsave_mask; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci cx = cpuid_ecx(1); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) | 32962306a36Sopenharmony_ci (1 << (X86_FEATURE_OSXSAVE % 32)); 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ 33262306a36Sopenharmony_ci return (cx & xsave_mask) == xsave_mask; 33362306a36Sopenharmony_ci} 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_cistatic void __init xen_init_capabilities(void) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci setup_force_cpu_cap(X86_FEATURE_XENPV); 33862306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_DCA); 33962306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_APERFMPERF); 34062306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_MTRR); 34162306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_ACC); 34262306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_X2APIC); 34362306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_SME); 34462306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_LKGS); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci /* 34762306a36Sopenharmony_ci * Xen PV would need some work to support PCID: CR3 handling as well 34862306a36Sopenharmony_ci * as xen_flush_tlb_others() would need updating. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_PCID); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci if (!xen_initial_domain()) 35362306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_ACPI); 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci if (xen_check_mwait()) 35662306a36Sopenharmony_ci setup_force_cpu_cap(X86_FEATURE_MWAIT); 35762306a36Sopenharmony_ci else 35862306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_MWAIT); 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci if (!xen_check_xsave()) { 36162306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_XSAVE); 36262306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_OSXSAVE); 36362306a36Sopenharmony_ci } 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_cistatic noinstr void xen_set_debugreg(int reg, unsigned long val) 36762306a36Sopenharmony_ci{ 36862306a36Sopenharmony_ci HYPERVISOR_set_debugreg(reg, val); 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_cistatic noinstr unsigned long xen_get_debugreg(int reg) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci return HYPERVISOR_get_debugreg(reg); 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic void xen_start_context_switch(struct task_struct *prev) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci BUG_ON(preemptible()); 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) { 38162306a36Sopenharmony_ci arch_leave_lazy_mmu_mode(); 38262306a36Sopenharmony_ci set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci enter_lazy(XEN_LAZY_CPU); 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_cistatic void xen_end_context_switch(struct task_struct *next) 38862306a36Sopenharmony_ci{ 38962306a36Sopenharmony_ci BUG_ON(preemptible()); 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci xen_mc_flush(); 39262306a36Sopenharmony_ci leave_lazy(XEN_LAZY_CPU); 39362306a36Sopenharmony_ci if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) 39462306a36Sopenharmony_ci arch_enter_lazy_mmu_mode(); 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_cistatic unsigned long xen_store_tr(void) 39862306a36Sopenharmony_ci{ 39962306a36Sopenharmony_ci return 0; 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci/* 40362306a36Sopenharmony_ci * Set the page permissions for a particular virtual address. If the 40462306a36Sopenharmony_ci * address is a vmalloc mapping (or other non-linear mapping), then 40562306a36Sopenharmony_ci * find the linear mapping of the page and also set its protections to 40662306a36Sopenharmony_ci * match. 40762306a36Sopenharmony_ci */ 40862306a36Sopenharmony_cistatic void set_aliased_prot(void *v, pgprot_t prot) 40962306a36Sopenharmony_ci{ 41062306a36Sopenharmony_ci int level; 41162306a36Sopenharmony_ci pte_t *ptep; 41262306a36Sopenharmony_ci pte_t pte; 41362306a36Sopenharmony_ci unsigned long pfn; 41462306a36Sopenharmony_ci unsigned char dummy; 41562306a36Sopenharmony_ci void *va; 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci ptep = lookup_address((unsigned long)v, &level); 41862306a36Sopenharmony_ci BUG_ON(ptep == NULL); 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci pfn = pte_pfn(*ptep); 42162306a36Sopenharmony_ci pte = pfn_pte(pfn, prot); 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci /* 42462306a36Sopenharmony_ci * Careful: update_va_mapping() will fail if the virtual address 42562306a36Sopenharmony_ci * we're poking isn't populated in the page tables. We don't 42662306a36Sopenharmony_ci * need to worry about the direct map (that's always in the page 42762306a36Sopenharmony_ci * tables), but we need to be careful about vmap space. In 42862306a36Sopenharmony_ci * particular, the top level page table can lazily propagate 42962306a36Sopenharmony_ci * entries between processes, so if we've switched mms since we 43062306a36Sopenharmony_ci * vmapped the target in the first place, we might not have the 43162306a36Sopenharmony_ci * top-level page table entry populated. 43262306a36Sopenharmony_ci * 43362306a36Sopenharmony_ci * We disable preemption because we want the same mm active when 43462306a36Sopenharmony_ci * we probe the target and when we issue the hypercall. We'll 43562306a36Sopenharmony_ci * have the same nominal mm, but if we're a kernel thread, lazy 43662306a36Sopenharmony_ci * mm dropping could change our pgd. 43762306a36Sopenharmony_ci * 43862306a36Sopenharmony_ci * Out of an abundance of caution, this uses __get_user() to fault 43962306a36Sopenharmony_ci * in the target address just in case there's some obscure case 44062306a36Sopenharmony_ci * in which the target address isn't readable. 44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci preempt_disable(); 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci copy_from_kernel_nofault(&dummy, v, 1); 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) 44862306a36Sopenharmony_ci BUG(); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci va = __va(PFN_PHYS(pfn)); 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) 45362306a36Sopenharmony_ci BUG(); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci preempt_enable(); 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_cistatic void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) 45962306a36Sopenharmony_ci{ 46062306a36Sopenharmony_ci const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; 46162306a36Sopenharmony_ci int i; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* 46462306a36Sopenharmony_ci * We need to mark the all aliases of the LDT pages RO. We 46562306a36Sopenharmony_ci * don't need to call vm_flush_aliases(), though, since that's 46662306a36Sopenharmony_ci * only responsible for flushing aliases out the TLBs, not the 46762306a36Sopenharmony_ci * page tables, and Xen will flush the TLB for us if needed. 46862306a36Sopenharmony_ci * 46962306a36Sopenharmony_ci * To avoid confusing future readers: none of this is necessary 47062306a36Sopenharmony_ci * to load the LDT. The hypervisor only checks this when the 47162306a36Sopenharmony_ci * LDT is faulted in due to subsequent descriptor access. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci for (i = 0; i < entries; i += entries_per_page) 47562306a36Sopenharmony_ci set_aliased_prot(ldt + i, PAGE_KERNEL_RO); 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic void xen_free_ldt(struct desc_struct *ldt, unsigned entries) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; 48162306a36Sopenharmony_ci int i; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci for (i = 0; i < entries; i += entries_per_page) 48462306a36Sopenharmony_ci set_aliased_prot(ldt + i, PAGE_KERNEL); 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_cistatic void xen_set_ldt(const void *addr, unsigned entries) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci struct mmuext_op *op; 49062306a36Sopenharmony_ci struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci trace_xen_cpu_set_ldt(addr, entries); 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci op = mcs.args; 49562306a36Sopenharmony_ci op->cmd = MMUEXT_SET_LDT; 49662306a36Sopenharmony_ci op->arg1.linear_addr = (unsigned long)addr; 49762306a36Sopenharmony_ci op->arg2.nr_ents = entries; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); 50262306a36Sopenharmony_ci} 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_cistatic void xen_load_gdt(const struct desc_ptr *dtr) 50562306a36Sopenharmony_ci{ 50662306a36Sopenharmony_ci unsigned long va = dtr->address; 50762306a36Sopenharmony_ci unsigned int size = dtr->size + 1; 50862306a36Sopenharmony_ci unsigned long pfn, mfn; 50962306a36Sopenharmony_ci int level; 51062306a36Sopenharmony_ci pte_t *ptep; 51162306a36Sopenharmony_ci void *virt; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */ 51462306a36Sopenharmony_ci BUG_ON(size > PAGE_SIZE); 51562306a36Sopenharmony_ci BUG_ON(va & ~PAGE_MASK); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci /* 51862306a36Sopenharmony_ci * The GDT is per-cpu and is in the percpu data area. 51962306a36Sopenharmony_ci * That can be virtually mapped, so we need to do a 52062306a36Sopenharmony_ci * page-walk to get the underlying MFN for the 52162306a36Sopenharmony_ci * hypercall. The page can also be in the kernel's 52262306a36Sopenharmony_ci * linear range, so we need to RO that mapping too. 52362306a36Sopenharmony_ci */ 52462306a36Sopenharmony_ci ptep = lookup_address(va, &level); 52562306a36Sopenharmony_ci BUG_ON(ptep == NULL); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci pfn = pte_pfn(*ptep); 52862306a36Sopenharmony_ci mfn = pfn_to_mfn(pfn); 52962306a36Sopenharmony_ci virt = __va(PFN_PHYS(pfn)); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci make_lowmem_page_readonly((void *)va); 53262306a36Sopenharmony_ci make_lowmem_page_readonly(virt); 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct))) 53562306a36Sopenharmony_ci BUG(); 53662306a36Sopenharmony_ci} 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci/* 53962306a36Sopenharmony_ci * load_gdt for early boot, when the gdt is only mapped once 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_cistatic void __init xen_load_gdt_boot(const struct desc_ptr *dtr) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci unsigned long va = dtr->address; 54462306a36Sopenharmony_ci unsigned int size = dtr->size + 1; 54562306a36Sopenharmony_ci unsigned long pfn, mfn; 54662306a36Sopenharmony_ci pte_t pte; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */ 54962306a36Sopenharmony_ci BUG_ON(size > PAGE_SIZE); 55062306a36Sopenharmony_ci BUG_ON(va & ~PAGE_MASK); 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci pfn = virt_to_pfn((void *)va); 55362306a36Sopenharmony_ci mfn = pfn_to_mfn(pfn); 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci pte = pfn_pte(pfn, PAGE_KERNEL_RO); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) 55862306a36Sopenharmony_ci BUG(); 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct))) 56162306a36Sopenharmony_ci BUG(); 56262306a36Sopenharmony_ci} 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_cistatic inline bool desc_equal(const struct desc_struct *d1, 56562306a36Sopenharmony_ci const struct desc_struct *d2) 56662306a36Sopenharmony_ci{ 56762306a36Sopenharmony_ci return !memcmp(d1, d2, sizeof(*d1)); 56862306a36Sopenharmony_ci} 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_cistatic void load_TLS_descriptor(struct thread_struct *t, 57162306a36Sopenharmony_ci unsigned int cpu, unsigned int i) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; 57462306a36Sopenharmony_ci struct desc_struct *gdt; 57562306a36Sopenharmony_ci xmaddr_t maddr; 57662306a36Sopenharmony_ci struct multicall_space mc; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci if (desc_equal(shadow, &t->tls_array[i])) 57962306a36Sopenharmony_ci return; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci *shadow = t->tls_array[i]; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci gdt = get_cpu_gdt_rw(cpu); 58462306a36Sopenharmony_ci maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); 58562306a36Sopenharmony_ci mc = __xen_mc_entry(0); 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); 58862306a36Sopenharmony_ci} 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_cistatic void xen_load_tls(struct thread_struct *t, unsigned int cpu) 59162306a36Sopenharmony_ci{ 59262306a36Sopenharmony_ci /* 59362306a36Sopenharmony_ci * In lazy mode we need to zero %fs, otherwise we may get an 59462306a36Sopenharmony_ci * exception between the new %fs descriptor being loaded and 59562306a36Sopenharmony_ci * %fs being effectively cleared at __switch_to(). 59662306a36Sopenharmony_ci */ 59762306a36Sopenharmony_ci if (xen_get_lazy_mode() == XEN_LAZY_CPU) 59862306a36Sopenharmony_ci loadsegment(fs, 0); 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci xen_mc_batch(); 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci load_TLS_descriptor(t, cpu, 0); 60362306a36Sopenharmony_ci load_TLS_descriptor(t, cpu, 1); 60462306a36Sopenharmony_ci load_TLS_descriptor(t, cpu, 2); 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_cistatic void xen_load_gs_index(unsigned int idx) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) 61262306a36Sopenharmony_ci BUG(); 61362306a36Sopenharmony_ci} 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_cistatic void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, 61662306a36Sopenharmony_ci const void *ptr) 61762306a36Sopenharmony_ci{ 61862306a36Sopenharmony_ci xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); 61962306a36Sopenharmony_ci u64 entry = *(u64 *)ptr; 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci trace_xen_cpu_write_ldt_entry(dt, entrynum, entry); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci preempt_disable(); 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci xen_mc_flush(); 62662306a36Sopenharmony_ci if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) 62762306a36Sopenharmony_ci BUG(); 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci preempt_enable(); 63062306a36Sopenharmony_ci} 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_civoid noist_exc_debug(struct pt_regs *regs); 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(xenpv_exc_nmi) 63562306a36Sopenharmony_ci{ 63662306a36Sopenharmony_ci /* On Xen PV, NMI doesn't use IST. The C part is the same as native. */ 63762306a36Sopenharmony_ci exc_nmi(regs); 63862306a36Sopenharmony_ci} 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault) 64162306a36Sopenharmony_ci{ 64262306a36Sopenharmony_ci /* On Xen PV, DF doesn't use IST. The C part is the same as native. */ 64362306a36Sopenharmony_ci exc_double_fault(regs, error_code); 64462306a36Sopenharmony_ci} 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(xenpv_exc_debug) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci /* 64962306a36Sopenharmony_ci * There's no IST on Xen PV, but we still need to dispatch 65062306a36Sopenharmony_ci * to the correct handler. 65162306a36Sopenharmony_ci */ 65262306a36Sopenharmony_ci if (user_mode(regs)) 65362306a36Sopenharmony_ci noist_exc_debug(regs); 65462306a36Sopenharmony_ci else 65562306a36Sopenharmony_ci exc_debug(regs); 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci /* This should never happen and there is no way to handle it. */ 66162306a36Sopenharmony_ci instrumentation_begin(); 66262306a36Sopenharmony_ci pr_err("Unknown trap in Xen PV mode."); 66362306a36Sopenharmony_ci BUG(); 66462306a36Sopenharmony_ci instrumentation_end(); 66562306a36Sopenharmony_ci} 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci#ifdef CONFIG_X86_MCE 66862306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(xenpv_exc_machine_check) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci /* 67162306a36Sopenharmony_ci * There's no IST on Xen PV, but we still need to dispatch 67262306a36Sopenharmony_ci * to the correct handler. 67362306a36Sopenharmony_ci */ 67462306a36Sopenharmony_ci if (user_mode(regs)) 67562306a36Sopenharmony_ci noist_exc_machine_check(regs); 67662306a36Sopenharmony_ci else 67762306a36Sopenharmony_ci exc_machine_check(regs); 67862306a36Sopenharmony_ci} 67962306a36Sopenharmony_ci#endif 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_cistruct trap_array_entry { 68262306a36Sopenharmony_ci void (*orig)(void); 68362306a36Sopenharmony_ci void (*xen)(void); 68462306a36Sopenharmony_ci bool ist_okay; 68562306a36Sopenharmony_ci}; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci#define TRAP_ENTRY(func, ist_ok) { \ 68862306a36Sopenharmony_ci .orig = asm_##func, \ 68962306a36Sopenharmony_ci .xen = xen_asm_##func, \ 69062306a36Sopenharmony_ci .ist_okay = ist_ok } 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci#define TRAP_ENTRY_REDIR(func, ist_ok) { \ 69362306a36Sopenharmony_ci .orig = asm_##func, \ 69462306a36Sopenharmony_ci .xen = xen_asm_xenpv_##func, \ 69562306a36Sopenharmony_ci .ist_okay = ist_ok } 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_cistatic struct trap_array_entry trap_array[] = { 69862306a36Sopenharmony_ci TRAP_ENTRY_REDIR(exc_debug, true ), 69962306a36Sopenharmony_ci TRAP_ENTRY_REDIR(exc_double_fault, true ), 70062306a36Sopenharmony_ci#ifdef CONFIG_X86_MCE 70162306a36Sopenharmony_ci TRAP_ENTRY_REDIR(exc_machine_check, true ), 70262306a36Sopenharmony_ci#endif 70362306a36Sopenharmony_ci TRAP_ENTRY_REDIR(exc_nmi, true ), 70462306a36Sopenharmony_ci TRAP_ENTRY(exc_int3, false ), 70562306a36Sopenharmony_ci TRAP_ENTRY(exc_overflow, false ), 70662306a36Sopenharmony_ci#ifdef CONFIG_IA32_EMULATION 70762306a36Sopenharmony_ci TRAP_ENTRY(int80_emulation, false ), 70862306a36Sopenharmony_ci#endif 70962306a36Sopenharmony_ci TRAP_ENTRY(exc_page_fault, false ), 71062306a36Sopenharmony_ci TRAP_ENTRY(exc_divide_error, false ), 71162306a36Sopenharmony_ci TRAP_ENTRY(exc_bounds, false ), 71262306a36Sopenharmony_ci TRAP_ENTRY(exc_invalid_op, false ), 71362306a36Sopenharmony_ci TRAP_ENTRY(exc_device_not_available, false ), 71462306a36Sopenharmony_ci TRAP_ENTRY(exc_coproc_segment_overrun, false ), 71562306a36Sopenharmony_ci TRAP_ENTRY(exc_invalid_tss, false ), 71662306a36Sopenharmony_ci TRAP_ENTRY(exc_segment_not_present, false ), 71762306a36Sopenharmony_ci TRAP_ENTRY(exc_stack_segment, false ), 71862306a36Sopenharmony_ci TRAP_ENTRY(exc_general_protection, false ), 71962306a36Sopenharmony_ci TRAP_ENTRY(exc_spurious_interrupt_bug, false ), 72062306a36Sopenharmony_ci TRAP_ENTRY(exc_coprocessor_error, false ), 72162306a36Sopenharmony_ci TRAP_ENTRY(exc_alignment_check, false ), 72262306a36Sopenharmony_ci TRAP_ENTRY(exc_simd_coprocessor_error, false ), 72362306a36Sopenharmony_ci#ifdef CONFIG_X86_CET 72462306a36Sopenharmony_ci TRAP_ENTRY(exc_control_protection, false ), 72562306a36Sopenharmony_ci#endif 72662306a36Sopenharmony_ci}; 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_cistatic bool __ref get_trap_addr(void **addr, unsigned int ist) 72962306a36Sopenharmony_ci{ 73062306a36Sopenharmony_ci unsigned int nr; 73162306a36Sopenharmony_ci bool ist_okay = false; 73262306a36Sopenharmony_ci bool found = false; 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci /* 73562306a36Sopenharmony_ci * Replace trap handler addresses by Xen specific ones. 73662306a36Sopenharmony_ci * Check for known traps using IST and whitelist them. 73762306a36Sopenharmony_ci * The debugger ones are the only ones we care about. 73862306a36Sopenharmony_ci * Xen will handle faults like double_fault, so we should never see 73962306a36Sopenharmony_ci * them. Warn if there's an unexpected IST-using fault handler. 74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { 74262306a36Sopenharmony_ci struct trap_array_entry *entry = trap_array + nr; 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci if (*addr == entry->orig) { 74562306a36Sopenharmony_ci *addr = entry->xen; 74662306a36Sopenharmony_ci ist_okay = entry->ist_okay; 74762306a36Sopenharmony_ci found = true; 74862306a36Sopenharmony_ci break; 74962306a36Sopenharmony_ci } 75062306a36Sopenharmony_ci } 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci if (nr == ARRAY_SIZE(trap_array) && 75362306a36Sopenharmony_ci *addr >= (void *)early_idt_handler_array[0] && 75462306a36Sopenharmony_ci *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) { 75562306a36Sopenharmony_ci nr = (*addr - (void *)early_idt_handler_array[0]) / 75662306a36Sopenharmony_ci EARLY_IDT_HANDLER_SIZE; 75762306a36Sopenharmony_ci *addr = (void *)xen_early_idt_handler_array[nr]; 75862306a36Sopenharmony_ci found = true; 75962306a36Sopenharmony_ci } 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci if (!found) 76262306a36Sopenharmony_ci *addr = (void *)xen_asm_exc_xen_unknown_trap; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci if (WARN_ON(found && ist != 0 && !ist_okay)) 76562306a36Sopenharmony_ci return false; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci return true; 76862306a36Sopenharmony_ci} 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_cistatic int cvt_gate_to_trap(int vector, const gate_desc *val, 77162306a36Sopenharmony_ci struct trap_info *info) 77262306a36Sopenharmony_ci{ 77362306a36Sopenharmony_ci unsigned long addr; 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT) 77662306a36Sopenharmony_ci return 0; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci info->vector = vector; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci addr = gate_offset(val); 78162306a36Sopenharmony_ci if (!get_trap_addr((void **)&addr, val->bits.ist)) 78262306a36Sopenharmony_ci return 0; 78362306a36Sopenharmony_ci info->address = addr; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci info->cs = gate_segment(val); 78662306a36Sopenharmony_ci info->flags = val->bits.dpl; 78762306a36Sopenharmony_ci /* interrupt gates clear IF */ 78862306a36Sopenharmony_ci if (val->bits.type == GATE_INTERRUPT) 78962306a36Sopenharmony_ci info->flags |= 1 << 2; 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci return 1; 79262306a36Sopenharmony_ci} 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci/* Locations of each CPU's IDT */ 79562306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct desc_ptr, idt_desc); 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci/* Set an IDT entry. If the entry is part of the current IDT, then 79862306a36Sopenharmony_ci also update Xen. */ 79962306a36Sopenharmony_cistatic void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) 80062306a36Sopenharmony_ci{ 80162306a36Sopenharmony_ci unsigned long p = (unsigned long)&dt[entrynum]; 80262306a36Sopenharmony_ci unsigned long start, end; 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci trace_xen_cpu_write_idt_entry(dt, entrynum, g); 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci preempt_disable(); 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci start = __this_cpu_read(idt_desc.address); 80962306a36Sopenharmony_ci end = start + __this_cpu_read(idt_desc.size) + 1; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci xen_mc_flush(); 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci native_write_idt_entry(dt, entrynum, g); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci if (p >= start && (p + 8) <= end) { 81662306a36Sopenharmony_ci struct trap_info info[2]; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci info[1].address = 0; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (cvt_gate_to_trap(entrynum, g, &info[0])) 82162306a36Sopenharmony_ci if (HYPERVISOR_set_trap_table(info)) 82262306a36Sopenharmony_ci BUG(); 82362306a36Sopenharmony_ci } 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci preempt_enable(); 82662306a36Sopenharmony_ci} 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_cistatic unsigned xen_convert_trap_info(const struct desc_ptr *desc, 82962306a36Sopenharmony_ci struct trap_info *traps, bool full) 83062306a36Sopenharmony_ci{ 83162306a36Sopenharmony_ci unsigned in, out, count; 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci count = (desc->size+1) / sizeof(gate_desc); 83462306a36Sopenharmony_ci BUG_ON(count > 256); 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci for (in = out = 0; in < count; in++) { 83762306a36Sopenharmony_ci gate_desc *entry = (gate_desc *)(desc->address) + in; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci if (cvt_gate_to_trap(in, entry, &traps[out]) || full) 84062306a36Sopenharmony_ci out++; 84162306a36Sopenharmony_ci } 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci return out; 84462306a36Sopenharmony_ci} 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_civoid xen_copy_trap_info(struct trap_info *traps) 84762306a36Sopenharmony_ci{ 84862306a36Sopenharmony_ci const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci xen_convert_trap_info(desc, traps, true); 85162306a36Sopenharmony_ci} 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci/* Load a new IDT into Xen. In principle this can be per-CPU, so we 85462306a36Sopenharmony_ci hold a spinlock to protect the static traps[] array (static because 85562306a36Sopenharmony_ci it avoids allocation, and saves stack space). */ 85662306a36Sopenharmony_cistatic void xen_load_idt(const struct desc_ptr *desc) 85762306a36Sopenharmony_ci{ 85862306a36Sopenharmony_ci static DEFINE_SPINLOCK(lock); 85962306a36Sopenharmony_ci static struct trap_info traps[257]; 86062306a36Sopenharmony_ci static const struct trap_info zero = { }; 86162306a36Sopenharmony_ci unsigned out; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci trace_xen_cpu_load_idt(desc); 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci spin_lock(&lock); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci out = xen_convert_trap_info(desc, traps, false); 87062306a36Sopenharmony_ci traps[out] = zero; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci xen_mc_flush(); 87362306a36Sopenharmony_ci if (HYPERVISOR_set_trap_table(traps)) 87462306a36Sopenharmony_ci BUG(); 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci spin_unlock(&lock); 87762306a36Sopenharmony_ci} 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci/* Write a GDT descriptor entry. Ignore LDT descriptors, since 88062306a36Sopenharmony_ci they're handled differently. */ 88162306a36Sopenharmony_cistatic void xen_write_gdt_entry(struct desc_struct *dt, int entry, 88262306a36Sopenharmony_ci const void *desc, int type) 88362306a36Sopenharmony_ci{ 88462306a36Sopenharmony_ci trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci preempt_disable(); 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci switch (type) { 88962306a36Sopenharmony_ci case DESC_LDT: 89062306a36Sopenharmony_ci case DESC_TSS: 89162306a36Sopenharmony_ci /* ignore */ 89262306a36Sopenharmony_ci break; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci default: { 89562306a36Sopenharmony_ci xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci xen_mc_flush(); 89862306a36Sopenharmony_ci if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) 89962306a36Sopenharmony_ci BUG(); 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci } 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci preempt_enable(); 90562306a36Sopenharmony_ci} 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci/* 90862306a36Sopenharmony_ci * Version of write_gdt_entry for use at early boot-time needed to 90962306a36Sopenharmony_ci * update an entry as simply as possible. 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_cistatic void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, 91262306a36Sopenharmony_ci const void *desc, int type) 91362306a36Sopenharmony_ci{ 91462306a36Sopenharmony_ci trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci switch (type) { 91762306a36Sopenharmony_ci case DESC_LDT: 91862306a36Sopenharmony_ci case DESC_TSS: 91962306a36Sopenharmony_ci /* ignore */ 92062306a36Sopenharmony_ci break; 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci default: { 92362306a36Sopenharmony_ci xmaddr_t maddr = virt_to_machine(&dt[entry]); 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) 92662306a36Sopenharmony_ci dt[entry] = *(struct desc_struct *)desc; 92762306a36Sopenharmony_ci } 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci } 93062306a36Sopenharmony_ci} 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_cistatic void xen_load_sp0(unsigned long sp0) 93362306a36Sopenharmony_ci{ 93462306a36Sopenharmony_ci struct multicall_space mcs; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci mcs = xen_mc_entry(0); 93762306a36Sopenharmony_ci MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); 93862306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); 93962306a36Sopenharmony_ci this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); 94062306a36Sopenharmony_ci} 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM 94362306a36Sopenharmony_cistatic void xen_invalidate_io_bitmap(void) 94462306a36Sopenharmony_ci{ 94562306a36Sopenharmony_ci struct physdev_set_iobitmap iobitmap = { 94662306a36Sopenharmony_ci .bitmap = NULL, 94762306a36Sopenharmony_ci .nr_ports = 0, 94862306a36Sopenharmony_ci }; 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci native_tss_invalidate_io_bitmap(); 95162306a36Sopenharmony_ci HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); 95262306a36Sopenharmony_ci} 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_cistatic void xen_update_io_bitmap(void) 95562306a36Sopenharmony_ci{ 95662306a36Sopenharmony_ci struct physdev_set_iobitmap iobitmap; 95762306a36Sopenharmony_ci struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci native_tss_update_io_bitmap(); 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) + 96262306a36Sopenharmony_ci tss->x86_tss.io_bitmap_base; 96362306a36Sopenharmony_ci if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID) 96462306a36Sopenharmony_ci iobitmap.nr_ports = 0; 96562306a36Sopenharmony_ci else 96662306a36Sopenharmony_ci iobitmap.nr_ports = IO_BITMAP_BITS; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); 96962306a36Sopenharmony_ci} 97062306a36Sopenharmony_ci#endif 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_cistatic void xen_io_delay(void) 97362306a36Sopenharmony_ci{ 97462306a36Sopenharmony_ci} 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, xen_cr0_value); 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_cistatic unsigned long xen_read_cr0(void) 97962306a36Sopenharmony_ci{ 98062306a36Sopenharmony_ci unsigned long cr0 = this_cpu_read(xen_cr0_value); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci if (unlikely(cr0 == 0)) { 98362306a36Sopenharmony_ci cr0 = native_read_cr0(); 98462306a36Sopenharmony_ci this_cpu_write(xen_cr0_value, cr0); 98562306a36Sopenharmony_ci } 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci return cr0; 98862306a36Sopenharmony_ci} 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_cistatic void xen_write_cr0(unsigned long cr0) 99162306a36Sopenharmony_ci{ 99262306a36Sopenharmony_ci struct multicall_space mcs; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci this_cpu_write(xen_cr0_value, cr0); 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci /* Only pay attention to cr0.TS; everything else is 99762306a36Sopenharmony_ci ignored. */ 99862306a36Sopenharmony_ci mcs = xen_mc_entry(0); 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci xen_mc_issue(XEN_LAZY_CPU); 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_cistatic void xen_write_cr4(unsigned long cr4) 100662306a36Sopenharmony_ci{ 100762306a36Sopenharmony_ci cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE); 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci native_write_cr4(cr4); 101062306a36Sopenharmony_ci} 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_cistatic u64 xen_do_read_msr(unsigned int msr, int *err) 101362306a36Sopenharmony_ci{ 101462306a36Sopenharmony_ci u64 val = 0; /* Avoid uninitialized value for safe variant. */ 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci if (pmu_msr_read(msr, &val, err)) 101762306a36Sopenharmony_ci return val; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci if (err) 102062306a36Sopenharmony_ci val = native_read_msr_safe(msr, err); 102162306a36Sopenharmony_ci else 102262306a36Sopenharmony_ci val = native_read_msr(msr); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci switch (msr) { 102562306a36Sopenharmony_ci case MSR_IA32_APICBASE: 102662306a36Sopenharmony_ci val &= ~X2APIC_ENABLE; 102762306a36Sopenharmony_ci break; 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci return val; 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_cistatic void set_seg(unsigned int which, unsigned int low, unsigned int high, 103362306a36Sopenharmony_ci int *err) 103462306a36Sopenharmony_ci{ 103562306a36Sopenharmony_ci u64 base = ((u64)high << 32) | low; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci if (HYPERVISOR_set_segment_base(which, base) == 0) 103862306a36Sopenharmony_ci return; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci if (err) 104162306a36Sopenharmony_ci *err = -EIO; 104262306a36Sopenharmony_ci else 104362306a36Sopenharmony_ci WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); 104462306a36Sopenharmony_ci} 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci/* 104762306a36Sopenharmony_ci * Support write_msr_safe() and write_msr() semantics. 104862306a36Sopenharmony_ci * With err == NULL write_msr() semantics are selected. 104962306a36Sopenharmony_ci * Supplying an err pointer requires err to be pre-initialized with 0. 105062306a36Sopenharmony_ci */ 105162306a36Sopenharmony_cistatic void xen_do_write_msr(unsigned int msr, unsigned int low, 105262306a36Sopenharmony_ci unsigned int high, int *err) 105362306a36Sopenharmony_ci{ 105462306a36Sopenharmony_ci switch (msr) { 105562306a36Sopenharmony_ci case MSR_FS_BASE: 105662306a36Sopenharmony_ci set_seg(SEGBASE_FS, low, high, err); 105762306a36Sopenharmony_ci break; 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci case MSR_KERNEL_GS_BASE: 106062306a36Sopenharmony_ci set_seg(SEGBASE_GS_USER, low, high, err); 106162306a36Sopenharmony_ci break; 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci case MSR_GS_BASE: 106462306a36Sopenharmony_ci set_seg(SEGBASE_GS_KERNEL, low, high, err); 106562306a36Sopenharmony_ci break; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci case MSR_STAR: 106862306a36Sopenharmony_ci case MSR_CSTAR: 106962306a36Sopenharmony_ci case MSR_LSTAR: 107062306a36Sopenharmony_ci case MSR_SYSCALL_MASK: 107162306a36Sopenharmony_ci case MSR_IA32_SYSENTER_CS: 107262306a36Sopenharmony_ci case MSR_IA32_SYSENTER_ESP: 107362306a36Sopenharmony_ci case MSR_IA32_SYSENTER_EIP: 107462306a36Sopenharmony_ci /* Fast syscall setup is all done in hypercalls, so 107562306a36Sopenharmony_ci these are all ignored. Stub them out here to stop 107662306a36Sopenharmony_ci Xen console noise. */ 107762306a36Sopenharmony_ci break; 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci default: 108062306a36Sopenharmony_ci if (!pmu_msr_write(msr, low, high, err)) { 108162306a36Sopenharmony_ci if (err) 108262306a36Sopenharmony_ci *err = native_write_msr_safe(msr, low, high); 108362306a36Sopenharmony_ci else 108462306a36Sopenharmony_ci native_write_msr(msr, low, high); 108562306a36Sopenharmony_ci } 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci} 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_cistatic u64 xen_read_msr_safe(unsigned int msr, int *err) 109062306a36Sopenharmony_ci{ 109162306a36Sopenharmony_ci return xen_do_read_msr(msr, err); 109262306a36Sopenharmony_ci} 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_cistatic int xen_write_msr_safe(unsigned int msr, unsigned int low, 109562306a36Sopenharmony_ci unsigned int high) 109662306a36Sopenharmony_ci{ 109762306a36Sopenharmony_ci int err = 0; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci xen_do_write_msr(msr, low, high, &err); 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci return err; 110262306a36Sopenharmony_ci} 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_cistatic u64 xen_read_msr(unsigned int msr) 110562306a36Sopenharmony_ci{ 110662306a36Sopenharmony_ci int err; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); 110962306a36Sopenharmony_ci} 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_cistatic void xen_write_msr(unsigned int msr, unsigned low, unsigned high) 111262306a36Sopenharmony_ci{ 111362306a36Sopenharmony_ci int err; 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); 111662306a36Sopenharmony_ci} 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci/* This is called once we have the cpu_possible_mask */ 111962306a36Sopenharmony_civoid __init xen_setup_vcpu_info_placement(void) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci int cpu; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 112462306a36Sopenharmony_ci /* Set up direct vCPU id mapping for PV guests. */ 112562306a36Sopenharmony_ci per_cpu(xen_vcpu_id, cpu) = cpu; 112662306a36Sopenharmony_ci xen_vcpu_setup(cpu); 112762306a36Sopenharmony_ci } 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); 113062306a36Sopenharmony_ci pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); 113162306a36Sopenharmony_ci pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); 113262306a36Sopenharmony_ci pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); 113362306a36Sopenharmony_ci} 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_cistatic const struct pv_info xen_info __initconst = { 113662306a36Sopenharmony_ci .extra_user_64bit_cs = FLAT_USER_CS64, 113762306a36Sopenharmony_ci .name = "Xen", 113862306a36Sopenharmony_ci}; 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_cistatic const typeof(pv_ops) xen_cpu_ops __initconst = { 114162306a36Sopenharmony_ci .cpu = { 114262306a36Sopenharmony_ci .cpuid = xen_cpuid, 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci .set_debugreg = xen_set_debugreg, 114562306a36Sopenharmony_ci .get_debugreg = xen_get_debugreg, 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci .read_cr0 = xen_read_cr0, 114862306a36Sopenharmony_ci .write_cr0 = xen_write_cr0, 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci .write_cr4 = xen_write_cr4, 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci .wbinvd = pv_native_wbinvd, 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci .read_msr = xen_read_msr, 115562306a36Sopenharmony_ci .write_msr = xen_write_msr, 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci .read_msr_safe = xen_read_msr_safe, 115862306a36Sopenharmony_ci .write_msr_safe = xen_write_msr_safe, 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci .read_pmc = xen_read_pmc, 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci .load_tr_desc = paravirt_nop, 116362306a36Sopenharmony_ci .set_ldt = xen_set_ldt, 116462306a36Sopenharmony_ci .load_gdt = xen_load_gdt, 116562306a36Sopenharmony_ci .load_idt = xen_load_idt, 116662306a36Sopenharmony_ci .load_tls = xen_load_tls, 116762306a36Sopenharmony_ci .load_gs_index = xen_load_gs_index, 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci .alloc_ldt = xen_alloc_ldt, 117062306a36Sopenharmony_ci .free_ldt = xen_free_ldt, 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci .store_tr = xen_store_tr, 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci .write_ldt_entry = xen_write_ldt_entry, 117562306a36Sopenharmony_ci .write_gdt_entry = xen_write_gdt_entry, 117662306a36Sopenharmony_ci .write_idt_entry = xen_write_idt_entry, 117762306a36Sopenharmony_ci .load_sp0 = xen_load_sp0, 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM 118062306a36Sopenharmony_ci .invalidate_io_bitmap = xen_invalidate_io_bitmap, 118162306a36Sopenharmony_ci .update_io_bitmap = xen_update_io_bitmap, 118262306a36Sopenharmony_ci#endif 118362306a36Sopenharmony_ci .io_delay = xen_io_delay, 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci .start_context_switch = xen_start_context_switch, 118662306a36Sopenharmony_ci .end_context_switch = xen_end_context_switch, 118762306a36Sopenharmony_ci }, 118862306a36Sopenharmony_ci}; 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_cistatic void xen_restart(char *msg) 119162306a36Sopenharmony_ci{ 119262306a36Sopenharmony_ci xen_reboot(SHUTDOWN_reboot); 119362306a36Sopenharmony_ci} 119462306a36Sopenharmony_ci 119562306a36Sopenharmony_cistatic void xen_machine_halt(void) 119662306a36Sopenharmony_ci{ 119762306a36Sopenharmony_ci xen_reboot(SHUTDOWN_poweroff); 119862306a36Sopenharmony_ci} 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_cistatic void xen_machine_power_off(void) 120162306a36Sopenharmony_ci{ 120262306a36Sopenharmony_ci do_kernel_power_off(); 120362306a36Sopenharmony_ci xen_reboot(SHUTDOWN_poweroff); 120462306a36Sopenharmony_ci} 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_cistatic void xen_crash_shutdown(struct pt_regs *regs) 120762306a36Sopenharmony_ci{ 120862306a36Sopenharmony_ci xen_reboot(SHUTDOWN_crash); 120962306a36Sopenharmony_ci} 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_cistatic const struct machine_ops xen_machine_ops __initconst = { 121262306a36Sopenharmony_ci .restart = xen_restart, 121362306a36Sopenharmony_ci .halt = xen_machine_halt, 121462306a36Sopenharmony_ci .power_off = xen_machine_power_off, 121562306a36Sopenharmony_ci .shutdown = xen_machine_halt, 121662306a36Sopenharmony_ci .crash_shutdown = xen_crash_shutdown, 121762306a36Sopenharmony_ci .emergency_restart = xen_emergency_restart, 121862306a36Sopenharmony_ci}; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_cistatic unsigned char xen_get_nmi_reason(void) 122162306a36Sopenharmony_ci{ 122262306a36Sopenharmony_ci unsigned char reason = 0; 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci /* Construct a value which looks like it came from port 0x61. */ 122562306a36Sopenharmony_ci if (test_bit(_XEN_NMIREASON_io_error, 122662306a36Sopenharmony_ci &HYPERVISOR_shared_info->arch.nmi_reason)) 122762306a36Sopenharmony_ci reason |= NMI_REASON_IOCHK; 122862306a36Sopenharmony_ci if (test_bit(_XEN_NMIREASON_pci_serr, 122962306a36Sopenharmony_ci &HYPERVISOR_shared_info->arch.nmi_reason)) 123062306a36Sopenharmony_ci reason |= NMI_REASON_SERR; 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci return reason; 123362306a36Sopenharmony_ci} 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_cistatic void __init xen_boot_params_init_edd(void) 123662306a36Sopenharmony_ci{ 123762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_EDD) 123862306a36Sopenharmony_ci struct xen_platform_op op; 123962306a36Sopenharmony_ci struct edd_info *edd_info; 124062306a36Sopenharmony_ci u32 *mbr_signature; 124162306a36Sopenharmony_ci unsigned nr; 124262306a36Sopenharmony_ci int ret; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci edd_info = boot_params.eddbuf; 124562306a36Sopenharmony_ci mbr_signature = boot_params.edd_mbr_sig_buffer; 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci op.cmd = XENPF_firmware_info; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci op.u.firmware_info.type = XEN_FW_DISK_INFO; 125062306a36Sopenharmony_ci for (nr = 0; nr < EDDMAXNR; nr++) { 125162306a36Sopenharmony_ci struct edd_info *info = edd_info + nr; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci op.u.firmware_info.index = nr; 125462306a36Sopenharmony_ci info->params.length = sizeof(info->params); 125562306a36Sopenharmony_ci set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params, 125662306a36Sopenharmony_ci &info->params); 125762306a36Sopenharmony_ci ret = HYPERVISOR_platform_op(&op); 125862306a36Sopenharmony_ci if (ret) 125962306a36Sopenharmony_ci break; 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_ci#define C(x) info->x = op.u.firmware_info.u.disk_info.x 126262306a36Sopenharmony_ci C(device); 126362306a36Sopenharmony_ci C(version); 126462306a36Sopenharmony_ci C(interface_support); 126562306a36Sopenharmony_ci C(legacy_max_cylinder); 126662306a36Sopenharmony_ci C(legacy_max_head); 126762306a36Sopenharmony_ci C(legacy_sectors_per_track); 126862306a36Sopenharmony_ci#undef C 126962306a36Sopenharmony_ci } 127062306a36Sopenharmony_ci boot_params.eddbuf_entries = nr; 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE; 127362306a36Sopenharmony_ci for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) { 127462306a36Sopenharmony_ci op.u.firmware_info.index = nr; 127562306a36Sopenharmony_ci ret = HYPERVISOR_platform_op(&op); 127662306a36Sopenharmony_ci if (ret) 127762306a36Sopenharmony_ci break; 127862306a36Sopenharmony_ci mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature; 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci boot_params.edd_mbr_sig_buf_entries = nr; 128162306a36Sopenharmony_ci#endif 128262306a36Sopenharmony_ci} 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci/* 128562306a36Sopenharmony_ci * Set up the GDT and segment registers for -fstack-protector. Until 128662306a36Sopenharmony_ci * we do this, we have to be careful not to call any stack-protected 128762306a36Sopenharmony_ci * function, which is most of the kernel. 128862306a36Sopenharmony_ci */ 128962306a36Sopenharmony_cistatic void __init xen_setup_gdt(int cpu) 129062306a36Sopenharmony_ci{ 129162306a36Sopenharmony_ci pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; 129262306a36Sopenharmony_ci pv_ops.cpu.load_gdt = xen_load_gdt_boot; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci switch_gdt_and_percpu_base(cpu); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; 129762306a36Sopenharmony_ci pv_ops.cpu.load_gdt = xen_load_gdt; 129862306a36Sopenharmony_ci} 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_cistatic void __init xen_dom0_set_legacy_features(void) 130162306a36Sopenharmony_ci{ 130262306a36Sopenharmony_ci x86_platform.legacy.rtc = 1; 130362306a36Sopenharmony_ci} 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_cistatic void __init xen_domu_set_legacy_features(void) 130662306a36Sopenharmony_ci{ 130762306a36Sopenharmony_ci x86_platform.legacy.rtc = 0; 130862306a36Sopenharmony_ci} 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ciextern void early_xen_iret_patch(void); 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci/* First C function to be called on Xen boot */ 131362306a36Sopenharmony_ciasmlinkage __visible void __init xen_start_kernel(struct start_info *si) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci struct physdev_set_iopl set_iopl; 131662306a36Sopenharmony_ci unsigned long initrd_start = 0; 131762306a36Sopenharmony_ci int rc; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (!si) 132062306a36Sopenharmony_ci return; 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci clear_bss(); 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci xen_start_info = si; 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci __text_gen_insn(&early_xen_iret_patch, 132762306a36Sopenharmony_ci JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, 132862306a36Sopenharmony_ci JMP32_INSN_SIZE); 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci xen_domain_type = XEN_PV_DOMAIN; 133162306a36Sopenharmony_ci xen_start_flags = xen_start_info->flags; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci xen_setup_features(); 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci /* Install Xen paravirt ops */ 133662306a36Sopenharmony_ci pv_info = xen_info; 133762306a36Sopenharmony_ci pv_ops.cpu = xen_cpu_ops.cpu; 133862306a36Sopenharmony_ci xen_init_irq_ops(); 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci /* 134162306a36Sopenharmony_ci * Setup xen_vcpu early because it is needed for 134262306a36Sopenharmony_ci * local_irq_disable(), irqs_disabled(), e.g. in printk(). 134362306a36Sopenharmony_ci * 134462306a36Sopenharmony_ci * Don't do the full vcpu_info placement stuff until we have 134562306a36Sopenharmony_ci * the cpu_possible_mask and a non-dummy shared_info. 134662306a36Sopenharmony_ci */ 134762306a36Sopenharmony_ci xen_vcpu_info_reset(0); 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci x86_platform.get_nmi_reason = xen_get_nmi_reason; 135062306a36Sopenharmony_ci x86_platform.realmode_reserve = x86_init_noop; 135162306a36Sopenharmony_ci x86_platform.realmode_init = x86_init_noop; 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci x86_init.resources.memory_setup = xen_memory_setup; 135462306a36Sopenharmony_ci x86_init.irqs.intr_mode_select = x86_init_noop; 135562306a36Sopenharmony_ci x86_init.irqs.intr_mode_init = x86_64_probe_apic; 135662306a36Sopenharmony_ci x86_init.oem.arch_setup = xen_arch_setup; 135762306a36Sopenharmony_ci x86_init.oem.banner = xen_banner; 135862306a36Sopenharmony_ci x86_init.hyper.init_platform = xen_pv_init_platform; 135962306a36Sopenharmony_ci x86_init.hyper.guest_late_init = xen_pv_guest_late_init; 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci /* 136262306a36Sopenharmony_ci * Set up some pagetable state before starting to set any ptes. 136362306a36Sopenharmony_ci */ 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_ci xen_setup_machphys_mapping(); 136662306a36Sopenharmony_ci xen_init_mmu_ops(); 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci /* Prevent unwanted bits from being set in PTEs. */ 136962306a36Sopenharmony_ci __supported_pte_mask &= ~_PAGE_GLOBAL; 137062306a36Sopenharmony_ci __default_kernel_pte_mask &= ~_PAGE_GLOBAL; 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci /* Get mfn list */ 137362306a36Sopenharmony_ci xen_build_dynamic_phys_to_machine(); 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci /* Work out if we support NX */ 137662306a36Sopenharmony_ci get_cpu_cap(&boot_cpu_data); 137762306a36Sopenharmony_ci x86_configure_nx(); 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci /* 138062306a36Sopenharmony_ci * Set up kernel GDT and segment registers, mainly so that 138162306a36Sopenharmony_ci * -fstack-protector code can be executed. 138262306a36Sopenharmony_ci */ 138362306a36Sopenharmony_ci xen_setup_gdt(0); 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci /* Determine virtual and physical address sizes */ 138662306a36Sopenharmony_ci get_cpu_address_sizes(&boot_cpu_data); 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci /* Let's presume PV guests always boot on vCPU with id 0. */ 138962306a36Sopenharmony_ci per_cpu(xen_vcpu_id, 0) = 0; 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci idt_setup_early_handler(); 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci xen_init_capabilities(); 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci /* 139662306a36Sopenharmony_ci * set up the basic apic ops. 139762306a36Sopenharmony_ci */ 139862306a36Sopenharmony_ci xen_init_apic(); 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci machine_ops = xen_machine_ops; 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci /* 140362306a36Sopenharmony_ci * The only reliable way to retain the initial address of the 140462306a36Sopenharmony_ci * percpu gdt_page is to remember it here, so we can go and 140562306a36Sopenharmony_ci * mark it RW later, when the initial percpu area is freed. 140662306a36Sopenharmony_ci */ 140762306a36Sopenharmony_ci xen_initial_gdt = &per_cpu(gdt_page, 0); 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci xen_smp_init(); 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci#ifdef CONFIG_ACPI_NUMA 141262306a36Sopenharmony_ci /* 141362306a36Sopenharmony_ci * The pages we from Xen are not related to machine pages, so 141462306a36Sopenharmony_ci * any NUMA information the kernel tries to get from ACPI will 141562306a36Sopenharmony_ci * be meaningless. Prevent it from trying. 141662306a36Sopenharmony_ci */ 141762306a36Sopenharmony_ci disable_srat(); 141862306a36Sopenharmony_ci#endif 141962306a36Sopenharmony_ci WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci local_irq_disable(); 142262306a36Sopenharmony_ci early_boot_irqs_disabled = true; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci xen_raw_console_write("mapping kernel into physical memory\n"); 142562306a36Sopenharmony_ci xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, 142662306a36Sopenharmony_ci xen_start_info->nr_pages); 142762306a36Sopenharmony_ci xen_reserve_special_pages(); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci /* 143062306a36Sopenharmony_ci * We used to do this in xen_arch_setup, but that is too late 143162306a36Sopenharmony_ci * on AMD were early_cpu_init (run before ->arch_setup()) calls 143262306a36Sopenharmony_ci * early_amd_init which pokes 0xcf8 port. 143362306a36Sopenharmony_ci */ 143462306a36Sopenharmony_ci set_iopl.iopl = 1; 143562306a36Sopenharmony_ci rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 143662306a36Sopenharmony_ci if (rc != 0) 143762306a36Sopenharmony_ci xen_raw_printk("physdev_op failed %d\n", rc); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci if (xen_start_info->mod_start) { 144162306a36Sopenharmony_ci if (xen_start_info->flags & SIF_MOD_START_PFN) 144262306a36Sopenharmony_ci initrd_start = PFN_PHYS(xen_start_info->mod_start); 144362306a36Sopenharmony_ci else 144462306a36Sopenharmony_ci initrd_start = __pa(xen_start_info->mod_start); 144562306a36Sopenharmony_ci } 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci /* Poke various useful things into boot_params */ 144862306a36Sopenharmony_ci boot_params.hdr.type_of_loader = (9 << 4) | 0; 144962306a36Sopenharmony_ci boot_params.hdr.ramdisk_image = initrd_start; 145062306a36Sopenharmony_ci boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 145162306a36Sopenharmony_ci boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); 145262306a36Sopenharmony_ci boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN; 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci if (!xen_initial_domain()) { 145562306a36Sopenharmony_ci if (pci_xen) 145662306a36Sopenharmony_ci x86_init.pci.arch_init = pci_xen_init; 145762306a36Sopenharmony_ci x86_platform.set_legacy_features = 145862306a36Sopenharmony_ci xen_domu_set_legacy_features; 145962306a36Sopenharmony_ci } else { 146062306a36Sopenharmony_ci const struct dom0_vga_console_info *info = 146162306a36Sopenharmony_ci (void *)((char *)xen_start_info + 146262306a36Sopenharmony_ci xen_start_info->console.dom0.info_off); 146362306a36Sopenharmony_ci struct xen_platform_op op = { 146462306a36Sopenharmony_ci .cmd = XENPF_firmware_info, 146562306a36Sopenharmony_ci .interface_version = XENPF_INTERFACE_VERSION, 146662306a36Sopenharmony_ci .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, 146762306a36Sopenharmony_ci }; 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_ci x86_platform.set_legacy_features = 147062306a36Sopenharmony_ci xen_dom0_set_legacy_features; 147162306a36Sopenharmony_ci xen_init_vga(info, xen_start_info->console.dom0.info_size, 147262306a36Sopenharmony_ci &boot_params.screen_info); 147362306a36Sopenharmony_ci xen_start_info->console.domU.mfn = 0; 147462306a36Sopenharmony_ci xen_start_info->console.domU.evtchn = 0; 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci if (HYPERVISOR_platform_op(&op) == 0) 147762306a36Sopenharmony_ci boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci /* Make sure ACS will be enabled */ 148062306a36Sopenharmony_ci pci_request_acs(); 148162306a36Sopenharmony_ci 148262306a36Sopenharmony_ci xen_acpi_sleep_register(); 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci xen_boot_params_init_edd(); 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci#ifdef CONFIG_ACPI 148762306a36Sopenharmony_ci /* 148862306a36Sopenharmony_ci * Disable selecting "Firmware First mode" for correctable 148962306a36Sopenharmony_ci * memory errors, as this is the duty of the hypervisor to 149062306a36Sopenharmony_ci * decide. 149162306a36Sopenharmony_ci */ 149262306a36Sopenharmony_ci acpi_disable_cmcff = 1; 149362306a36Sopenharmony_ci#endif 149462306a36Sopenharmony_ci } 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_ci xen_add_preferred_consoles(); 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci#ifdef CONFIG_PCI 149962306a36Sopenharmony_ci /* PCI BIOS service won't work from a PV guest. */ 150062306a36Sopenharmony_ci pci_probe &= ~PCI_PROBE_BIOS; 150162306a36Sopenharmony_ci#endif 150262306a36Sopenharmony_ci xen_raw_console_write("about to get started...\n"); 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci /* We need this for printk timestamps */ 150562306a36Sopenharmony_ci xen_setup_runstate_info(0); 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci xen_efi_init(&boot_params); 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci /* Start the world */ 151062306a36Sopenharmony_ci cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ 151162306a36Sopenharmony_ci x86_64_start_reservations((char *)__pa_symbol(&boot_params)); 151262306a36Sopenharmony_ci} 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_cistatic int xen_cpu_up_prepare_pv(unsigned int cpu) 151562306a36Sopenharmony_ci{ 151662306a36Sopenharmony_ci int rc; 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci if (per_cpu(xen_vcpu, cpu) == NULL) 151962306a36Sopenharmony_ci return -ENODEV; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci xen_setup_timer(cpu); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci rc = xen_smp_intr_init(cpu); 152462306a36Sopenharmony_ci if (rc) { 152562306a36Sopenharmony_ci WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n", 152662306a36Sopenharmony_ci cpu, rc); 152762306a36Sopenharmony_ci return rc; 152862306a36Sopenharmony_ci } 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci rc = xen_smp_intr_init_pv(cpu); 153162306a36Sopenharmony_ci if (rc) { 153262306a36Sopenharmony_ci WARN(1, "xen_smp_intr_init_pv() for CPU %d failed: %d\n", 153362306a36Sopenharmony_ci cpu, rc); 153462306a36Sopenharmony_ci return rc; 153562306a36Sopenharmony_ci } 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci return 0; 153862306a36Sopenharmony_ci} 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_cistatic int xen_cpu_dead_pv(unsigned int cpu) 154162306a36Sopenharmony_ci{ 154262306a36Sopenharmony_ci xen_smp_intr_free(cpu); 154362306a36Sopenharmony_ci xen_smp_intr_free_pv(cpu); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci xen_teardown_timer(cpu); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci return 0; 154862306a36Sopenharmony_ci} 154962306a36Sopenharmony_ci 155062306a36Sopenharmony_cistatic uint32_t __init xen_platform_pv(void) 155162306a36Sopenharmony_ci{ 155262306a36Sopenharmony_ci if (xen_pv_domain()) 155362306a36Sopenharmony_ci return xen_cpuid_base(); 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci return 0; 155662306a36Sopenharmony_ci} 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ciconst __initconst struct hypervisor_x86 x86_hyper_xen_pv = { 155962306a36Sopenharmony_ci .name = "Xen PV", 156062306a36Sopenharmony_ci .detect = xen_platform_pv, 156162306a36Sopenharmony_ci .type = X86_HYPER_XEN_PV, 156262306a36Sopenharmony_ci .runtime.pin_vcpu = xen_pin_vcpu, 156362306a36Sopenharmony_ci .ignore_nopv = true, 156462306a36Sopenharmony_ci}; 1565