162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Core of Xen paravirt_ops implementation.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * This file contains the xen_paravirt_ops structure itself, and the
662306a36Sopenharmony_ci * implementations for:
762306a36Sopenharmony_ci * - privileged instructions
862306a36Sopenharmony_ci * - interrupt flags
962306a36Sopenharmony_ci * - segment operations
1062306a36Sopenharmony_ci * - booting and setup
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
1362306a36Sopenharmony_ci */
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <linux/cpu.h>
1662306a36Sopenharmony_ci#include <linux/kernel.h>
1762306a36Sopenharmony_ci#include <linux/init.h>
1862306a36Sopenharmony_ci#include <linux/smp.h>
1962306a36Sopenharmony_ci#include <linux/preempt.h>
2062306a36Sopenharmony_ci#include <linux/hardirq.h>
2162306a36Sopenharmony_ci#include <linux/percpu.h>
2262306a36Sopenharmony_ci#include <linux/delay.h>
2362306a36Sopenharmony_ci#include <linux/start_kernel.h>
2462306a36Sopenharmony_ci#include <linux/sched.h>
2562306a36Sopenharmony_ci#include <linux/kprobes.h>
2662306a36Sopenharmony_ci#include <linux/kstrtox.h>
2762306a36Sopenharmony_ci#include <linux/memblock.h>
2862306a36Sopenharmony_ci#include <linux/export.h>
2962306a36Sopenharmony_ci#include <linux/mm.h>
3062306a36Sopenharmony_ci#include <linux/page-flags.h>
3162306a36Sopenharmony_ci#include <linux/pci.h>
3262306a36Sopenharmony_ci#include <linux/gfp.h>
3362306a36Sopenharmony_ci#include <linux/edd.h>
3462306a36Sopenharmony_ci#include <linux/reboot.h>
3562306a36Sopenharmony_ci#include <linux/virtio_anchor.h>
3662306a36Sopenharmony_ci#include <linux/stackprotector.h>
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci#include <xen/xen.h>
3962306a36Sopenharmony_ci#include <xen/events.h>
4062306a36Sopenharmony_ci#include <xen/interface/xen.h>
4162306a36Sopenharmony_ci#include <xen/interface/version.h>
4262306a36Sopenharmony_ci#include <xen/interface/physdev.h>
4362306a36Sopenharmony_ci#include <xen/interface/vcpu.h>
4462306a36Sopenharmony_ci#include <xen/interface/memory.h>
4562306a36Sopenharmony_ci#include <xen/interface/nmi.h>
4662306a36Sopenharmony_ci#include <xen/interface/xen-mca.h>
4762306a36Sopenharmony_ci#include <xen/features.h>
4862306a36Sopenharmony_ci#include <xen/page.h>
4962306a36Sopenharmony_ci#include <xen/hvc-console.h>
5062306a36Sopenharmony_ci#include <xen/acpi.h>
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci#include <asm/paravirt.h>
5362306a36Sopenharmony_ci#include <asm/apic.h>
5462306a36Sopenharmony_ci#include <asm/page.h>
5562306a36Sopenharmony_ci#include <asm/xen/pci.h>
5662306a36Sopenharmony_ci#include <asm/xen/hypercall.h>
5762306a36Sopenharmony_ci#include <asm/xen/hypervisor.h>
5862306a36Sopenharmony_ci#include <asm/xen/cpuid.h>
5962306a36Sopenharmony_ci#include <asm/fixmap.h>
6062306a36Sopenharmony_ci#include <asm/processor.h>
6162306a36Sopenharmony_ci#include <asm/proto.h>
6262306a36Sopenharmony_ci#include <asm/msr-index.h>
6362306a36Sopenharmony_ci#include <asm/traps.h>
6462306a36Sopenharmony_ci#include <asm/setup.h>
6562306a36Sopenharmony_ci#include <asm/desc.h>
6662306a36Sopenharmony_ci#include <asm/pgalloc.h>
6762306a36Sopenharmony_ci#include <asm/tlbflush.h>
6862306a36Sopenharmony_ci#include <asm/reboot.h>
6962306a36Sopenharmony_ci#include <asm/hypervisor.h>
7062306a36Sopenharmony_ci#include <asm/mach_traps.h>
7162306a36Sopenharmony_ci#include <asm/mtrr.h>
7262306a36Sopenharmony_ci#include <asm/mwait.h>
7362306a36Sopenharmony_ci#include <asm/pci_x86.h>
7462306a36Sopenharmony_ci#include <asm/cpu.h>
7562306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM
7662306a36Sopenharmony_ci#include <asm/io_bitmap.h>
7762306a36Sopenharmony_ci#endif
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci#ifdef CONFIG_ACPI
8062306a36Sopenharmony_ci#include <linux/acpi.h>
8162306a36Sopenharmony_ci#include <asm/acpi.h>
8262306a36Sopenharmony_ci#include <acpi/proc_cap_intel.h>
8362306a36Sopenharmony_ci#include <acpi/processor.h>
8462306a36Sopenharmony_ci#include <xen/interface/platform.h>
8562306a36Sopenharmony_ci#endif
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci#include "xen-ops.h"
8862306a36Sopenharmony_ci#include "mmu.h"
8962306a36Sopenharmony_ci#include "smp.h"
9062306a36Sopenharmony_ci#include "multicalls.h"
9162306a36Sopenharmony_ci#include "pmu.h"
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_civoid *xen_initial_gdt;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic int xen_cpu_up_prepare_pv(unsigned int cpu);
9862306a36Sopenharmony_cistatic int xen_cpu_dead_pv(unsigned int cpu);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_cistruct tls_descs {
10162306a36Sopenharmony_ci	struct desc_struct desc[3];
10262306a36Sopenharmony_ci};
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ciDEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE;
10562306a36Sopenharmony_ciDEFINE_PER_CPU(unsigned int, xen_lazy_nesting);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_cienum xen_lazy_mode xen_get_lazy_mode(void)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	if (in_interrupt())
11062306a36Sopenharmony_ci		return XEN_LAZY_NONE;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	return this_cpu_read(xen_lazy_mode);
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci/*
11662306a36Sopenharmony_ci * Updating the 3 TLS descriptors in the GDT on every task switch is
11762306a36Sopenharmony_ci * surprisingly expensive so we avoid updating them if they haven't
11862306a36Sopenharmony_ci * changed.  Since Xen writes different descriptors than the one
11962306a36Sopenharmony_ci * passed in the update_descriptor hypercall we keep shadow copies to
12062306a36Sopenharmony_ci * compare against.
12162306a36Sopenharmony_ci */
12262306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_cistatic __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistatic int __init parse_xen_msr_safe(char *str)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	if (str)
12962306a36Sopenharmony_ci		return kstrtobool(str, &xen_msr_safe);
13062306a36Sopenharmony_ci	return -EINVAL;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ciearly_param("xen_msr_safe", parse_xen_msr_safe);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci/* Get MTRR settings from Xen and put them into mtrr_state. */
13562306a36Sopenharmony_cistatic void __init xen_set_mtrr_data(void)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci#ifdef CONFIG_MTRR
13862306a36Sopenharmony_ci	struct xen_platform_op op = {
13962306a36Sopenharmony_ci		.cmd = XENPF_read_memtype,
14062306a36Sopenharmony_ci		.interface_version = XENPF_INTERFACE_VERSION,
14162306a36Sopenharmony_ci	};
14262306a36Sopenharmony_ci	unsigned int reg;
14362306a36Sopenharmony_ci	unsigned long mask;
14462306a36Sopenharmony_ci	uint32_t eax, width;
14562306a36Sopenharmony_ci	static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/* Get physical address width (only 64-bit cpus supported). */
14862306a36Sopenharmony_ci	width = 36;
14962306a36Sopenharmony_ci	eax = cpuid_eax(0x80000000);
15062306a36Sopenharmony_ci	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
15162306a36Sopenharmony_ci		eax = cpuid_eax(0x80000008);
15262306a36Sopenharmony_ci		width = eax & 0xff;
15362306a36Sopenharmony_ci	}
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) {
15662306a36Sopenharmony_ci		op.u.read_memtype.reg = reg;
15762306a36Sopenharmony_ci		if (HYPERVISOR_platform_op(&op))
15862306a36Sopenharmony_ci			break;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci		/*
16162306a36Sopenharmony_ci		 * Only called in dom0, which has all RAM PFNs mapped at
16262306a36Sopenharmony_ci		 * RAM MFNs, and all PCI space etc. is identity mapped.
16362306a36Sopenharmony_ci		 * This means we can treat MFN == PFN regarding MTRR settings.
16462306a36Sopenharmony_ci		 */
16562306a36Sopenharmony_ci		var[reg].base_lo = op.u.read_memtype.type;
16662306a36Sopenharmony_ci		var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT;
16762306a36Sopenharmony_ci		var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT);
16862306a36Sopenharmony_ci		mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1);
16962306a36Sopenharmony_ci		mask &= (1UL << width) - 1;
17062306a36Sopenharmony_ci		if (mask)
17162306a36Sopenharmony_ci			mask |= MTRR_PHYSMASK_V;
17262306a36Sopenharmony_ci		var[reg].mask_lo = mask;
17362306a36Sopenharmony_ci		var[reg].mask_hi = mask >> 32;
17462306a36Sopenharmony_ci	}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	/* Only overwrite MTRR state if any MTRR could be got from Xen. */
17762306a36Sopenharmony_ci	if (reg)
17862306a36Sopenharmony_ci		mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
17962306a36Sopenharmony_ci#endif
18062306a36Sopenharmony_ci}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_cistatic void __init xen_pv_init_platform(void)
18362306a36Sopenharmony_ci{
18462306a36Sopenharmony_ci	/* PV guests can't operate virtio devices without grants. */
18562306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_XEN_VIRTIO))
18662306a36Sopenharmony_ci		virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
19162306a36Sopenharmony_ci	HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
19462306a36Sopenharmony_ci	xen_vcpu_info_reset(0);
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	/* pvclock is in shared info area */
19762306a36Sopenharmony_ci	xen_init_time_ops();
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	if (xen_initial_domain())
20062306a36Sopenharmony_ci		xen_set_mtrr_data();
20162306a36Sopenharmony_ci	else
20262306a36Sopenharmony_ci		mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic void __init xen_pv_guest_late_init(void)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci#ifndef CONFIG_SMP
20862306a36Sopenharmony_ci	/* Setup shared vcpu info for non-smp configurations */
20962306a36Sopenharmony_ci	xen_setup_vcpu_info_placement();
21062306a36Sopenharmony_ci#endif
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_cistatic __read_mostly unsigned int cpuid_leaf5_ecx_val;
21462306a36Sopenharmony_cistatic __read_mostly unsigned int cpuid_leaf5_edx_val;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_cistatic void xen_cpuid(unsigned int *ax, unsigned int *bx,
21762306a36Sopenharmony_ci		      unsigned int *cx, unsigned int *dx)
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci	unsigned maskebx = ~0;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	/*
22262306a36Sopenharmony_ci	 * Mask out inconvenient features, to try and disable as many
22362306a36Sopenharmony_ci	 * unsupported kernel subsystems as possible.
22462306a36Sopenharmony_ci	 */
22562306a36Sopenharmony_ci	switch (*ax) {
22662306a36Sopenharmony_ci	case CPUID_MWAIT_LEAF:
22762306a36Sopenharmony_ci		/* Synthesize the values.. */
22862306a36Sopenharmony_ci		*ax = 0;
22962306a36Sopenharmony_ci		*bx = 0;
23062306a36Sopenharmony_ci		*cx = cpuid_leaf5_ecx_val;
23162306a36Sopenharmony_ci		*dx = cpuid_leaf5_edx_val;
23262306a36Sopenharmony_ci		return;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	case 0xb:
23562306a36Sopenharmony_ci		/* Suppress extended topology stuff */
23662306a36Sopenharmony_ci		maskebx = 0;
23762306a36Sopenharmony_ci		break;
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	asm(XEN_EMULATE_PREFIX "cpuid"
24162306a36Sopenharmony_ci		: "=a" (*ax),
24262306a36Sopenharmony_ci		  "=b" (*bx),
24362306a36Sopenharmony_ci		  "=c" (*cx),
24462306a36Sopenharmony_ci		  "=d" (*dx)
24562306a36Sopenharmony_ci		: "0" (*ax), "2" (*cx));
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	*bx &= maskebx;
24862306a36Sopenharmony_ci}
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_cistatic bool __init xen_check_mwait(void)
25162306a36Sopenharmony_ci{
25262306a36Sopenharmony_ci#ifdef CONFIG_ACPI
25362306a36Sopenharmony_ci	struct xen_platform_op op = {
25462306a36Sopenharmony_ci		.cmd			= XENPF_set_processor_pminfo,
25562306a36Sopenharmony_ci		.u.set_pminfo.id	= -1,
25662306a36Sopenharmony_ci		.u.set_pminfo.type	= XEN_PM_PDC,
25762306a36Sopenharmony_ci	};
25862306a36Sopenharmony_ci	uint32_t buf[3];
25962306a36Sopenharmony_ci	unsigned int ax, bx, cx, dx;
26062306a36Sopenharmony_ci	unsigned int mwait_mask;
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	/* We need to determine whether it is OK to expose the MWAIT
26362306a36Sopenharmony_ci	 * capability to the kernel to harvest deeper than C3 states from ACPI
26462306a36Sopenharmony_ci	 * _CST using the processor_harvest_xen.c module. For this to work, we
26562306a36Sopenharmony_ci	 * need to gather the MWAIT_LEAF values (which the cstate.c code
26662306a36Sopenharmony_ci	 * checks against). The hypervisor won't expose the MWAIT flag because
26762306a36Sopenharmony_ci	 * it would break backwards compatibility; so we will find out directly
26862306a36Sopenharmony_ci	 * from the hardware and hypercall.
26962306a36Sopenharmony_ci	 */
27062306a36Sopenharmony_ci	if (!xen_initial_domain())
27162306a36Sopenharmony_ci		return false;
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	/*
27462306a36Sopenharmony_ci	 * When running under platform earlier than Xen4.2, do not expose
27562306a36Sopenharmony_ci	 * mwait, to avoid the risk of loading native acpi pad driver
27662306a36Sopenharmony_ci	 */
27762306a36Sopenharmony_ci	if (!xen_running_on_version_or_later(4, 2))
27862306a36Sopenharmony_ci		return false;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	ax = 1;
28162306a36Sopenharmony_ci	cx = 0;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	native_cpuid(&ax, &bx, &cx, &dx);
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
28662306a36Sopenharmony_ci		     (1 << (X86_FEATURE_MWAIT % 32));
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci	if ((cx & mwait_mask) != mwait_mask)
28962306a36Sopenharmony_ci		return false;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	/* We need to emulate the MWAIT_LEAF and for that we need both
29262306a36Sopenharmony_ci	 * ecx and edx. The hypercall provides only partial information.
29362306a36Sopenharmony_ci	 */
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	ax = CPUID_MWAIT_LEAF;
29662306a36Sopenharmony_ci	bx = 0;
29762306a36Sopenharmony_ci	cx = 0;
29862306a36Sopenharmony_ci	dx = 0;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	native_cpuid(&ax, &bx, &cx, &dx);
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	/* Ask the Hypervisor whether to clear ACPI_PROC_CAP_C_C2C3_FFH. If so,
30362306a36Sopenharmony_ci	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
30462306a36Sopenharmony_ci	 */
30562306a36Sopenharmony_ci	buf[0] = ACPI_PDC_REVISION_ID;
30662306a36Sopenharmony_ci	buf[1] = 1;
30762306a36Sopenharmony_ci	buf[2] = (ACPI_PROC_CAP_C_CAPABILITY_SMP | ACPI_PROC_CAP_EST_CAPABILITY_SWSMP);
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	if ((HYPERVISOR_platform_op(&op) == 0) &&
31262306a36Sopenharmony_ci	    (buf[2] & (ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH))) {
31362306a36Sopenharmony_ci		cpuid_leaf5_ecx_val = cx;
31462306a36Sopenharmony_ci		cpuid_leaf5_edx_val = dx;
31562306a36Sopenharmony_ci	}
31662306a36Sopenharmony_ci	return true;
31762306a36Sopenharmony_ci#else
31862306a36Sopenharmony_ci	return false;
31962306a36Sopenharmony_ci#endif
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_cistatic bool __init xen_check_xsave(void)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	unsigned int cx, xsave_mask;
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	cx = cpuid_ecx(1);
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
32962306a36Sopenharmony_ci		     (1 << (X86_FEATURE_OSXSAVE % 32));
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
33262306a36Sopenharmony_ci	return (cx & xsave_mask) == xsave_mask;
33362306a36Sopenharmony_ci}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_cistatic void __init xen_init_capabilities(void)
33662306a36Sopenharmony_ci{
33762306a36Sopenharmony_ci	setup_force_cpu_cap(X86_FEATURE_XENPV);
33862306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_DCA);
33962306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
34062306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_MTRR);
34162306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_ACC);
34262306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
34362306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_SME);
34462306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_LKGS);
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * Xen PV would need some work to support PCID: CR3 handling as well
34862306a36Sopenharmony_ci	 * as xen_flush_tlb_others() would need updating.
34962306a36Sopenharmony_ci	 */
35062306a36Sopenharmony_ci	setup_clear_cpu_cap(X86_FEATURE_PCID);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	if (!xen_initial_domain())
35362306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_ACPI);
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	if (xen_check_mwait())
35662306a36Sopenharmony_ci		setup_force_cpu_cap(X86_FEATURE_MWAIT);
35762306a36Sopenharmony_ci	else
35862306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_MWAIT);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	if (!xen_check_xsave()) {
36162306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
36262306a36Sopenharmony_ci		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
36362306a36Sopenharmony_ci	}
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_cistatic noinstr void xen_set_debugreg(int reg, unsigned long val)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	HYPERVISOR_set_debugreg(reg, val);
36962306a36Sopenharmony_ci}
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_cistatic noinstr unsigned long xen_get_debugreg(int reg)
37262306a36Sopenharmony_ci{
37362306a36Sopenharmony_ci	return HYPERVISOR_get_debugreg(reg);
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_cistatic void xen_start_context_switch(struct task_struct *prev)
37762306a36Sopenharmony_ci{
37862306a36Sopenharmony_ci	BUG_ON(preemptible());
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) {
38162306a36Sopenharmony_ci		arch_leave_lazy_mmu_mode();
38262306a36Sopenharmony_ci		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
38362306a36Sopenharmony_ci	}
38462306a36Sopenharmony_ci	enter_lazy(XEN_LAZY_CPU);
38562306a36Sopenharmony_ci}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_cistatic void xen_end_context_switch(struct task_struct *next)
38862306a36Sopenharmony_ci{
38962306a36Sopenharmony_ci	BUG_ON(preemptible());
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	xen_mc_flush();
39262306a36Sopenharmony_ci	leave_lazy(XEN_LAZY_CPU);
39362306a36Sopenharmony_ci	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
39462306a36Sopenharmony_ci		arch_enter_lazy_mmu_mode();
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
/*
 * Always returns 0; no state is read.  (Presumably the task register
 * hook for PV guests, judging by the name.)
 */
static unsigned long xen_store_tr(void)
{
	return 0UL;
}
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci/*
40362306a36Sopenharmony_ci * Set the page permissions for a particular virtual address.  If the
40462306a36Sopenharmony_ci * address is a vmalloc mapping (or other non-linear mapping), then
40562306a36Sopenharmony_ci * find the linear mapping of the page and also set its protections to
40662306a36Sopenharmony_ci * match.
40762306a36Sopenharmony_ci */
40862306a36Sopenharmony_cistatic void set_aliased_prot(void *v, pgprot_t prot)
40962306a36Sopenharmony_ci{
41062306a36Sopenharmony_ci	int level;
41162306a36Sopenharmony_ci	pte_t *ptep;
41262306a36Sopenharmony_ci	pte_t pte;
41362306a36Sopenharmony_ci	unsigned long pfn;
41462306a36Sopenharmony_ci	unsigned char dummy;
41562306a36Sopenharmony_ci	void *va;
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	ptep = lookup_address((unsigned long)v, &level);
41862306a36Sopenharmony_ci	BUG_ON(ptep == NULL);
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	pfn = pte_pfn(*ptep);
42162306a36Sopenharmony_ci	pte = pfn_pte(pfn, prot);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	/*
42462306a36Sopenharmony_ci	 * Careful: update_va_mapping() will fail if the virtual address
42562306a36Sopenharmony_ci	 * we're poking isn't populated in the page tables.  We don't
42662306a36Sopenharmony_ci	 * need to worry about the direct map (that's always in the page
42762306a36Sopenharmony_ci	 * tables), but we need to be careful about vmap space.  In
42862306a36Sopenharmony_ci	 * particular, the top level page table can lazily propagate
42962306a36Sopenharmony_ci	 * entries between processes, so if we've switched mms since we
43062306a36Sopenharmony_ci	 * vmapped the target in the first place, we might not have the
43162306a36Sopenharmony_ci	 * top-level page table entry populated.
43262306a36Sopenharmony_ci	 *
43362306a36Sopenharmony_ci	 * We disable preemption because we want the same mm active when
43462306a36Sopenharmony_ci	 * we probe the target and when we issue the hypercall.  We'll
43562306a36Sopenharmony_ci	 * have the same nominal mm, but if we're a kernel thread, lazy
43662306a36Sopenharmony_ci	 * mm dropping could change our pgd.
43762306a36Sopenharmony_ci	 *
43862306a36Sopenharmony_ci	 * Out of an abundance of caution, this uses __get_user() to fault
43962306a36Sopenharmony_ci	 * in the target address just in case there's some obscure case
44062306a36Sopenharmony_ci	 * in which the target address isn't readable.
44162306a36Sopenharmony_ci	 */
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	preempt_disable();
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	copy_from_kernel_nofault(&dummy, v, 1);
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
44862306a36Sopenharmony_ci		BUG();
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	va = __va(PFN_PHYS(pfn));
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
45362306a36Sopenharmony_ci		BUG();
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	preempt_enable();
45662306a36Sopenharmony_ci}
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_cistatic void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
45962306a36Sopenharmony_ci{
46062306a36Sopenharmony_ci	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
46162306a36Sopenharmony_ci	int i;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	/*
46462306a36Sopenharmony_ci	 * We need to mark the all aliases of the LDT pages RO.  We
46562306a36Sopenharmony_ci	 * don't need to call vm_flush_aliases(), though, since that's
46662306a36Sopenharmony_ci	 * only responsible for flushing aliases out the TLBs, not the
46762306a36Sopenharmony_ci	 * page tables, and Xen will flush the TLB for us if needed.
46862306a36Sopenharmony_ci	 *
46962306a36Sopenharmony_ci	 * To avoid confusing future readers: none of this is necessary
47062306a36Sopenharmony_ci	 * to load the LDT.  The hypervisor only checks this when the
47162306a36Sopenharmony_ci	 * LDT is faulted in due to subsequent descriptor access.
47262306a36Sopenharmony_ci	 */
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	for (i = 0; i < entries; i += entries_per_page)
47562306a36Sopenharmony_ci		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_cistatic void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
48162306a36Sopenharmony_ci	int i;
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	for (i = 0; i < entries; i += entries_per_page)
48462306a36Sopenharmony_ci		set_aliased_prot(ldt + i, PAGE_KERNEL);
48562306a36Sopenharmony_ci}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_cistatic void xen_set_ldt(const void *addr, unsigned entries)
48862306a36Sopenharmony_ci{
48962306a36Sopenharmony_ci	struct mmuext_op *op;
49062306a36Sopenharmony_ci	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	trace_xen_cpu_set_ldt(addr, entries);
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	op = mcs.args;
49562306a36Sopenharmony_ci	op->cmd = MMUEXT_SET_LDT;
49662306a36Sopenharmony_ci	op->arg1.linear_addr = (unsigned long)addr;
49762306a36Sopenharmony_ci	op->arg2.nr_ents = entries;
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	xen_mc_issue(XEN_LAZY_CPU);
50262306a36Sopenharmony_ci}
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_cistatic void xen_load_gdt(const struct desc_ptr *dtr)
50562306a36Sopenharmony_ci{
50662306a36Sopenharmony_ci	unsigned long va = dtr->address;
50762306a36Sopenharmony_ci	unsigned int size = dtr->size + 1;
50862306a36Sopenharmony_ci	unsigned long pfn, mfn;
50962306a36Sopenharmony_ci	int level;
51062306a36Sopenharmony_ci	pte_t *ptep;
51162306a36Sopenharmony_ci	void *virt;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
51462306a36Sopenharmony_ci	BUG_ON(size > PAGE_SIZE);
51562306a36Sopenharmony_ci	BUG_ON(va & ~PAGE_MASK);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	/*
51862306a36Sopenharmony_ci	 * The GDT is per-cpu and is in the percpu data area.
51962306a36Sopenharmony_ci	 * That can be virtually mapped, so we need to do a
52062306a36Sopenharmony_ci	 * page-walk to get the underlying MFN for the
52162306a36Sopenharmony_ci	 * hypercall.  The page can also be in the kernel's
52262306a36Sopenharmony_ci	 * linear range, so we need to RO that mapping too.
52362306a36Sopenharmony_ci	 */
52462306a36Sopenharmony_ci	ptep = lookup_address(va, &level);
52562306a36Sopenharmony_ci	BUG_ON(ptep == NULL);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	pfn = pte_pfn(*ptep);
52862306a36Sopenharmony_ci	mfn = pfn_to_mfn(pfn);
52962306a36Sopenharmony_ci	virt = __va(PFN_PHYS(pfn));
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	make_lowmem_page_readonly((void *)va);
53262306a36Sopenharmony_ci	make_lowmem_page_readonly(virt);
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
53562306a36Sopenharmony_ci		BUG();
53662306a36Sopenharmony_ci}
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci/*
53962306a36Sopenharmony_ci * load_gdt for early boot, when the gdt is only mapped once
54062306a36Sopenharmony_ci */
54162306a36Sopenharmony_cistatic void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
54262306a36Sopenharmony_ci{
54362306a36Sopenharmony_ci	unsigned long va = dtr->address;
54462306a36Sopenharmony_ci	unsigned int size = dtr->size + 1;
54562306a36Sopenharmony_ci	unsigned long pfn, mfn;
54662306a36Sopenharmony_ci	pte_t pte;
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
54962306a36Sopenharmony_ci	BUG_ON(size > PAGE_SIZE);
55062306a36Sopenharmony_ci	BUG_ON(va & ~PAGE_MASK);
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	pfn = virt_to_pfn((void *)va);
55362306a36Sopenharmony_ci	mfn = pfn_to_mfn(pfn);
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci	pte = pfn_pte(pfn, PAGE_KERNEL_RO);
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
55862306a36Sopenharmony_ci		BUG();
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
56162306a36Sopenharmony_ci		BUG();
56262306a36Sopenharmony_ci}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_cistatic inline bool desc_equal(const struct desc_struct *d1,
56562306a36Sopenharmony_ci			      const struct desc_struct *d2)
56662306a36Sopenharmony_ci{
56762306a36Sopenharmony_ci	return !memcmp(d1, d2, sizeof(*d1));
56862306a36Sopenharmony_ci}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_cistatic void load_TLS_descriptor(struct thread_struct *t,
57162306a36Sopenharmony_ci				unsigned int cpu, unsigned int i)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
57462306a36Sopenharmony_ci	struct desc_struct *gdt;
57562306a36Sopenharmony_ci	xmaddr_t maddr;
57662306a36Sopenharmony_ci	struct multicall_space mc;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	if (desc_equal(shadow, &t->tls_array[i]))
57962306a36Sopenharmony_ci		return;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	*shadow = t->tls_array[i];
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	gdt = get_cpu_gdt_rw(cpu);
58462306a36Sopenharmony_ci	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
58562306a36Sopenharmony_ci	mc = __xen_mc_entry(0);
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
58862306a36Sopenharmony_ci}
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_cistatic void xen_load_tls(struct thread_struct *t, unsigned int cpu)
59162306a36Sopenharmony_ci{
59262306a36Sopenharmony_ci	/*
59362306a36Sopenharmony_ci	 * In lazy mode we need to zero %fs, otherwise we may get an
59462306a36Sopenharmony_ci	 * exception between the new %fs descriptor being loaded and
59562306a36Sopenharmony_ci	 * %fs being effectively cleared at __switch_to().
59662306a36Sopenharmony_ci	 */
59762306a36Sopenharmony_ci	if (xen_get_lazy_mode() == XEN_LAZY_CPU)
59862306a36Sopenharmony_ci		loadsegment(fs, 0);
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	xen_mc_batch();
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	load_TLS_descriptor(t, cpu, 0);
60362306a36Sopenharmony_ci	load_TLS_descriptor(t, cpu, 1);
60462306a36Sopenharmony_ci	load_TLS_descriptor(t, cpu, 2);
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	xen_mc_issue(XEN_LAZY_CPU);
60762306a36Sopenharmony_ci}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_cistatic void xen_load_gs_index(unsigned int idx)
61062306a36Sopenharmony_ci{
61162306a36Sopenharmony_ci	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
61262306a36Sopenharmony_ci		BUG();
61362306a36Sopenharmony_ci}
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_cistatic void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
61662306a36Sopenharmony_ci				const void *ptr)
61762306a36Sopenharmony_ci{
61862306a36Sopenharmony_ci	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
61962306a36Sopenharmony_ci	u64 entry = *(u64 *)ptr;
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	preempt_disable();
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	xen_mc_flush();
62662306a36Sopenharmony_ci	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
62762306a36Sopenharmony_ci		BUG();
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	preempt_enable();
63062306a36Sopenharmony_ci}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_civoid noist_exc_debug(struct pt_regs *regs);
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
63562306a36Sopenharmony_ci{
63662306a36Sopenharmony_ci	/* On Xen PV, NMI doesn't use IST.  The C part is the same as native. */
63762306a36Sopenharmony_ci	exc_nmi(regs);
63862306a36Sopenharmony_ci}
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault)
64162306a36Sopenharmony_ci{
64262306a36Sopenharmony_ci	/* On Xen PV, DF doesn't use IST.  The C part is the same as native. */
64362306a36Sopenharmony_ci	exc_double_fault(regs, error_code);
64462306a36Sopenharmony_ci}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(xenpv_exc_debug)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	/*
64962306a36Sopenharmony_ci	 * There's no IST on Xen PV, but we still need to dispatch
65062306a36Sopenharmony_ci	 * to the correct handler.
65162306a36Sopenharmony_ci	 */
65262306a36Sopenharmony_ci	if (user_mode(regs))
65362306a36Sopenharmony_ci		noist_exc_debug(regs);
65462306a36Sopenharmony_ci	else
65562306a36Sopenharmony_ci		exc_debug(regs);
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
/*
 * Fallback handler for traps that have no dedicated Xen PV entry point.
 * get_trap_addr() installs the xen_asm_ variant of this handler for any
 * gate address it does not recognise.
 */
DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
{
	/* This should never happen and there is no way to handle it. */
	instrumentation_begin();
	/*
	 * Terminate the message with a newline so that it forms a complete
	 * log record of its own before BUG() starts emitting its report.
	 */
	pr_err("Unknown trap in Xen PV mode.\n");
	BUG();
	instrumentation_end();
}
66662306a36Sopenharmony_ci
#ifdef CONFIG_X86_MCE
/*
 * Machine-check entry for Xen PV.  Xen PV has no IST, yet the exception
 * still has to reach the matching handler: the noist variant when it was
 * raised in user mode, exc_machine_check() otherwise.
 */
DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
{
	if (!user_mode(regs)) {
		exc_machine_check(regs);
		return;
	}

	noist_exc_machine_check(regs);
}
#endif
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_cistruct trap_array_entry {
68262306a36Sopenharmony_ci	void (*orig)(void);
68362306a36Sopenharmony_ci	void (*xen)(void);
68462306a36Sopenharmony_ci	bool ist_okay;
68562306a36Sopenharmony_ci};
68662306a36Sopenharmony_ci
/*
 * Table row mapping asm_<fn> to xen_asm_<fn>, i.e. the Xen entry stub
 * wraps the very same C handler as the native one.
 */
#define TRAP_ENTRY(fn, ist_allowed) {			\
	.orig		= asm_##fn,			\
	.xen		= xen_asm_##fn,			\
	.ist_okay	= ist_allowed }
69162306a36Sopenharmony_ci
/*
 * Table row mapping asm_<fn> to xen_asm_xenpv_<fn>, i.e. the trap is
 * redirected to a Xen PV specific handler (the xenpv_exc_* functions).
 */
#define TRAP_ENTRY_REDIR(fn, ist_allowed) {		\
	.orig		= asm_##fn,			\
	.xen		= xen_asm_xenpv_##fn,		\
	.ist_okay	= ist_allowed }
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_cistatic struct trap_array_entry trap_array[] = {
69862306a36Sopenharmony_ci	TRAP_ENTRY_REDIR(exc_debug,			true  ),
69962306a36Sopenharmony_ci	TRAP_ENTRY_REDIR(exc_double_fault,		true  ),
70062306a36Sopenharmony_ci#ifdef CONFIG_X86_MCE
70162306a36Sopenharmony_ci	TRAP_ENTRY_REDIR(exc_machine_check,		true  ),
70262306a36Sopenharmony_ci#endif
70362306a36Sopenharmony_ci	TRAP_ENTRY_REDIR(exc_nmi,			true  ),
70462306a36Sopenharmony_ci	TRAP_ENTRY(exc_int3,				false ),
70562306a36Sopenharmony_ci	TRAP_ENTRY(exc_overflow,			false ),
70662306a36Sopenharmony_ci#ifdef CONFIG_IA32_EMULATION
70762306a36Sopenharmony_ci	TRAP_ENTRY(int80_emulation,			false ),
70862306a36Sopenharmony_ci#endif
70962306a36Sopenharmony_ci	TRAP_ENTRY(exc_page_fault,			false ),
71062306a36Sopenharmony_ci	TRAP_ENTRY(exc_divide_error,			false ),
71162306a36Sopenharmony_ci	TRAP_ENTRY(exc_bounds,				false ),
71262306a36Sopenharmony_ci	TRAP_ENTRY(exc_invalid_op,			false ),
71362306a36Sopenharmony_ci	TRAP_ENTRY(exc_device_not_available,		false ),
71462306a36Sopenharmony_ci	TRAP_ENTRY(exc_coproc_segment_overrun,		false ),
71562306a36Sopenharmony_ci	TRAP_ENTRY(exc_invalid_tss,			false ),
71662306a36Sopenharmony_ci	TRAP_ENTRY(exc_segment_not_present,		false ),
71762306a36Sopenharmony_ci	TRAP_ENTRY(exc_stack_segment,			false ),
71862306a36Sopenharmony_ci	TRAP_ENTRY(exc_general_protection,		false ),
71962306a36Sopenharmony_ci	TRAP_ENTRY(exc_spurious_interrupt_bug,		false ),
72062306a36Sopenharmony_ci	TRAP_ENTRY(exc_coprocessor_error,		false ),
72162306a36Sopenharmony_ci	TRAP_ENTRY(exc_alignment_check,			false ),
72262306a36Sopenharmony_ci	TRAP_ENTRY(exc_simd_coprocessor_error,		false ),
72362306a36Sopenharmony_ci#ifdef CONFIG_X86_CET
72462306a36Sopenharmony_ci	TRAP_ENTRY(exc_control_protection,		false ),
72562306a36Sopenharmony_ci#endif
72662306a36Sopenharmony_ci};
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_cistatic bool __ref get_trap_addr(void **addr, unsigned int ist)
72962306a36Sopenharmony_ci{
73062306a36Sopenharmony_ci	unsigned int nr;
73162306a36Sopenharmony_ci	bool ist_okay = false;
73262306a36Sopenharmony_ci	bool found = false;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	/*
73562306a36Sopenharmony_ci	 * Replace trap handler addresses by Xen specific ones.
73662306a36Sopenharmony_ci	 * Check for known traps using IST and whitelist them.
73762306a36Sopenharmony_ci	 * The debugger ones are the only ones we care about.
73862306a36Sopenharmony_ci	 * Xen will handle faults like double_fault, so we should never see
73962306a36Sopenharmony_ci	 * them.  Warn if there's an unexpected IST-using fault handler.
74062306a36Sopenharmony_ci	 */
74162306a36Sopenharmony_ci	for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
74262306a36Sopenharmony_ci		struct trap_array_entry *entry = trap_array + nr;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci		if (*addr == entry->orig) {
74562306a36Sopenharmony_ci			*addr = entry->xen;
74662306a36Sopenharmony_ci			ist_okay = entry->ist_okay;
74762306a36Sopenharmony_ci			found = true;
74862306a36Sopenharmony_ci			break;
74962306a36Sopenharmony_ci		}
75062306a36Sopenharmony_ci	}
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	if (nr == ARRAY_SIZE(trap_array) &&
75362306a36Sopenharmony_ci	    *addr >= (void *)early_idt_handler_array[0] &&
75462306a36Sopenharmony_ci	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
75562306a36Sopenharmony_ci		nr = (*addr - (void *)early_idt_handler_array[0]) /
75662306a36Sopenharmony_ci		     EARLY_IDT_HANDLER_SIZE;
75762306a36Sopenharmony_ci		*addr = (void *)xen_early_idt_handler_array[nr];
75862306a36Sopenharmony_ci		found = true;
75962306a36Sopenharmony_ci	}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	if (!found)
76262306a36Sopenharmony_ci		*addr = (void *)xen_asm_exc_xen_unknown_trap;
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	if (WARN_ON(found && ist != 0 && !ist_okay))
76562306a36Sopenharmony_ci		return false;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	return true;
76862306a36Sopenharmony_ci}
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_cistatic int cvt_gate_to_trap(int vector, const gate_desc *val,
77162306a36Sopenharmony_ci			    struct trap_info *info)
77262306a36Sopenharmony_ci{
77362306a36Sopenharmony_ci	unsigned long addr;
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
77662306a36Sopenharmony_ci		return 0;
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	info->vector = vector;
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	addr = gate_offset(val);
78162306a36Sopenharmony_ci	if (!get_trap_addr((void **)&addr, val->bits.ist))
78262306a36Sopenharmony_ci		return 0;
78362306a36Sopenharmony_ci	info->address = addr;
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	info->cs = gate_segment(val);
78662306a36Sopenharmony_ci	info->flags = val->bits.dpl;
78762306a36Sopenharmony_ci	/* interrupt gates clear IF */
78862306a36Sopenharmony_ci	if (val->bits.type == GATE_INTERRUPT)
78962306a36Sopenharmony_ci		info->flags |= 1 << 2;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	return 1;
79262306a36Sopenharmony_ci}
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci/* Locations of each CPU's IDT */
79562306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct desc_ptr, idt_desc);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci/* Set an IDT entry.  If the entry is part of the current IDT, then
79862306a36Sopenharmony_ci   also update Xen. */
79962306a36Sopenharmony_cistatic void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
80062306a36Sopenharmony_ci{
80162306a36Sopenharmony_ci	unsigned long p = (unsigned long)&dt[entrynum];
80262306a36Sopenharmony_ci	unsigned long start, end;
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	trace_xen_cpu_write_idt_entry(dt, entrynum, g);
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	preempt_disable();
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	start = __this_cpu_read(idt_desc.address);
80962306a36Sopenharmony_ci	end = start + __this_cpu_read(idt_desc.size) + 1;
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	xen_mc_flush();
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	native_write_idt_entry(dt, entrynum, g);
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	if (p >= start && (p + 8) <= end) {
81662306a36Sopenharmony_ci		struct trap_info info[2];
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci		info[1].address = 0;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci		if (cvt_gate_to_trap(entrynum, g, &info[0]))
82162306a36Sopenharmony_ci			if (HYPERVISOR_set_trap_table(info))
82262306a36Sopenharmony_ci				BUG();
82362306a36Sopenharmony_ci	}
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	preempt_enable();
82662306a36Sopenharmony_ci}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_cistatic unsigned xen_convert_trap_info(const struct desc_ptr *desc,
82962306a36Sopenharmony_ci				      struct trap_info *traps, bool full)
83062306a36Sopenharmony_ci{
83162306a36Sopenharmony_ci	unsigned in, out, count;
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	count = (desc->size+1) / sizeof(gate_desc);
83462306a36Sopenharmony_ci	BUG_ON(count > 256);
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	for (in = out = 0; in < count; in++) {
83762306a36Sopenharmony_ci		gate_desc *entry = (gate_desc *)(desc->address) + in;
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci		if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
84062306a36Sopenharmony_ci			out++;
84162306a36Sopenharmony_ci	}
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci	return out;
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_civoid xen_copy_trap_info(struct trap_info *traps)
84762306a36Sopenharmony_ci{
84862306a36Sopenharmony_ci	const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci	xen_convert_trap_info(desc, traps, true);
85162306a36Sopenharmony_ci}
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
85462306a36Sopenharmony_ci   hold a spinlock to protect the static traps[] array (static because
85562306a36Sopenharmony_ci   it avoids allocation, and saves stack space). */
85662306a36Sopenharmony_cistatic void xen_load_idt(const struct desc_ptr *desc)
85762306a36Sopenharmony_ci{
85862306a36Sopenharmony_ci	static DEFINE_SPINLOCK(lock);
85962306a36Sopenharmony_ci	static struct trap_info traps[257];
86062306a36Sopenharmony_ci	static const struct trap_info zero = { };
86162306a36Sopenharmony_ci	unsigned out;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	trace_xen_cpu_load_idt(desc);
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	spin_lock(&lock);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	out = xen_convert_trap_info(desc, traps, false);
87062306a36Sopenharmony_ci	traps[out] = zero;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	xen_mc_flush();
87362306a36Sopenharmony_ci	if (HYPERVISOR_set_trap_table(traps))
87462306a36Sopenharmony_ci		BUG();
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	spin_unlock(&lock);
87762306a36Sopenharmony_ci}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
88062306a36Sopenharmony_ci   they're handled differently. */
88162306a36Sopenharmony_cistatic void xen_write_gdt_entry(struct desc_struct *dt, int entry,
88262306a36Sopenharmony_ci				const void *desc, int type)
88362306a36Sopenharmony_ci{
88462306a36Sopenharmony_ci	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	preempt_disable();
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	switch (type) {
88962306a36Sopenharmony_ci	case DESC_LDT:
89062306a36Sopenharmony_ci	case DESC_TSS:
89162306a36Sopenharmony_ci		/* ignore */
89262306a36Sopenharmony_ci		break;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	default: {
89562306a36Sopenharmony_ci		xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci		xen_mc_flush();
89862306a36Sopenharmony_ci		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
89962306a36Sopenharmony_ci			BUG();
90062306a36Sopenharmony_ci	}
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	preempt_enable();
90562306a36Sopenharmony_ci}
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci/*
90862306a36Sopenharmony_ci * Version of write_gdt_entry for use at early boot-time needed to
90962306a36Sopenharmony_ci * update an entry as simply as possible.
91062306a36Sopenharmony_ci */
91162306a36Sopenharmony_cistatic void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
91262306a36Sopenharmony_ci					    const void *desc, int type)
91362306a36Sopenharmony_ci{
91462306a36Sopenharmony_ci	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	switch (type) {
91762306a36Sopenharmony_ci	case DESC_LDT:
91862306a36Sopenharmony_ci	case DESC_TSS:
91962306a36Sopenharmony_ci		/* ignore */
92062306a36Sopenharmony_ci		break;
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci	default: {
92362306a36Sopenharmony_ci		xmaddr_t maddr = virt_to_machine(&dt[entry]);
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
92662306a36Sopenharmony_ci			dt[entry] = *(struct desc_struct *)desc;
92762306a36Sopenharmony_ci	}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	}
93062306a36Sopenharmony_ci}
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_cistatic void xen_load_sp0(unsigned long sp0)
93362306a36Sopenharmony_ci{
93462306a36Sopenharmony_ci	struct multicall_space mcs;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	mcs = xen_mc_entry(0);
93762306a36Sopenharmony_ci	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
93862306a36Sopenharmony_ci	xen_mc_issue(XEN_LAZY_CPU);
93962306a36Sopenharmony_ci	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
94062306a36Sopenharmony_ci}
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM
94362306a36Sopenharmony_cistatic void xen_invalidate_io_bitmap(void)
94462306a36Sopenharmony_ci{
94562306a36Sopenharmony_ci	struct physdev_set_iobitmap iobitmap = {
94662306a36Sopenharmony_ci		.bitmap = NULL,
94762306a36Sopenharmony_ci		.nr_ports = 0,
94862306a36Sopenharmony_ci	};
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	native_tss_invalidate_io_bitmap();
95162306a36Sopenharmony_ci	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
95262306a36Sopenharmony_ci}
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_cistatic void xen_update_io_bitmap(void)
95562306a36Sopenharmony_ci{
95662306a36Sopenharmony_ci	struct physdev_set_iobitmap iobitmap;
95762306a36Sopenharmony_ci	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	native_tss_update_io_bitmap();
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
96262306a36Sopenharmony_ci			  tss->x86_tss.io_bitmap_base;
96362306a36Sopenharmony_ci	if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
96462306a36Sopenharmony_ci		iobitmap.nr_ports = 0;
96562306a36Sopenharmony_ci	else
96662306a36Sopenharmony_ci		iobitmap.nr_ports = IO_BITMAP_BITS;
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
96962306a36Sopenharmony_ci}
97062306a36Sopenharmony_ci#endif
97162306a36Sopenharmony_ci
/* The io_delay hook for Xen PV: intentionally does nothing. */
static void xen_io_delay(void)
{
	/* nothing to do */
}
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long, xen_cr0_value);
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_cistatic unsigned long xen_read_cr0(void)
97962306a36Sopenharmony_ci{
98062306a36Sopenharmony_ci	unsigned long cr0 = this_cpu_read(xen_cr0_value);
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci	if (unlikely(cr0 == 0)) {
98362306a36Sopenharmony_ci		cr0 = native_read_cr0();
98462306a36Sopenharmony_ci		this_cpu_write(xen_cr0_value, cr0);
98562306a36Sopenharmony_ci	}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	return cr0;
98862306a36Sopenharmony_ci}
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_cistatic void xen_write_cr0(unsigned long cr0)
99162306a36Sopenharmony_ci{
99262306a36Sopenharmony_ci	struct multicall_space mcs;
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	this_cpu_write(xen_cr0_value, cr0);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	/* Only pay attention to cr0.TS; everything else is
99762306a36Sopenharmony_ci	   ignored. */
99862306a36Sopenharmony_ci	mcs = xen_mc_entry(0);
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci	xen_mc_issue(XEN_LAZY_CPU);
100362306a36Sopenharmony_ci}
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_cistatic void xen_write_cr4(unsigned long cr4)
100662306a36Sopenharmony_ci{
100762306a36Sopenharmony_ci	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	native_write_cr4(cr4);
101062306a36Sopenharmony_ci}
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_cistatic u64 xen_do_read_msr(unsigned int msr, int *err)
101362306a36Sopenharmony_ci{
101462306a36Sopenharmony_ci	u64 val = 0;	/* Avoid uninitialized value for safe variant. */
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci	if (pmu_msr_read(msr, &val, err))
101762306a36Sopenharmony_ci		return val;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	if (err)
102062306a36Sopenharmony_ci		val = native_read_msr_safe(msr, err);
102162306a36Sopenharmony_ci	else
102262306a36Sopenharmony_ci		val = native_read_msr(msr);
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	switch (msr) {
102562306a36Sopenharmony_ci	case MSR_IA32_APICBASE:
102662306a36Sopenharmony_ci		val &= ~X2APIC_ENABLE;
102762306a36Sopenharmony_ci		break;
102862306a36Sopenharmony_ci	}
102962306a36Sopenharmony_ci	return val;
103062306a36Sopenharmony_ci}
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_cistatic void set_seg(unsigned int which, unsigned int low, unsigned int high,
103362306a36Sopenharmony_ci		    int *err)
103462306a36Sopenharmony_ci{
103562306a36Sopenharmony_ci	u64 base = ((u64)high << 32) | low;
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci	if (HYPERVISOR_set_segment_base(which, base) == 0)
103862306a36Sopenharmony_ci		return;
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci	if (err)
104162306a36Sopenharmony_ci		*err = -EIO;
104262306a36Sopenharmony_ci	else
104362306a36Sopenharmony_ci		WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
104462306a36Sopenharmony_ci}
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci/*
104762306a36Sopenharmony_ci * Support write_msr_safe() and write_msr() semantics.
104862306a36Sopenharmony_ci * With err == NULL write_msr() semantics are selected.
104962306a36Sopenharmony_ci * Supplying an err pointer requires err to be pre-initialized with 0.
105062306a36Sopenharmony_ci */
105162306a36Sopenharmony_cistatic void xen_do_write_msr(unsigned int msr, unsigned int low,
105262306a36Sopenharmony_ci			     unsigned int high, int *err)
105362306a36Sopenharmony_ci{
105462306a36Sopenharmony_ci	switch (msr) {
105562306a36Sopenharmony_ci	case MSR_FS_BASE:
105662306a36Sopenharmony_ci		set_seg(SEGBASE_FS, low, high, err);
105762306a36Sopenharmony_ci		break;
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	case MSR_KERNEL_GS_BASE:
106062306a36Sopenharmony_ci		set_seg(SEGBASE_GS_USER, low, high, err);
106162306a36Sopenharmony_ci		break;
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	case MSR_GS_BASE:
106462306a36Sopenharmony_ci		set_seg(SEGBASE_GS_KERNEL, low, high, err);
106562306a36Sopenharmony_ci		break;
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	case MSR_STAR:
106862306a36Sopenharmony_ci	case MSR_CSTAR:
106962306a36Sopenharmony_ci	case MSR_LSTAR:
107062306a36Sopenharmony_ci	case MSR_SYSCALL_MASK:
107162306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_CS:
107262306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_ESP:
107362306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_EIP:
107462306a36Sopenharmony_ci		/* Fast syscall setup is all done in hypercalls, so
107562306a36Sopenharmony_ci		   these are all ignored.  Stub them out here to stop
107662306a36Sopenharmony_ci		   Xen console noise. */
107762306a36Sopenharmony_ci		break;
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci	default:
108062306a36Sopenharmony_ci		if (!pmu_msr_write(msr, low, high, err)) {
108162306a36Sopenharmony_ci			if (err)
108262306a36Sopenharmony_ci				*err = native_write_msr_safe(msr, low, high);
108362306a36Sopenharmony_ci			else
108462306a36Sopenharmony_ci				native_write_msr(msr, low, high);
108562306a36Sopenharmony_ci		}
108662306a36Sopenharmony_ci	}
108762306a36Sopenharmony_ci}
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_cistatic u64 xen_read_msr_safe(unsigned int msr, int *err)
109062306a36Sopenharmony_ci{
109162306a36Sopenharmony_ci	return xen_do_read_msr(msr, err);
109262306a36Sopenharmony_ci}
109362306a36Sopenharmony_ci
/*
 * write_msr_safe hook: write @high:@low to @msr through the common
 * helper.  Returns whatever the helper left in the status word, which
 * starts out as 0.
 */
static int xen_write_msr_safe(unsigned int msr, unsigned int low,
			      unsigned int high)
{
	int status = 0;	/* the helper expects a zeroed status */

	xen_do_write_msr(msr, low, high, &status);

	return status;
}
110362306a36Sopenharmony_ci
110462306a36Sopenharmony_cistatic u64 xen_read_msr(unsigned int msr)
110562306a36Sopenharmony_ci{
110662306a36Sopenharmony_ci	int err;
110762306a36Sopenharmony_ci
110862306a36Sopenharmony_ci	return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
110962306a36Sopenharmony_ci}
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_cistatic void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
111262306a36Sopenharmony_ci{
111362306a36Sopenharmony_ci	int err;
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci	xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
111662306a36Sopenharmony_ci}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci/* This is called once we have the cpu_possible_mask */
111962306a36Sopenharmony_civoid __init xen_setup_vcpu_info_placement(void)
112062306a36Sopenharmony_ci{
112162306a36Sopenharmony_ci	int cpu;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
112462306a36Sopenharmony_ci		/* Set up direct vCPU id mapping for PV guests. */
112562306a36Sopenharmony_ci		per_cpu(xen_vcpu_id, cpu) = cpu;
112662306a36Sopenharmony_ci		xen_vcpu_setup(cpu);
112762306a36Sopenharmony_ci	}
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
113062306a36Sopenharmony_ci	pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
113162306a36Sopenharmony_ci	pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
113262306a36Sopenharmony_ci	pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
113362306a36Sopenharmony_ci}
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_cistatic const struct pv_info xen_info __initconst = {
113662306a36Sopenharmony_ci	.extra_user_64bit_cs = FLAT_USER_CS64,
113762306a36Sopenharmony_ci	.name = "Xen",
113862306a36Sopenharmony_ci};
113962306a36Sopenharmony_ci
114062306a36Sopenharmony_cistatic const typeof(pv_ops) xen_cpu_ops __initconst = {
114162306a36Sopenharmony_ci	.cpu = {
114262306a36Sopenharmony_ci		.cpuid = xen_cpuid,
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci		.set_debugreg = xen_set_debugreg,
114562306a36Sopenharmony_ci		.get_debugreg = xen_get_debugreg,
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci		.read_cr0 = xen_read_cr0,
114862306a36Sopenharmony_ci		.write_cr0 = xen_write_cr0,
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_ci		.write_cr4 = xen_write_cr4,
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci		.wbinvd = pv_native_wbinvd,
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci		.read_msr = xen_read_msr,
115562306a36Sopenharmony_ci		.write_msr = xen_write_msr,
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_ci		.read_msr_safe = xen_read_msr_safe,
115862306a36Sopenharmony_ci		.write_msr_safe = xen_write_msr_safe,
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci		.read_pmc = xen_read_pmc,
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci		.load_tr_desc = paravirt_nop,
116362306a36Sopenharmony_ci		.set_ldt = xen_set_ldt,
116462306a36Sopenharmony_ci		.load_gdt = xen_load_gdt,
116562306a36Sopenharmony_ci		.load_idt = xen_load_idt,
116662306a36Sopenharmony_ci		.load_tls = xen_load_tls,
116762306a36Sopenharmony_ci		.load_gs_index = xen_load_gs_index,
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci		.alloc_ldt = xen_alloc_ldt,
117062306a36Sopenharmony_ci		.free_ldt = xen_free_ldt,
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci		.store_tr = xen_store_tr,
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci		.write_ldt_entry = xen_write_ldt_entry,
117562306a36Sopenharmony_ci		.write_gdt_entry = xen_write_gdt_entry,
117662306a36Sopenharmony_ci		.write_idt_entry = xen_write_idt_entry,
117762306a36Sopenharmony_ci		.load_sp0 = xen_load_sp0,
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci#ifdef CONFIG_X86_IOPL_IOPERM
118062306a36Sopenharmony_ci		.invalidate_io_bitmap = xen_invalidate_io_bitmap,
118162306a36Sopenharmony_ci		.update_io_bitmap = xen_update_io_bitmap,
118262306a36Sopenharmony_ci#endif
118362306a36Sopenharmony_ci		.io_delay = xen_io_delay,
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_ci		.start_context_switch = xen_start_context_switch,
118662306a36Sopenharmony_ci		.end_context_switch = xen_end_context_switch,
118762306a36Sopenharmony_ci	},
118862306a36Sopenharmony_ci};
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_cistatic void xen_restart(char *msg)
119162306a36Sopenharmony_ci{
119262306a36Sopenharmony_ci	xen_reboot(SHUTDOWN_reboot);
119362306a36Sopenharmony_ci}
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_cistatic void xen_machine_halt(void)
119662306a36Sopenharmony_ci{
119762306a36Sopenharmony_ci	xen_reboot(SHUTDOWN_poweroff);
119862306a36Sopenharmony_ci}
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_cistatic void xen_machine_power_off(void)
120162306a36Sopenharmony_ci{
120262306a36Sopenharmony_ci	do_kernel_power_off();
120362306a36Sopenharmony_ci	xen_reboot(SHUTDOWN_poweroff);
120462306a36Sopenharmony_ci}
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_cistatic void xen_crash_shutdown(struct pt_regs *regs)
120762306a36Sopenharmony_ci{
120862306a36Sopenharmony_ci	xen_reboot(SHUTDOWN_crash);
120962306a36Sopenharmony_ci}
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_cistatic const struct machine_ops xen_machine_ops __initconst = {
121262306a36Sopenharmony_ci	.restart = xen_restart,
121362306a36Sopenharmony_ci	.halt = xen_machine_halt,
121462306a36Sopenharmony_ci	.power_off = xen_machine_power_off,
121562306a36Sopenharmony_ci	.shutdown = xen_machine_halt,
121662306a36Sopenharmony_ci	.crash_shutdown = xen_crash_shutdown,
121762306a36Sopenharmony_ci	.emergency_restart = xen_emergency_restart,
121862306a36Sopenharmony_ci};
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_cistatic unsigned char xen_get_nmi_reason(void)
122162306a36Sopenharmony_ci{
122262306a36Sopenharmony_ci	unsigned char reason = 0;
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	/* Construct a value which looks like it came from port 0x61. */
122562306a36Sopenharmony_ci	if (test_bit(_XEN_NMIREASON_io_error,
122662306a36Sopenharmony_ci		     &HYPERVISOR_shared_info->arch.nmi_reason))
122762306a36Sopenharmony_ci		reason |= NMI_REASON_IOCHK;
122862306a36Sopenharmony_ci	if (test_bit(_XEN_NMIREASON_pci_serr,
122962306a36Sopenharmony_ci		     &HYPERVISOR_shared_info->arch.nmi_reason))
123062306a36Sopenharmony_ci		reason |= NMI_REASON_SERR;
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	return reason;
123362306a36Sopenharmony_ci}
123462306a36Sopenharmony_ci
/*
 * Fill the EDD-related fields of boot_params from data reported by the
 * hypervisor through XENPF_firmware_info platform ops.
 *
 * Two passes are made, each issuing one hypercall per index and stopping
 * at the first index for which the hypercall returns non-zero (or when
 * the destination array is full):
 *  - XEN_FW_DISK_INFO fills boot_params.eddbuf[] and eddbuf_entries;
 *  - XEN_FW_DISK_MBR_SIGNATURE fills boot_params.edd_mbr_sig_buffer[]
 *    and edd_mbr_sig_buf_entries.
 *
 * The body is compiled out entirely unless CONFIG_EDD is enabled.
 */
123562306a36Sopenharmony_cistatic void __init xen_boot_params_init_edd(void)
123662306a36Sopenharmony_ci{
123762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_EDD)
123862306a36Sopenharmony_ci	struct xen_platform_op op;
123962306a36Sopenharmony_ci	struct edd_info *edd_info;
124062306a36Sopenharmony_ci	u32 *mbr_signature;
124162306a36Sopenharmony_ci	unsigned nr;
124262306a36Sopenharmony_ci	int ret;
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci	edd_info = boot_params.eddbuf;
124562306a36Sopenharmony_ci	mbr_signature = boot_params.edd_mbr_sig_buffer;
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	op.cmd = XENPF_firmware_info;
124862306a36Sopenharmony_ci
	/* Pass 1: per-disk EDD information, one entry per index. */
124962306a36Sopenharmony_ci	op.u.firmware_info.type = XEN_FW_DISK_INFO;
125062306a36Sopenharmony_ci	for (nr = 0; nr < EDDMAXNR; nr++) {
125162306a36Sopenharmony_ci		struct edd_info *info = edd_info + nr;
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_ci		op.u.firmware_info.index = nr;
		/*
		 * NOTE(review): length is presumably read by the hypervisor
		 * as the capacity of the params buffer handed over below -
		 * confirm against the Xen platform-op interface.
		 */
125462306a36Sopenharmony_ci		info->params.length = sizeof(info->params);
125562306a36Sopenharmony_ci		set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
125662306a36Sopenharmony_ci				     &info->params);
125762306a36Sopenharmony_ci		ret = HYPERVISOR_platform_op(&op);
		/* First failing index ends the scan; nr entries are valid. */
125862306a36Sopenharmony_ci		if (ret)
125962306a36Sopenharmony_ci			break;
126062306a36Sopenharmony_ci
		/* Copy one same-named field from the hypercall reply. */
126162306a36Sopenharmony_ci#define C(x) info->x = op.u.firmware_info.u.disk_info.x
126262306a36Sopenharmony_ci		C(device);
126362306a36Sopenharmony_ci		C(version);
126462306a36Sopenharmony_ci		C(interface_support);
126562306a36Sopenharmony_ci		C(legacy_max_cylinder);
126662306a36Sopenharmony_ci		C(legacy_max_head);
126762306a36Sopenharmony_ci		C(legacy_sectors_per_track);
126862306a36Sopenharmony_ci#undef C
126962306a36Sopenharmony_ci	}
127062306a36Sopenharmony_ci	boot_params.eddbuf_entries = nr;
127162306a36Sopenharmony_ci
	/* Pass 2: MBR signatures, reusing op with a different query type. */
127262306a36Sopenharmony_ci	op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
127362306a36Sopenharmony_ci	for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
127462306a36Sopenharmony_ci		op.u.firmware_info.index = nr;
127562306a36Sopenharmony_ci		ret = HYPERVISOR_platform_op(&op);
127662306a36Sopenharmony_ci		if (ret)
127762306a36Sopenharmony_ci			break;
127862306a36Sopenharmony_ci		mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
127962306a36Sopenharmony_ci	}
128062306a36Sopenharmony_ci	boot_params.edd_mbr_sig_buf_entries = nr;
128162306a36Sopenharmony_ci#endif
128262306a36Sopenharmony_ci}
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci/*
128562306a36Sopenharmony_ci * Set up the GDT and segment registers for -fstack-protector.  Until
128662306a36Sopenharmony_ci * we do this, we have to be careful not to call any stack-protected
128762306a36Sopenharmony_ci * function, which is most of the kernel.
 *
 * @cpu is passed straight to switch_gdt_and_percpu_base().  The boot
 * variants of the write_gdt_entry/load_gdt paravirt ops are installed
 * only for the duration of that call; the regular Xen variants are
 * installed as soon as it returns.
128862306a36Sopenharmony_ci */
128962306a36Sopenharmony_cistatic void __init xen_setup_gdt(int cpu)
129062306a36Sopenharmony_ci{
	/* Temporarily route GDT updates through the *_boot helpers. */
129162306a36Sopenharmony_ci	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
129262306a36Sopenharmony_ci	pv_ops.cpu.load_gdt = xen_load_gdt_boot;
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	switch_gdt_and_percpu_base(cpu);
129562306a36Sopenharmony_ci
	/* From here on use the normal Xen GDT ops. */
129662306a36Sopenharmony_ci	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
129762306a36Sopenharmony_ci	pv_ops.cpu.load_gdt = xen_load_gdt;
129862306a36Sopenharmony_ci}
129962306a36Sopenharmony_ci
/*
 * set_legacy_features hook installed by xen_start_kernel() for the
 * initial domain: sets x86_platform.legacy.rtc to 1.
 */
130062306a36Sopenharmony_cistatic void __init xen_dom0_set_legacy_features(void)
130162306a36Sopenharmony_ci{
130262306a36Sopenharmony_ci	x86_platform.legacy.rtc = 1;
130362306a36Sopenharmony_ci}
130462306a36Sopenharmony_ci
/*
 * set_legacy_features hook installed by xen_start_kernel() when this is
 * not the initial domain: sets x86_platform.legacy.rtc to 0.
 */
130562306a36Sopenharmony_cistatic void __init xen_domu_set_legacy_features(void)
130662306a36Sopenharmony_ci{
130762306a36Sopenharmony_ci	x86_platform.legacy.rtc = 0;
130862306a36Sopenharmony_ci}
130962306a36Sopenharmony_ci
/*
 * Only declared here; xen_start_kernel() overwrites this location with a
 * jump to xen_iret.
 */
131062306a36Sopenharmony_ciextern void early_xen_iret_patch(void);
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci/*
 * First C function to be called on Xen boot.
 *
 * @si: start info for this domain; saved in xen_start_info.  If it is
 *      NULL the function returns immediately without touching anything.
 *
 * In order, this marks the domain as XEN_PV_DOMAIN, installs the Xen
 * paravirt ops and the x86_init/x86_platform hooks, sets up the GDT,
 * APIC ops, SMP and CPU callbacks, maps the kernel page tables, fills
 * boot_params from the start info (with extra firmware queries in the
 * initial domain) and finally calls x86_64_start_reservations().
 */
131362306a36Sopenharmony_ciasmlinkage __visible void __init xen_start_kernel(struct start_info *si)
131462306a36Sopenharmony_ci{
131562306a36Sopenharmony_ci	struct physdev_set_iopl set_iopl;
131662306a36Sopenharmony_ci	unsigned long initrd_start = 0;
131762306a36Sopenharmony_ci	int rc;
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	if (!si)
132062306a36Sopenharmony_ci		return;
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_ci	clear_bss();
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci	xen_start_info = si;
132562306a36Sopenharmony_ci
	/* Rewrite the early_xen_iret_patch site into a JMP32 to xen_iret. */
132662306a36Sopenharmony_ci	__text_gen_insn(&early_xen_iret_patch,
132762306a36Sopenharmony_ci			JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
132862306a36Sopenharmony_ci			JMP32_INSN_SIZE);
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_ci	xen_domain_type = XEN_PV_DOMAIN;
133162306a36Sopenharmony_ci	xen_start_flags = xen_start_info->flags;
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_ci	xen_setup_features();
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	/* Install Xen paravirt ops */
133662306a36Sopenharmony_ci	pv_info = xen_info;
133762306a36Sopenharmony_ci	pv_ops.cpu = xen_cpu_ops.cpu;
133862306a36Sopenharmony_ci	xen_init_irq_ops();
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	/*
134162306a36Sopenharmony_ci	 * Setup xen_vcpu early because it is needed for
134262306a36Sopenharmony_ci	 * local_irq_disable(), irqs_disabled(), e.g. in printk().
134362306a36Sopenharmony_ci	 *
134462306a36Sopenharmony_ci	 * Don't do the full vcpu_info placement stuff until we have
134562306a36Sopenharmony_ci	 * the cpu_possible_mask and a non-dummy shared_info.
134662306a36Sopenharmony_ci	 */
134762306a36Sopenharmony_ci	xen_vcpu_info_reset(0);
134862306a36Sopenharmony_ci
	/* The real-mode hooks are replaced by no-ops. */
134962306a36Sopenharmony_ci	x86_platform.get_nmi_reason = xen_get_nmi_reason;
135062306a36Sopenharmony_ci	x86_platform.realmode_reserve = x86_init_noop;
135162306a36Sopenharmony_ci	x86_platform.realmode_init = x86_init_noop;
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	x86_init.resources.memory_setup = xen_memory_setup;
135462306a36Sopenharmony_ci	x86_init.irqs.intr_mode_select	= x86_init_noop;
135562306a36Sopenharmony_ci	x86_init.irqs.intr_mode_init	= x86_64_probe_apic;
135662306a36Sopenharmony_ci	x86_init.oem.arch_setup = xen_arch_setup;
135762306a36Sopenharmony_ci	x86_init.oem.banner = xen_banner;
135862306a36Sopenharmony_ci	x86_init.hyper.init_platform = xen_pv_init_platform;
135962306a36Sopenharmony_ci	x86_init.hyper.guest_late_init = xen_pv_guest_late_init;
136062306a36Sopenharmony_ci
136162306a36Sopenharmony_ci	/*
136262306a36Sopenharmony_ci	 * Set up some pagetable state before starting to set any ptes.
136362306a36Sopenharmony_ci	 */
136462306a36Sopenharmony_ci
136562306a36Sopenharmony_ci	xen_setup_machphys_mapping();
136662306a36Sopenharmony_ci	xen_init_mmu_ops();
136762306a36Sopenharmony_ci
136862306a36Sopenharmony_ci	/* Prevent unwanted bits from being set in PTEs. */
136962306a36Sopenharmony_ci	__supported_pte_mask &= ~_PAGE_GLOBAL;
137062306a36Sopenharmony_ci	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
137162306a36Sopenharmony_ci
137262306a36Sopenharmony_ci	/* Get mfn list */
137362306a36Sopenharmony_ci	xen_build_dynamic_phys_to_machine();
137462306a36Sopenharmony_ci
137562306a36Sopenharmony_ci	/* Work out if we support NX */
137662306a36Sopenharmony_ci	get_cpu_cap(&boot_cpu_data);
137762306a36Sopenharmony_ci	x86_configure_nx();
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	/*
138062306a36Sopenharmony_ci	 * Set up kernel GDT and segment registers, mainly so that
138162306a36Sopenharmony_ci	 * -fstack-protector code can be executed.
138262306a36Sopenharmony_ci	 */
138362306a36Sopenharmony_ci	xen_setup_gdt(0);
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci	/* Determine virtual and physical address sizes */
138662306a36Sopenharmony_ci	get_cpu_address_sizes(&boot_cpu_data);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	/* Let's presume PV guests always boot on vCPU with id 0. */
138962306a36Sopenharmony_ci	per_cpu(xen_vcpu_id, 0) = 0;
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci	idt_setup_early_handler();
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	xen_init_capabilities();
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	/*
139662306a36Sopenharmony_ci	 * set up the basic apic ops.
139762306a36Sopenharmony_ci	 */
139862306a36Sopenharmony_ci	xen_init_apic();
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	machine_ops = xen_machine_ops;
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci	/*
140362306a36Sopenharmony_ci	 * The only reliable way to retain the initial address of the
140462306a36Sopenharmony_ci	 * percpu gdt_page is to remember it here, so we can go and
140562306a36Sopenharmony_ci	 * mark it RW later, when the initial percpu area is freed.
140662306a36Sopenharmony_ci	 */
140762306a36Sopenharmony_ci	xen_initial_gdt = &per_cpu(gdt_page, 0);
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_ci	xen_smp_init();
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci#ifdef CONFIG_ACPI_NUMA
141262306a36Sopenharmony_ci	/*
141362306a36Sopenharmony_ci	 * The pages we get from Xen are not related to machine pages, so
141462306a36Sopenharmony_ci	 * any NUMA information the kernel tries to get from ACPI will
141562306a36Sopenharmony_ci	 * be meaningless.  Prevent it from trying.
141662306a36Sopenharmony_ci	 */
141762306a36Sopenharmony_ci	disable_srat();
141862306a36Sopenharmony_ci#endif
	/*
	 * Register the PV cpu-up-prepare / cpu-dead callbacks; a failure
	 * here only produces a warning.
	 */
141962306a36Sopenharmony_ci	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
142062306a36Sopenharmony_ci
142162306a36Sopenharmony_ci	local_irq_disable();
142262306a36Sopenharmony_ci	early_boot_irqs_disabled = true;
142362306a36Sopenharmony_ci
142462306a36Sopenharmony_ci	xen_raw_console_write("mapping kernel into physical memory\n");
142562306a36Sopenharmony_ci	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
142662306a36Sopenharmony_ci				   xen_start_info->nr_pages);
142762306a36Sopenharmony_ci	xen_reserve_special_pages();
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	/*
143062306a36Sopenharmony_ci	 * We used to do this in xen_arch_setup, but that is too late
143162306a36Sopenharmony_ci	 * on AMD where early_cpu_init (run before ->arch_setup()) calls
143262306a36Sopenharmony_ci	 * early_amd_init which pokes 0xcf8 port.
143362306a36Sopenharmony_ci	 */
143462306a36Sopenharmony_ci	set_iopl.iopl = 1;
143562306a36Sopenharmony_ci	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	/* Not fatal: the failure is only reported on the raw console. */
143662306a36Sopenharmony_ci	if (rc != 0)
143762306a36Sopenharmony_ci		xen_raw_printk("physdev_op failed %d\n", rc);
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci
	/*
	 * mod_start is a page frame number when SIF_MOD_START_PFN is set;
	 * otherwise it is an address that is translated with __pa().
	 * initrd_start stays 0 when there is no module.
	 */
144062306a36Sopenharmony_ci	if (xen_start_info->mod_start) {
144162306a36Sopenharmony_ci	    if (xen_start_info->flags & SIF_MOD_START_PFN)
144262306a36Sopenharmony_ci		initrd_start = PFN_PHYS(xen_start_info->mod_start);
144362306a36Sopenharmony_ci	    else
144462306a36Sopenharmony_ci		initrd_start = __pa(xen_start_info->mod_start);
144562306a36Sopenharmony_ci	}
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	/* Poke various useful things into boot_params */
	/*
	 * NOTE(review): (9 << 4) | 0 is presumably "loader id 9, version 0"
	 * as assigned to Xen by the x86 boot protocol - confirm there.
	 */
144862306a36Sopenharmony_ci	boot_params.hdr.type_of_loader = (9 << 4) | 0;
144962306a36Sopenharmony_ci	boot_params.hdr.ramdisk_image = initrd_start;
145062306a36Sopenharmony_ci	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
145162306a36Sopenharmony_ci	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
145262306a36Sopenharmony_ci	boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ci	if (!xen_initial_domain()) {
145562306a36Sopenharmony_ci		if (pci_xen)
145662306a36Sopenharmony_ci			x86_init.pci.arch_init = pci_xen_init;
145762306a36Sopenharmony_ci		x86_platform.set_legacy_features =
145862306a36Sopenharmony_ci				xen_domu_set_legacy_features;
145962306a36Sopenharmony_ci	} else {
		/* Initial domain: VGA info lives at info_off inside start_info. */
146062306a36Sopenharmony_ci		const struct dom0_vga_console_info *info =
146162306a36Sopenharmony_ci			(void *)((char *)xen_start_info +
146262306a36Sopenharmony_ci				 xen_start_info->console.dom0.info_off);
146362306a36Sopenharmony_ci		struct xen_platform_op op = {
146462306a36Sopenharmony_ci			.cmd = XENPF_firmware_info,
146562306a36Sopenharmony_ci			.interface_version = XENPF_INTERFACE_VERSION,
146662306a36Sopenharmony_ci			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
146762306a36Sopenharmony_ci		};
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_ci		x86_platform.set_legacy_features =
147062306a36Sopenharmony_ci				xen_dom0_set_legacy_features;
147162306a36Sopenharmony_ci		xen_init_vga(info, xen_start_info->console.dom0.info_size,
147262306a36Sopenharmony_ci			     &boot_params.screen_info);
		/*
		 * NOTE(review): the domU console fields are zeroed only
		 * after the dom0 fields have been consumed above; they
		 * presumably share storage - confirm in struct start_info.
		 */
147362306a36Sopenharmony_ci		xen_start_info->console.domU.mfn = 0;
147462306a36Sopenharmony_ci		xen_start_info->console.domU.evtchn = 0;
147562306a36Sopenharmony_ci
		/* Best effort: kbd_status is left untouched if the query fails. */
147662306a36Sopenharmony_ci		if (HYPERVISOR_platform_op(&op) == 0)
147762306a36Sopenharmony_ci			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci		/* Make sure ACS will be enabled */
148062306a36Sopenharmony_ci		pci_request_acs();
148162306a36Sopenharmony_ci
148262306a36Sopenharmony_ci		xen_acpi_sleep_register();
148362306a36Sopenharmony_ci
148462306a36Sopenharmony_ci		xen_boot_params_init_edd();
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci#ifdef CONFIG_ACPI
148762306a36Sopenharmony_ci		/*
148862306a36Sopenharmony_ci		 * Disable selecting "Firmware First mode" for correctable
148962306a36Sopenharmony_ci		 * memory errors, as this is the duty of the hypervisor to
149062306a36Sopenharmony_ci		 * decide.
149162306a36Sopenharmony_ci		 */
149262306a36Sopenharmony_ci		acpi_disable_cmcff = 1;
149362306a36Sopenharmony_ci#endif
149462306a36Sopenharmony_ci	}
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ci	xen_add_preferred_consoles();
149762306a36Sopenharmony_ci
149862306a36Sopenharmony_ci#ifdef CONFIG_PCI
149962306a36Sopenharmony_ci	/* PCI BIOS service won't work from a PV guest. */
150062306a36Sopenharmony_ci	pci_probe &= ~PCI_PROBE_BIOS;
150162306a36Sopenharmony_ci#endif
150262306a36Sopenharmony_ci	xen_raw_console_write("about to get started...\n");
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	/* We need this for printk timestamps */
150562306a36Sopenharmony_ci	xen_setup_runstate_info(0);
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	xen_efi_init(&boot_params);
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	/* Start the world */
151062306a36Sopenharmony_ci	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
151162306a36Sopenharmony_ci	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
151262306a36Sopenharmony_ci}
151362306a36Sopenharmony_ci
/*
 * CPU bring-up preparation callback, registered together with
 * xen_cpu_dead_pv() through xen_cpuhp_setup() in xen_start_kernel().
 *
 * @cpu: CPU being prepared.
 *
 * Returns -ENODEV if no xen_vcpu is set for @cpu, the error code of
 * xen_smp_intr_init() or xen_smp_intr_init_pv() if either fails (after
 * emitting a warning), and 0 on success.
 *
 * NOTE(review): the error paths return without undoing the steps that
 * already succeeded (timer setup, first interrupt init).  Whether the
 * callees or the caller clean up is not visible here - confirm.
 */
151462306a36Sopenharmony_cistatic int xen_cpu_up_prepare_pv(unsigned int cpu)
151562306a36Sopenharmony_ci{
151662306a36Sopenharmony_ci	int rc;
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_ci	if (per_cpu(xen_vcpu, cpu) == NULL)
151962306a36Sopenharmony_ci		return -ENODEV;
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_ci	xen_setup_timer(cpu);
152262306a36Sopenharmony_ci
152362306a36Sopenharmony_ci	rc = xen_smp_intr_init(cpu);
152462306a36Sopenharmony_ci	if (rc) {
152562306a36Sopenharmony_ci		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
152662306a36Sopenharmony_ci		     cpu, rc);
152762306a36Sopenharmony_ci		return rc;
152862306a36Sopenharmony_ci	}
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_ci	rc = xen_smp_intr_init_pv(cpu);
153162306a36Sopenharmony_ci	if (rc) {
153262306a36Sopenharmony_ci		WARN(1, "xen_smp_intr_init_pv() for CPU %d failed: %d\n",
153362306a36Sopenharmony_ci		     cpu, rc);
153462306a36Sopenharmony_ci		return rc;
153562306a36Sopenharmony_ci	}
153662306a36Sopenharmony_ci
153762306a36Sopenharmony_ci	return 0;
153862306a36Sopenharmony_ci}
153962306a36Sopenharmony_ci
/*
 * Counterpart of xen_cpu_up_prepare_pv(): releases the interrupts set up
 * by xen_smp_intr_init()/xen_smp_intr_init_pv() and tears down the timer
 * for @cpu.  Always returns 0.
 */
154062306a36Sopenharmony_cistatic int xen_cpu_dead_pv(unsigned int cpu)
154162306a36Sopenharmony_ci{
154262306a36Sopenharmony_ci	xen_smp_intr_free(cpu);
154362306a36Sopenharmony_ci	xen_smp_intr_free_pv(cpu);
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_ci	xen_teardown_timer(cpu);
154662306a36Sopenharmony_ci
154762306a36Sopenharmony_ci	return 0;
154862306a36Sopenharmony_ci}
154962306a36Sopenharmony_ci
/*
 * Detection callback for x86_hyper_xen_pv.
 *
 * Returns xen_cpuid_base() when running as a Xen PV domain, 0 otherwise.
 */
155062306a36Sopenharmony_cistatic uint32_t __init xen_platform_pv(void)
155162306a36Sopenharmony_ci{
155262306a36Sopenharmony_ci	if (xen_pv_domain())
155362306a36Sopenharmony_ci		return xen_cpuid_base();
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	return 0;
155662306a36Sopenharmony_ci}
155762306a36Sopenharmony_ci
/*
 * Hypervisor descriptor for Xen PV guests: detected by xen_platform_pv(),
 * with xen_pin_vcpu() as the runtime pin_vcpu hook.
 *
 * NOTE(review): ignore_nopv presumably keeps this entry active even when
 * PV support is disabled on the command line - confirm against the
 * struct hypervisor_x86 users.
 */
155862306a36Sopenharmony_ciconst __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
155962306a36Sopenharmony_ci	.name                   = "Xen PV",
156062306a36Sopenharmony_ci	.detect                 = xen_platform_pv,
156162306a36Sopenharmony_ci	.type			= X86_HYPER_XEN_PV,
156262306a36Sopenharmony_ci	.runtime.pin_vcpu       = xen_pin_vcpu,
156362306a36Sopenharmony_ci	.ignore_nopv		= true,
156462306a36Sopenharmony_ci};
1565