162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * common.c - C code for kernel entry and exit 462306a36Sopenharmony_ci * Copyright (c) 2015 Andrew Lutomirski 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Based on asm and ptrace code by many authors. The code here originated 762306a36Sopenharmony_ci * in ptrace.c and signal.c. 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/kernel.h> 1162306a36Sopenharmony_ci#include <linux/sched.h> 1262306a36Sopenharmony_ci#include <linux/sched/task_stack.h> 1362306a36Sopenharmony_ci#include <linux/entry-common.h> 1462306a36Sopenharmony_ci#include <linux/mm.h> 1562306a36Sopenharmony_ci#include <linux/smp.h> 1662306a36Sopenharmony_ci#include <linux/errno.h> 1762306a36Sopenharmony_ci#include <linux/ptrace.h> 1862306a36Sopenharmony_ci#include <linux/export.h> 1962306a36Sopenharmony_ci#include <linux/nospec.h> 2062306a36Sopenharmony_ci#include <linux/syscalls.h> 2162306a36Sopenharmony_ci#include <linux/uaccess.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#ifdef CONFIG_XEN_PV 2462306a36Sopenharmony_ci#include <xen/xen-ops.h> 2562306a36Sopenharmony_ci#include <xen/events.h> 2662306a36Sopenharmony_ci#endif 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#include <asm/apic.h> 2962306a36Sopenharmony_ci#include <asm/desc.h> 3062306a36Sopenharmony_ci#include <asm/traps.h> 3162306a36Sopenharmony_ci#include <asm/vdso.h> 3262306a36Sopenharmony_ci#include <asm/cpufeature.h> 3362306a36Sopenharmony_ci#include <asm/fpu/api.h> 3462306a36Sopenharmony_ci#include <asm/nospec-branch.h> 3562306a36Sopenharmony_ci#include <asm/io_bitmap.h> 3662306a36Sopenharmony_ci#include <asm/syscall.h> 3762306a36Sopenharmony_ci#include <asm/irq_stack.h> 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci /* 4462306a36Sopenharmony_ci * Convert negative numbers to very high and thus out of range 4562306a36Sopenharmony_ci * numbers for comparisons. 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_ci unsigned int unr = nr; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci if (likely(unr < NR_syscalls)) { 5062306a36Sopenharmony_ci unr = array_index_nospec(unr, NR_syscalls); 5162306a36Sopenharmony_ci regs->ax = sys_call_table[unr](regs); 5262306a36Sopenharmony_ci return true; 5362306a36Sopenharmony_ci } 5462306a36Sopenharmony_ci return false; 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cistatic __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci /* 6062306a36Sopenharmony_ci * Adjust the starting offset of the table, and convert numbers 6162306a36Sopenharmony_ci * < __X32_SYSCALL_BIT to very high and thus out of range 6262306a36Sopenharmony_ci * numbers for comparisons. 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_ci unsigned int xnr = nr - __X32_SYSCALL_BIT; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { 6762306a36Sopenharmony_ci xnr = array_index_nospec(xnr, X32_NR_syscalls); 6862306a36Sopenharmony_ci regs->ax = x32_sys_call_table[xnr](regs); 6962306a36Sopenharmony_ci return true; 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci return false; 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci add_random_kstack_offset(); 7762306a36Sopenharmony_ci nr = syscall_enter_from_user_mode(regs, nr); 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci instrumentation_begin(); 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { 8262306a36Sopenharmony_ci /* Invalid system call, but still a system call. */ 8362306a36Sopenharmony_ci regs->ax = __x64_sys_ni_syscall(regs); 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci instrumentation_end(); 8762306a36Sopenharmony_ci syscall_exit_to_user_mode(regs); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci#endif 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 9262306a36Sopenharmony_cistatic __always_inline int syscall_32_enter(struct pt_regs *regs) 9362306a36Sopenharmony_ci{ 9462306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IA32_EMULATION)) 9562306a36Sopenharmony_ci current_thread_info()->status |= TS_COMPAT; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci return (int)regs->orig_ax; 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci#ifdef CONFIG_IA32_EMULATION 10162306a36Sopenharmony_cibool __ia32_enabled __ro_after_init = true; 10262306a36Sopenharmony_ci#endif 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci/* 10562306a36Sopenharmony_ci * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. 10662306a36Sopenharmony_ci */ 10762306a36Sopenharmony_cistatic __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci /* 11062306a36Sopenharmony_ci * Convert negative numbers to very high and thus out of range 11162306a36Sopenharmony_ci * numbers for comparisons. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_ci unsigned int unr = nr; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci if (likely(unr < IA32_NR_syscalls)) { 11662306a36Sopenharmony_ci unr = array_index_nospec(unr, IA32_NR_syscalls); 11762306a36Sopenharmony_ci regs->ax = ia32_sys_call_table[unr](regs); 11862306a36Sopenharmony_ci } else if (nr != -1) { 11962306a36Sopenharmony_ci regs->ax = __ia32_sys_ni_syscall(regs); 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci#ifdef CONFIG_IA32_EMULATION 12462306a36Sopenharmony_cistatic __always_inline bool int80_is_external(void) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci const unsigned int offs = (0x80 / 32) * 0x10; 12762306a36Sopenharmony_ci const u32 bit = BIT(0x80 % 32); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci /* The local APIC on XENPV guests is fake */ 13062306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_XENPV)) 13162306a36Sopenharmony_ci return false; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci /* 13462306a36Sopenharmony_ci * If vector 0x80 is set in the APIC ISR then this is an external 13562306a36Sopenharmony_ci * interrupt. Either from broken hardware or injected by a VMM. 13662306a36Sopenharmony_ci * 13762306a36Sopenharmony_ci * Note: In guest mode this is only valid for secure guests where 13862306a36Sopenharmony_ci * the secure module fully controls the vAPIC exposed to the guest. 13962306a36Sopenharmony_ci */ 14062306a36Sopenharmony_ci return apic_read(APIC_ISR + offs) & bit; 14162306a36Sopenharmony_ci} 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci/** 14462306a36Sopenharmony_ci * int80_emulation - 32-bit legacy syscall entry 14562306a36Sopenharmony_ci * 14662306a36Sopenharmony_ci * This entry point can be used by 32-bit and 64-bit programs to perform 14762306a36Sopenharmony_ci * 32-bit system calls. Instances of INT $0x80 can be found inline in 14862306a36Sopenharmony_ci * various programs and libraries. It is also used by the vDSO's 14962306a36Sopenharmony_ci * __kernel_vsyscall fallback for hardware that doesn't support a faster 15062306a36Sopenharmony_ci * entry method. Restarted 32-bit system calls also fall back to INT 15162306a36Sopenharmony_ci * $0x80 regardless of what instruction was originally used to do the 15262306a36Sopenharmony_ci * system call. 15362306a36Sopenharmony_ci * 15462306a36Sopenharmony_ci * This is considered a slow path. It is not used by most libc 15562306a36Sopenharmony_ci * implementations on modern hardware except during process startup. 15662306a36Sopenharmony_ci * 15762306a36Sopenharmony_ci * The arguments for the INT $0x80 based syscall are on stack in the 15862306a36Sopenharmony_ci * pt_regs structure: 15962306a36Sopenharmony_ci * eax: system call number 16062306a36Sopenharmony_ci * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 16162306a36Sopenharmony_ci */ 16262306a36Sopenharmony_ciDEFINE_IDTENTRY_RAW(int80_emulation) 16362306a36Sopenharmony_ci{ 16462306a36Sopenharmony_ci int nr; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci /* Kernel does not use INT $0x80! */ 16762306a36Sopenharmony_ci if (unlikely(!user_mode(regs))) { 16862306a36Sopenharmony_ci irqentry_enter(regs); 16962306a36Sopenharmony_ci instrumentation_begin(); 17062306a36Sopenharmony_ci panic("Unexpected external interrupt 0x80\n"); 17162306a36Sopenharmony_ci } 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci /* 17462306a36Sopenharmony_ci * Establish kernel context for instrumentation, including for 17562306a36Sopenharmony_ci * int80_is_external() below which calls into the APIC driver. 17662306a36Sopenharmony_ci * Identical for soft and external interrupts. 17762306a36Sopenharmony_ci */ 17862306a36Sopenharmony_ci enter_from_user_mode(regs); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci instrumentation_begin(); 18162306a36Sopenharmony_ci add_random_kstack_offset(); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci /* Validate that this is a soft interrupt to the extent possible */ 18462306a36Sopenharmony_ci if (unlikely(int80_is_external())) 18562306a36Sopenharmony_ci panic("Unexpected external interrupt 0x80\n"); 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci /* 18862306a36Sopenharmony_ci * The low level idtentry code pushed -1 into regs::orig_ax 18962306a36Sopenharmony_ci * and regs::ax contains the syscall number. 19062306a36Sopenharmony_ci * 19162306a36Sopenharmony_ci * User tracing code (ptrace or signal handlers) might assume 19262306a36Sopenharmony_ci * that the regs::orig_ax contains a 32-bit number on invoking 19362306a36Sopenharmony_ci * a 32-bit syscall. 19462306a36Sopenharmony_ci * 19562306a36Sopenharmony_ci * Establish the syscall convention by saving the 32bit truncated 19662306a36Sopenharmony_ci * syscall number in regs::orig_ax and by invalidating regs::ax. 19762306a36Sopenharmony_ci */ 19862306a36Sopenharmony_ci regs->orig_ax = regs->ax & GENMASK(31, 0); 19962306a36Sopenharmony_ci regs->ax = -ENOSYS; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci nr = syscall_32_enter(regs); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci local_irq_enable(); 20462306a36Sopenharmony_ci nr = syscall_enter_from_user_mode_work(regs, nr); 20562306a36Sopenharmony_ci do_syscall_32_irqs_on(regs, nr); 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci instrumentation_end(); 20862306a36Sopenharmony_ci syscall_exit_to_user_mode(regs); 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci#else /* CONFIG_IA32_EMULATION */ 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci/* Handles int $0x80 on a 32bit kernel */ 21362306a36Sopenharmony_ci__visible noinstr void do_int80_syscall_32(struct pt_regs *regs) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci int nr = syscall_32_enter(regs); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci add_random_kstack_offset(); 21862306a36Sopenharmony_ci /* 21962306a36Sopenharmony_ci * Subtlety here: if ptrace pokes something larger than 2^31-1 into 22062306a36Sopenharmony_ci * orig_ax, the int return value truncates it. This matches 22162306a36Sopenharmony_ci * the semantics of syscall_get_nr(). 22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci nr = syscall_enter_from_user_mode(regs, nr); 22462306a36Sopenharmony_ci instrumentation_begin(); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci do_syscall_32_irqs_on(regs, nr); 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci instrumentation_end(); 22962306a36Sopenharmony_ci syscall_exit_to_user_mode(regs); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci#endif /* !CONFIG_IA32_EMULATION */ 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_cistatic noinstr bool __do_fast_syscall_32(struct pt_regs *regs) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci int nr = syscall_32_enter(regs); 23662306a36Sopenharmony_ci int res; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci add_random_kstack_offset(); 23962306a36Sopenharmony_ci /* 24062306a36Sopenharmony_ci * This cannot use syscall_enter_from_user_mode() as it has to 24162306a36Sopenharmony_ci * fetch EBP before invoking any of the syscall entry work 24262306a36Sopenharmony_ci * functions. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ci syscall_enter_from_user_mode_prepare(regs); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci instrumentation_begin(); 24762306a36Sopenharmony_ci /* Fetch EBP from where the vDSO stashed it. */ 24862306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_X86_64)) { 24962306a36Sopenharmony_ci /* 25062306a36Sopenharmony_ci * Micro-optimization: the pointer we're following is 25162306a36Sopenharmony_ci * explicitly 32 bits, so it can't be out of range. 25262306a36Sopenharmony_ci */ 25362306a36Sopenharmony_ci res = __get_user(*(u32 *)®s->bp, 25462306a36Sopenharmony_ci (u32 __user __force *)(unsigned long)(u32)regs->sp); 25562306a36Sopenharmony_ci } else { 25662306a36Sopenharmony_ci res = get_user(*(u32 *)®s->bp, 25762306a36Sopenharmony_ci (u32 __user __force *)(unsigned long)(u32)regs->sp); 25862306a36Sopenharmony_ci } 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci if (res) { 26162306a36Sopenharmony_ci /* User code screwed up. */ 26262306a36Sopenharmony_ci regs->ax = -EFAULT; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci local_irq_disable(); 26562306a36Sopenharmony_ci instrumentation_end(); 26662306a36Sopenharmony_ci irqentry_exit_to_user_mode(regs); 26762306a36Sopenharmony_ci return false; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci nr = syscall_enter_from_user_mode_work(regs, nr); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* Now this is just like a normal syscall. */ 27362306a36Sopenharmony_ci do_syscall_32_irqs_on(regs, nr); 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci instrumentation_end(); 27662306a36Sopenharmony_ci syscall_exit_to_user_mode(regs); 27762306a36Sopenharmony_ci return true; 27862306a36Sopenharmony_ci} 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ 28162306a36Sopenharmony_ci__visible noinstr long do_fast_syscall_32(struct pt_regs *regs) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci /* 28462306a36Sopenharmony_ci * Called using the internal vDSO SYSENTER/SYSCALL32 calling 28562306a36Sopenharmony_ci * convention. Adjust regs so it looks like we entered using int80. 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_ci unsigned long landing_pad = (unsigned long)current->mm->context.vdso + 28862306a36Sopenharmony_ci vdso_image_32.sym_int80_landing_pad; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci /* 29162306a36Sopenharmony_ci * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward 29262306a36Sopenharmony_ci * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. 29362306a36Sopenharmony_ci * Fix it up. 29462306a36Sopenharmony_ci */ 29562306a36Sopenharmony_ci regs->ip = landing_pad; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci /* Invoke the syscall. If it failed, keep it simple: use IRET. */ 29862306a36Sopenharmony_ci if (!__do_fast_syscall_32(regs)) 29962306a36Sopenharmony_ci return 0; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci#ifdef CONFIG_X86_64 30262306a36Sopenharmony_ci /* 30362306a36Sopenharmony_ci * Opportunistic SYSRETL: if possible, try to return using SYSRETL. 30462306a36Sopenharmony_ci * SYSRETL is available on all 64-bit CPUs, so we don't need to 30562306a36Sopenharmony_ci * bother with SYSEXIT. 30662306a36Sopenharmony_ci * 30762306a36Sopenharmony_ci * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, 30862306a36Sopenharmony_ci * because the ECX fixup above will ensure that this is essentially 30962306a36Sopenharmony_ci * never the case. 31062306a36Sopenharmony_ci */ 31162306a36Sopenharmony_ci return regs->cs == __USER32_CS && regs->ss == __USER_DS && 31262306a36Sopenharmony_ci regs->ip == landing_pad && 31362306a36Sopenharmony_ci (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; 31462306a36Sopenharmony_ci#else 31562306a36Sopenharmony_ci /* 31662306a36Sopenharmony_ci * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT. 31762306a36Sopenharmony_ci * 31862306a36Sopenharmony_ci * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, 31962306a36Sopenharmony_ci * because the ECX fixup above will ensure that this is essentially 32062306a36Sopenharmony_ci * never the case. 32162306a36Sopenharmony_ci * 32262306a36Sopenharmony_ci * We don't allow syscalls at all from VM86 mode, but we still 32362306a36Sopenharmony_ci * need to check VM, because we might be returning from sys_vm86. 32462306a36Sopenharmony_ci */ 32562306a36Sopenharmony_ci return static_cpu_has(X86_FEATURE_SEP) && 32662306a36Sopenharmony_ci regs->cs == __USER_CS && regs->ss == __USER_DS && 32762306a36Sopenharmony_ci regs->ip == landing_pad && 32862306a36Sopenharmony_ci (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; 32962306a36Sopenharmony_ci#endif 33062306a36Sopenharmony_ci} 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ 33362306a36Sopenharmony_ci__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ 33662306a36Sopenharmony_ci regs->sp = regs->bp; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ 33962306a36Sopenharmony_ci regs->flags |= X86_EFLAGS_IF; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci return do_fast_syscall_32(regs); 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci#endif 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ciSYSCALL_DEFINE0(ni_syscall) 34662306a36Sopenharmony_ci{ 34762306a36Sopenharmony_ci return -ENOSYS; 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci#ifdef CONFIG_XEN_PV 35162306a36Sopenharmony_ci#ifndef CONFIG_PREEMPTION 35262306a36Sopenharmony_ci/* 35362306a36Sopenharmony_ci * Some hypercalls issued by the toolstack can take many 10s of 35462306a36Sopenharmony_ci * seconds. Allow tasks running hypercalls via the privcmd driver to 35562306a36Sopenharmony_ci * be voluntarily preempted even if full kernel preemption is 35662306a36Sopenharmony_ci * disabled. 35762306a36Sopenharmony_ci * 35862306a36Sopenharmony_ci * Such preemptible hypercalls are bracketed by 35962306a36Sopenharmony_ci * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() 36062306a36Sopenharmony_ci * calls. 36162306a36Sopenharmony_ci */ 36262306a36Sopenharmony_ciDEFINE_PER_CPU(bool, xen_in_preemptible_hcall); 36362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci/* 36662306a36Sopenharmony_ci * In case of scheduling the flag must be cleared and restored after 36762306a36Sopenharmony_ci * returning from schedule as the task might move to a different CPU. 36862306a36Sopenharmony_ci */ 36962306a36Sopenharmony_cistatic __always_inline bool get_and_clear_inhcall(void) 37062306a36Sopenharmony_ci{ 37162306a36Sopenharmony_ci bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci __this_cpu_write(xen_in_preemptible_hcall, false); 37462306a36Sopenharmony_ci return inhcall; 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_cistatic __always_inline void restore_inhcall(bool inhcall) 37862306a36Sopenharmony_ci{ 37962306a36Sopenharmony_ci __this_cpu_write(xen_in_preemptible_hcall, inhcall); 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ci#else 38262306a36Sopenharmony_cistatic __always_inline bool get_and_clear_inhcall(void) { return false; } 38362306a36Sopenharmony_cistatic __always_inline void restore_inhcall(bool inhcall) { } 38462306a36Sopenharmony_ci#endif 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_cistatic void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci struct pt_regs *old_regs = set_irq_regs(regs); 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci inc_irq_stat(irq_hv_callback_count); 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci xen_evtchn_do_upcall(); 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci set_irq_regs(old_regs); 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) 39862306a36Sopenharmony_ci{ 39962306a36Sopenharmony_ci irqentry_state_t state = irqentry_enter(regs); 40062306a36Sopenharmony_ci bool inhcall; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci instrumentation_begin(); 40362306a36Sopenharmony_ci run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci inhcall = get_and_clear_inhcall(); 40662306a36Sopenharmony_ci if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { 40762306a36Sopenharmony_ci irqentry_exit_cond_resched(); 40862306a36Sopenharmony_ci instrumentation_end(); 40962306a36Sopenharmony_ci restore_inhcall(inhcall); 41062306a36Sopenharmony_ci } else { 41162306a36Sopenharmony_ci instrumentation_end(); 41262306a36Sopenharmony_ci irqentry_exit(regs, state); 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci} 41562306a36Sopenharmony_ci#endif /* CONFIG_XEN_PV */ 416