/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>

#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/current.h>

/*
 * Call depth tracking for Intel SKL CPUs to address the RSB underflow
 * issue in software.
 *
 * The tracking does not use a counter. It uses arithmetic shift
 * right on call entry and logical shift left on return.
 *
 * The depth tracking variable is initialized to 0x8000.... when the call
 * depth is zero. The arithmetic shift right sign extends the MSB and
 * saturates after the 12th call. The shift count is 5 for both directions
 * so the tracking covers 12 nested calls.
 *
 *  Call
 *  0: 0x8000000000000000	0x0000000000000000
 *  1: 0xfc00000000000000	0xf000000000000000
 * ...
 * 11: 0xfffffffffffffff8	0xfffffffffffffc00
 * 12: 0xffffffffffffffff	0xffffffffffffffe0
 *
 * After a return buffer fill the depth is credited 12 calls before the
 * next stuffing has to take place.
 *
 * There is an inaccuracy for situations like this:
 *
 *  10 calls
 *   5 returns
 *   3 calls
 *   4 returns
 *   3 calls
 *   ....
 *
 * The shift count might cause this to be off by one in either direction,
 * but there is still a cushion vs. the RSB depth. The algorithm does not
 * claim to be perfect and it can be speculated around by the CPU, but it
 * is believed to obfuscate the problem enough to make exploitation
 * extremely difficult.
 */
#define RET_DEPTH_SHIFT			5
#define RSB_RET_STUFF_LOOPS		16
#define RET_DEPTH_INIT			0x8000000000000000ULL
#define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
#define RET_DEPTH_CREDIT		0xffffffffffffffffULL
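
/*
 * Illustration only (a user-space sketch, not part of this header): the
 * shift arithmetic described above, using the constants defined here.
 * It relies on GCC implementing '>>' of a signed type as an arithmetic
 * shift, matching sarq. Note that RET_DEPTH_INIT_FROM_CALL is simply
 * RET_DEPTH_INIT after one such shift, i.e. the state with one call
 * already accounted.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t depth = (int64_t)RET_DEPTH_INIT;	/* call depth 0 */

	depth >>= RET_DEPTH_SHIFT;	/* call:   sarq $5 -> 0xfc00000000000000 */
	depth >>= RET_DEPTH_SHIFT;	/* call:   sarq $5 -> 0xffe0000000000000 */
	depth = (int64_t)((uint64_t)depth << RET_DEPTH_SHIFT);
					/* return: shlq $5 -> 0xfc00000000000000 */

	printf("0x%016llx\n", (unsigned long long)depth);
	return 0;
}
#endif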

#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS				\
	incq	%gs:__x86_call_count;
# define CALL_THUNKS_DEBUG_INC_RETS				\
	incq	%gs:__x86_ret_count;
# define CALL_THUNKS_DEBUG_INC_STUFFS				\
	incq	%gs:__x86_stuffs_count;
# define CALL_THUNKS_DEBUG_INC_CTXSW				\
	incq	%gs:__x86_ctxsw_count;
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif

#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)

#include <asm/asm-offsets.h>

/* Set the per-CPU depth to RET_DEPTH_CREDIT (-1), i.e. full credit */
#define CREDIT_CALL_DEPTH					\
	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define ASM_CREDIT_CALL_DEPTH					\
	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

/* Set the depth to RET_DEPTH_INIT (0x8000...), i.e. call depth zero */
#define RESET_CALL_DEPTH					\
	xor	%eax, %eax;					\
	bts	$63, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);

/* Set the depth to RET_DEPTH_INIT_FROM_CALL (0xfc00...), one call accounted */
#define RESET_CALL_DEPTH_FROM_CALL				\
	movb	$0xfc, %al;					\
	shl	$56, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

/* Account one call: arithmetic shift right by RET_DEPTH_SHIFT */
#define INCREMENT_CALL_DEPTH					\
	sarq	$5, %gs:pcpu_hot + X86_call_depth;		\
	CALL_THUNKS_DEBUG_INC_CALLS

#define ASM_INCREMENT_CALL_DEPTH				\
	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

#else
#define CREDIT_CALL_DEPTH
#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define INCREMENT_CALL_DEPTH
#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#endif

/*
 * Fill the CPU return stack buffer.
 *
 * Each entry in the RSB, if used for a speculative 'ret', contains an
 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
 *
 * This is required in various cases for retpoline and IBRS-based
 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
 * eliminate potentially bogus entries from the RSB, and sometimes
 * purely to ensure that it doesn't get empty, which on some CPUs would
 * allow predictions from other (unwanted!) sources to be used.
 *
 * We define a CPP macro such that it can be used from both .S files and
 * inline assembly. It's possible to do a .macro and then include that
 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
 */

#define RETPOLINE_THUNK_SIZE	32
#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */

/*
 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
 * The call pushes a return address onto both the stack and the RSB;
 * the int3 is a speculation trap in case that RSB entry gets consumed.
 */
#define __FILL_RETURN_SLOT			\
	ANNOTATE_INTRA_FUNCTION_CALL;		\
	call	772f;				\
	int3;					\
772:

/*
 * Stuff the entire RSB.
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version - two calls, each with their own speculation
 * trap should their return address end up getting used, in a loop.
 */
#ifdef CONFIG_X86_64
#define __FILL_RETURN_BUFFER(reg, nr)			\
	mov	$(nr/2), reg;				\
771:							\
	__FILL_RETURN_SLOT				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
	dec	reg;					\
	jnz	771b;					\
	/* barrier for jnz misprediction */		\
	lfence;						\
	ASM_CREDIT_CALL_DEPTH				\
	CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
 * i386 doesn't unconditionally have LFENCE, as such it can't
 * do a loop.
 */
#define __FILL_RETURN_BUFFER(reg, nr)			\
	.rept nr;					\
	__FILL_RETURN_SLOT;				\
	.endr;						\
	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
#endif
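
/*
 * A user-space model (illustration only) of the 64-bit fill above:
 * nr/2 loop iterations, two return slots per iteration, with the two
 * call-pushed return addresses reclaimed from the stack each time.
 */
#if 0
#include <stdio.h>

int main(void)
{
	int nr = 32;	/* e.g. RSB_CLEAR_LOOPS */
	int rsb_entries = 0, sp = 0;

	for (int i = nr / 2; i; i--) {	/* mov $(nr/2), reg; ... dec; jnz */
		rsb_entries += 2;	/* two __FILL_RETURN_SLOTs	  */
		sp -= 2 * 8;		/* two pushed return addresses	  */
		sp += 2 * 8;		/* add $(BITS_PER_LONG/8) * 2	  */
	}
	printf("%d RSB entries written, net stack delta %d\n",
	       rsb_entries, sp);
	return 0;
}
#endif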

/*
 * Stuff a single RSB slot.
 *
 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
 * forced to retire before letting a RET instruction execute.
 *
 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
 * before this point.
 */
#define __FILL_ONE_RETURN				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
	lfence;

#ifdef __ASSEMBLY__

/*
 * This should be used immediately before an indirect jump/call. It tells
 * objtool the subsequent indirect jump/call is vouched safe for retpoline
 * builds.
 */
.macro ANNOTATE_RETPOLINE_SAFE
.Lhere_\@:
	.pushsection .discard.retpoline_safe
	.long .Lhere_\@
	.popsection
.endm

/*
 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
 * vs RETBleed validation.
 */
#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE

/*
 * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END; this
 * should eventually turn into its own annotation.
 */
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
	(defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
	ANNOTATE_RETPOLINE_SAFE
	nop
#endif
.endm

/*
 * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
 * to the retpoline thunk with a CS prefix when the register requires
 * a REX prefix byte to encode. Also see apply_retpolines().
 */
.macro __CS_PREFIX reg:req
	.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
	.ifc \reg,\rs
	.byte 0x2e
	.endif
	.endr
.endm
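
/*
 * Why the pad matters (context: the rewriting described here happens in
 * apply_retpolines(), outside this header): the 5-byte thunk call can be
 * patched in place into e.g. "lfence; jmp *%reg". For r8-r15 the indirect
 * jmp needs a REX.B prefix, i.e. 3 bytes (41 ff e0 for %r8) instead of 2
 * (ff e0 for %rax), so lfence (0f ae e8) plus the jmp only fits if the
 * call site was padded to 6 bytes with the 0x2e CS prefix.
 */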

/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * NOTE: these do not take kCFI into account and are thus not comparable to C
 * indirect calls; take care when using. The target of these should be an ENDBR
 * instruction irrespective of kCFI.
 */
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	jmp	__x86_indirect_thunk_\reg
#else
	jmp	*%\reg
	int3
#endif
.endm

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	call	__x86_indirect_thunk_\reg
#else
	call	*%\reg
#endif
.endm
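
/*
 * Usage sketch (the symbol and register choice are hypothetical):
 *
 *	movq	handler_ptr(%rip), %r11
 *	CALL_NOSPEC r11
 *
 * With CONFIG_RETPOLINE this emits a (possibly CS-prefixed) direct call
 * to __x86_indirect_thunk_r11 instead of a bare indirect call.
 */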

/*
 * A simpler FILL_RETURN_BUFFER macro, so nobody has to use the CPP
 * monstrosity above manually.
 */
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2

.Lskip_rsb_\@:
.endm
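
/*
 * Usage sketch: stuff the full RSB when \ftr is enabled, fall back to a
 * single PBRSB-safe fill when \ftr2 is. The feature flag shown is an
 * example defined elsewhere, not in this header:
 *
 *	FILL_RETURN_BUFFER %rbx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 */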

#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
#define CALL_UNTRAIN_RET	"call entry_untrain_ret"
#else
#define CALL_UNTRAIN_RET	""
#endif

/*
 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
 * return thunk isn't mapped into the userspace tables (then again, AMD
 * typically has NO_MELTDOWN).
 *
 * While retbleed_untrain_ret() doesn't clobber anything (though it does
 * require a stack), entry_ibpb() will clobber AX, CX and DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
 * where we have a stack but before any RET instruction.
 */
.macro UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
	defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
	VALIDATE_UNRET_END
	ALTERNATIVE_3 "",						\
		      CALL_UNTRAIN_RET, X86_FEATURE_UNRET,		\
		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,	\
		      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

.macro UNTRAIN_RET_VM
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
	defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
	VALIDATE_UNRET_END
	ALTERNATIVE_3 "",						\
		      CALL_UNTRAIN_RET, X86_FEATURE_UNRET,		\
		      "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT,	\
		      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

.macro UNTRAIN_RET_FROM_CALL
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
	defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
	VALIDATE_UNRET_END
	ALTERNATIVE_3 "",						\
		      CALL_UNTRAIN_RET, X86_FEATURE_UNRET,		\
		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,	\
		      __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
#endif
.endm

.macro CALL_DEPTH_ACCOUNT
#ifdef CONFIG_CALL_DEPTH_TRACKING
	ALTERNATIVE "",							\
		    __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

/*
 * Macro to execute a VERW instruction that mitigates transient data sampling
 * attacks such as MDS. On affected systems a microcode update overloaded the
 * VERW instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
 *
 * Note: Only the memory operand variant of VERW clears the CPU buffers.
 */
.macro CLEAR_CPU_BUFFERS
	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
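
/*
 * Usage sketch (placement is illustrative; the real call sites live in
 * the entry code, not in this header): issued right before returning to
 * user space or entering a guest, e.g.
 *
 *	CLEAR_CPU_BUFFERS
 *	sysretq
 */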

#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE					\
	"999:\n\t"						\
	".pushsection .discard.retpoline_safe\n\t"		\
	".long 999b\n\t"					\
	".popsection\n\t"

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

#ifdef CONFIG_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif

extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);

extern void retbleed_untrain_ret(void);
extern void srso_untrain_ret(void);
extern void srso_alias_untrain_ret(void);

extern void entry_untrain_ret(void);
extern void entry_ibpb(void);

extern void (*x86_return_thunk)(void);

#ifdef CONFIG_CALL_DEPTH_TRACKING
extern void __x86_return_skl(void);

static inline void x86_set_skl_return_thunk(void)
{
	x86_return_thunk = &__x86_return_skl;
}

#define CALL_DEPTH_ACCOUNT					\
	ALTERNATIVE("",						\
		    __stringify(INCREMENT_CALL_DEPTH),		\
		    X86_FEATURE_CALL_DEPTH)

#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}

#define CALL_DEPTH_ACCOUNT ""

#endif

#ifdef CONFIG_RETPOLINE

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
 * Inline asm uses the %V modifier, which is only available in newer
 * GCC; that is ensured when CONFIG_RETPOLINE is defined.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"       jmp    904f;\n"					\
	"       .align 16\n"					\
	"901:	call   903f;\n"					\
	"902:	pause;\n"					\
	"	lfence;\n"					\
	"       jmp    902b;\n"					\
	"       .align 16\n"					\
	"903:	lea    4(%%esp), %%esp;\n"			\
	"       pushl  %[thunk_target];\n"			\
	"       ret;\n"						\
	"       .align 16\n"					\
	"904:	call   901b;\n",				\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
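
/*
 * A minimal usage sketch for the CALL_NOSPEC/THUNK_TARGET pair (the
 * function pointer is illustrative; a real caller must also declare
 * the ABI's caller-saved registers as clobbered):
 */
#if 0
static void example_indirect_call(void (*func)(void))
{
	/* Dispatches through the retpoline thunk when enabled. */
	asm volatile(CALL_NOSPEC
		     : : THUNK_TARGET(func)
		     : "memory");
}
#endif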

/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE,
	SPECTRE_V2_LFENCE,
	SPECTRE_V2_EIBRS,
	SPECTRE_V2_EIBRS_RETPOLINE,
	SPECTRE_V2_EIBRS_LFENCE,
	SPECTRE_V2_IBRS,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_STRICT_PREFERRED,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
	SPEC_STORE_BYPASS_DISABLE,
	SPEC_STORE_BYPASS_PRCTL,
	SPEC_STORE_BYPASS_SECCOMP,
};

static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
		: : "c" (msr),
		    "a" ((u32)val),
		    "d" ((u32)(val >> 32)),
		    [feature] "i" (feature)
		: "memory");
}

extern u64 x86_pred_cmd;

static inline void indirect_branch_prediction_barrier(void)
{
	alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
}

/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);

/*
 * With retpoline, we must use IBRS to restrict branch prediction
 * before calling into firmware.
 *
 * (Implemented as CPP macros due to header hell.)
 */
#define firmware_restrict_branch_speculation_start()			\
do {									\
	preempt_disable();						\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
			      X86_FEATURE_USE_IBRS_FW);			\
	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
			      X86_FEATURE_USE_IBPB_FW);			\
} while (0)

#define firmware_restrict_branch_speculation_end()			\
do {									\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current(),			\
			      X86_FEATURE_USE_IBRS_FW);			\
	preempt_enable();						\
} while (0)
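
/*
 * A minimal usage sketch (example_firmware_service() is a hypothetical
 * firmware entry point, not declared in this header):
 */
#if 0
static void example_firmware_call(void)
{
	firmware_restrict_branch_speculation_start();
	example_firmware_service();	/* hypothetical EFI/BIOS service */
	firmware_restrict_branch_speculation_end();
}
#endif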

DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

extern u16 mds_verw_sel;

#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for the MDS and TAA vulnerabilities
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
static __always_inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * Has to be the memory-operand variant because only that
	 * guarantees the CPU buffer flush functionality according to
	 * documentation. The register-operand variant does not.
	 * Works with any segment selector, but a valid writable
	 * data segment is the fastest variant.
	 *
	 * "cc" clobber is required because VERW modifies ZF.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

/**
 * mds_idle_clear_cpu_buffers - Mitigation for the MDS vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled.
 */
static __always_inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */