xref: /kernel/linux/linux-5.10/arch/x86/lib/retpoline.S (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci#include <linux/stringify.h>
48c2ecf20Sopenharmony_ci#include <linux/linkage.h>
58c2ecf20Sopenharmony_ci#include <asm/dwarf2.h>
68c2ecf20Sopenharmony_ci#include <asm/cpufeatures.h>
78c2ecf20Sopenharmony_ci#include <asm/alternative.h>
88c2ecf20Sopenharmony_ci#include <asm/export.h>
98c2ecf20Sopenharmony_ci#include <asm/nospec-branch.h>
108c2ecf20Sopenharmony_ci#include <asm/unwind_hints.h>
118c2ecf20Sopenharmony_ci#include <asm/frame.h>
128c2ecf20Sopenharmony_ci#include <asm/nops.h>
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci	.section .text..__x86.indirect_thunk
158c2ecf20Sopenharmony_ci
/*
 * RETPOLINE reg - transfer control to the address held in %\reg using a
 * CALL/RET pair instead of an indirect JMP.
 *
 * The CALL pushes the address of .Lspec_trap_\@ as its return address.  The
 * MOV at .Ldo_rop_\@ then overwrites that stack slot with %\reg, so the
 * final RET architecturally lands on the register's target.  The
 * pause/lfence/jmp loop at .Lspec_trap_\@ is not reached architecturally by
 * this sequence; judging by its label it is there to capture speculation
 * that follows the original return address.
 *
 * \@ is the assembler's macro-expansion counter; it keeps the two local
 * labels unique in every expansion.  RET (upper case) is a macro defined
 * outside this file.
 */
168c2ecf20Sopenharmony_ci.macro RETPOLINE reg
178c2ecf20Sopenharmony_ci	ANNOTATE_INTRA_FUNCTION_CALL
188c2ecf20Sopenharmony_ci	call    .Ldo_rop_\@
198c2ecf20Sopenharmony_ci.Lspec_trap_\@:
208c2ecf20Sopenharmony_ci	UNWIND_HINT_EMPTY
218c2ecf20Sopenharmony_ci	pause
228c2ecf20Sopenharmony_ci	lfence
238c2ecf20Sopenharmony_ci	jmp .Lspec_trap_\@
248c2ecf20Sopenharmony_ci.Ldo_rop_\@:
258c2ecf20Sopenharmony_ci	mov     %\reg, (%_ASM_SP)
268c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
278c2ecf20Sopenharmony_ci	RET
288c2ecf20Sopenharmony_ci.endm
298c2ecf20Sopenharmony_ci
/*
 * THUNK reg - emit the global entry point __x86_indirect_thunk_\reg,
 * aligned to RETPOLINE_THUNK_SIZE.
 *
 * The body is an ALTERNATIVE_2 with three candidate sequences:
 *   - default:                        the RETPOLINE \reg call/ret sequence
 *   - X86_FEATURE_RETPOLINE_LFENCE:   lfence; jmp *%\reg; int3
 *   - ALT_NOT(X86_FEATURE_RETPOLINE): a bare jmp *%\reg
 *
 * Choosing between them is the job of ALTERNATIVE_2 (not defined in this
 * file), keyed on the feature flags listed above.
 */
308c2ecf20Sopenharmony_ci.macro THUNK reg
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	.align RETPOLINE_THUNK_SIZE
338c2ecf20Sopenharmony_ciSYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
348c2ecf20Sopenharmony_ci	UNWIND_HINT_EMPTY
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
378c2ecf20Sopenharmony_ci		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
388c2ecf20Sopenharmony_ci		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci.endm
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci/*
438c2ecf20Sopenharmony_ci * Despite being an assembler file we can't just use .irp here
448c2ecf20Sopenharmony_ci * because __KSYM_DEPS__ only uses the C preprocessor and would
458c2ecf20Sopenharmony_ci * only see one instance of "__x86_indirect_thunk_\reg" rather
468c2ecf20Sopenharmony_ci * than one per register with the correct names. So we do it
478c2ecf20Sopenharmony_ci * the simple and nasty way...
488c2ecf20Sopenharmony_ci *
498c2ecf20Sopenharmony_ci * Worse, you can only have a single EXPORT_SYMBOL per line,
508c2ecf20Sopenharmony_ci * and CPP can't insert newlines, so we have to repeat everything
518c2ecf20Sopenharmony_ci * at least twice.
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_ci
/* Apply both _ASM_NOKPROBE() and EXPORT_SYMBOL() to a single symbol. */
548c2ecf20Sopenharmony_ci#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
/* Same, for the per-register thunk named __x86_indirect_thunk_<reg>. */
558c2ecf20Sopenharmony_ci#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
568c2ecf20Sopenharmony_ci
/*
 * __x86_indirect_thunk_array brackets the per-register thunks.
 *
 * <asm/GEN-for-each-reg.h> is included twice with a different GEN()
 * definition each time; it is presumed to invoke GEN(reg) once per
 * register (its contents are not visible here - TODO confirm).
 *
 *   pass 1 (inside the array): GEN(reg) -> THUNK reg, emitting the code
 *   pass 2 (after the array):  GEN(reg) -> EXPORT_THUNK(reg), exporting it
 *
 * Both the start of the array and the position just before SYM_CODE_END
 * are aligned to RETPOLINE_THUNK_SIZE, the same alignment THUNK applies
 * to each entry.
 */
578c2ecf20Sopenharmony_ci	.align RETPOLINE_THUNK_SIZE
588c2ecf20Sopenharmony_ciSYM_CODE_START(__x86_indirect_thunk_array)
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci#define GEN(reg) THUNK reg
618c2ecf20Sopenharmony_ci#include <asm/GEN-for-each-reg.h>
628c2ecf20Sopenharmony_ci#undef GEN
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	.align RETPOLINE_THUNK_SIZE
658c2ecf20Sopenharmony_ciSYM_CODE_END(__x86_indirect_thunk_array)
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci#define GEN(reg) EXPORT_THUNK(reg)
688c2ecf20Sopenharmony_ci#include <asm/GEN-for-each-reg.h>
698c2ecf20Sopenharmony_ci#undef GEN
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci/*
728c2ecf20Sopenharmony_ci * The function name __x86_return_thunk (defined at the end of this section)
738c2ecf20Sopenharmony_ci * is magical: with -mfunction-return=thunk-extern the compiler generates JMPs to it.
748c2ecf20Sopenharmony_ci */
758c2ecf20Sopenharmony_ci#ifdef CONFIG_RETHUNK
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci/*
788c2ecf20Sopenharmony_ci * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
798c2ecf20Sopenharmony_ci * special addresses:
808c2ecf20Sopenharmony_ci *
818c2ecf20Sopenharmony_ci * - srso_alias_untrain_ret() is 2M aligned
828c2ecf20Sopenharmony_ci * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
838c2ecf20Sopenharmony_ci * and 20 in its virtual address are set (while those bits in the
848c2ecf20Sopenharmony_ci * srso_alias_untrain_ret() function are cleared).
858c2ecf20Sopenharmony_ci *
868c2ecf20Sopenharmony_ci * This guarantees that those two addresses will alias in the branch
878c2ecf20Sopenharmony_ci * target buffer of Zen3/4 generations, leading to any potential
888c2ecf20Sopenharmony_ci * poisoned entries at that BTB slot to get evicted.
898c2ecf20Sopenharmony_ci *
908c2ecf20Sopenharmony_ci * As a result, srso_alias_safe_ret() becomes a safe return.
918c2ecf20Sopenharmony_ci */
/*
 * srso_alias_untrain_ret - two build-time variants.
 *
 * CONFIG_CPU_SRSO: the routine is emitted into its own section,
 * .text..__x86.rethunk_untrain, and consists of ASM_NOP2 (a two-byte NOP
 * by its name), an LFENCE and a JMP to srso_alias_return_thunk.  This file
 * only names the section; where that section ends up in the address space
 * is decided elsewhere (presumably the linker script - TODO confirm).
 * Afterwards the section is switched to .text..__x86.rethunk_safe, so the
 * code following the #endif is emitted there.
 *
 * !CONFIG_CPU_SRSO: a stub that returns immediately.  It keeps the symbol
 * defined for the "jmp srso_alias_untrain_ret" alternative used in
 * entry_untrain_ret.  No section switch happens in this variant.
 */
928c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_SRSO
938c2ecf20Sopenharmony_ci	.section .text..__x86.rethunk_untrain
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ciSYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
968c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
978c2ecf20Sopenharmony_ci	ASM_NOP2
988c2ecf20Sopenharmony_ci	lfence
998c2ecf20Sopenharmony_ci	jmp srso_alias_return_thunk
1008c2ecf20Sopenharmony_ciSYM_FUNC_END(srso_alias_untrain_ret)
1018c2ecf20Sopenharmony_ci__EXPORT_THUNK(srso_alias_untrain_ret)
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	.section .text..__x86.rethunk_safe
1048c2ecf20Sopenharmony_ci#else
1058c2ecf20Sopenharmony_ci/* dummy definition for alternatives */
1068c2ecf20Sopenharmony_ciSYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
1078c2ecf20Sopenharmony_ci	ANNOTATE_UNRET_SAFE
1088c2ecf20Sopenharmony_ci	ret
1098c2ecf20Sopenharmony_ci	int3
1108c2ecf20Sopenharmony_ciSYM_FUNC_END(srso_alias_untrain_ret)
1118c2ecf20Sopenharmony_ci#endif
1128c2ecf20Sopenharmony_ci
/*
 * srso_alias_safe_ret - the return half of the SRSO-alias thunk pair.
 *
 * It is entered by the CALL in srso_alias_return_thunk.  The LEA advances
 * the stack pointer by 8, discarding the return address that CALL pushed,
 * so the RET consumes the return address that was on the stack before the
 * thunk was entered.  LEA is used for the adjustment, which leaves the
 * flags untouched.  The trailing INT3 is not reached architecturally
 * (presumably straight-line-speculation protection, as noted for the INT3
 * in retbleed_untrain_ret).
 */
1138c2ecf20Sopenharmony_ciSYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
1148c2ecf20Sopenharmony_ci	lea 8(%_ASM_SP), %_ASM_SP
1158c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
1168c2ecf20Sopenharmony_ci	ANNOTATE_UNRET_SAFE
1178c2ecf20Sopenharmony_ci	ret
1188c2ecf20Sopenharmony_ci	int3
1198c2ecf20Sopenharmony_ciSYM_FUNC_END(srso_alias_safe_ret)
1208c2ecf20Sopenharmony_ci
/*
 * Everything from here on is emitted into .text..__x86.return_thunk
 * (no later .section directive appears in this file).
 *
 * srso_alias_return_thunk - return by CALLing srso_alias_safe_ret.
 * That routine drops the return address pushed by this CALL and returns
 * to this thunk's caller, so the UD2 after the CALL is not reached
 * architecturally.
 */
1218c2ecf20Sopenharmony_ci	.section .text..__x86.return_thunk
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ciSYM_CODE_START(srso_alias_return_thunk)
1248c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
1258c2ecf20Sopenharmony_ci	ANNOTATE_NOENDBR
1268c2ecf20Sopenharmony_ci	call srso_alias_safe_ret
1278c2ecf20Sopenharmony_ci	ud2
1288c2ecf20Sopenharmony_ciSYM_CODE_END(srso_alias_return_thunk)
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci/*
1318c2ecf20Sopenharmony_ci * Some generic notes on the untraining sequences:
1328c2ecf20Sopenharmony_ci *
1338c2ecf20Sopenharmony_ci * They are interchangeable when it comes to flushing potentially wrong
1348c2ecf20Sopenharmony_ci * RET predictions from the BTB.
1358c2ecf20Sopenharmony_ci *
1368c2ecf20Sopenharmony_ci * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
1378c2ecf20Sopenharmony_ci * Retbleed sequence because the return sequence done there
1388c2ecf20Sopenharmony_ci * (srso_safe_ret()) is longer and the return sequence must fully nest
1398c2ecf20Sopenharmony_ci * (end before) the untraining sequence. Therefore, the untraining
1408c2ecf20Sopenharmony_ci * sequence must fully overlap the return sequence.
1418c2ecf20Sopenharmony_ci *
1428c2ecf20Sopenharmony_ci * Regarding alignment - the instructions which need to be untrained,
1438c2ecf20Sopenharmony_ci * must all start at a cacheline boundary for Zen1/2 generations. That
1448c2ecf20Sopenharmony_ci * is, instruction sequences starting at srso_safe_ret() and
1458c2ecf20Sopenharmony_ci * the respective instruction sequences at retbleed_return_thunk()
1468c2ecf20Sopenharmony_ci * must start at a cacheline boundary.
1478c2ecf20Sopenharmony_ci */
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci/*
1508c2ecf20Sopenharmony_ci * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
1518c2ecf20Sopenharmony_ci * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
1528c2ecf20Sopenharmony_ci *    alignment within the BTB.
1538c2ecf20Sopenharmony_ci * 2) The instruction at retbleed_untrain_ret must contain, and not
1548c2ecf20Sopenharmony_ci *    end with, the 0xc3 byte of the RET.
1558c2ecf20Sopenharmony_ci * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
1568c2ecf20Sopenharmony_ci *    from re-poisoning the BTB prediction.
1578c2ecf20Sopenharmony_ci */
/*
 * Layout: after aligning to 64, pad with 0xcc so that the distance from
 * the aligned address to retbleed_untrain_ret is
 * 64 - (retbleed_return_thunk - retbleed_untrain_ret).  That places
 * retbleed_return_thunk itself exactly on the next 64-byte boundary.
 * The label distance is the single .byte 0xf6 below, provided the label
 * macros emit no bytes of their own.
 *
 * Byte stream starting at retbleed_untrain_ret:
 *   f6      - first byte of TEST (emitted with .byte)
 *   c3      - RET, at retbleed_return_thunk
 *   cc      - INT3
 * Decoded from retbleed_untrain_ret these three bytes are the
 * TEST $0xcc, %bl described below.
 */
1588c2ecf20Sopenharmony_ci	.align 64
1598c2ecf20Sopenharmony_ci	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
1608c2ecf20Sopenharmony_ciSYM_FUNC_START_NOALIGN(retbleed_untrain_ret);
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	/*
1638c2ecf20Sopenharmony_ci	 * As executed from retbleed_untrain_ret, this is:
1648c2ecf20Sopenharmony_ci	 *
1658c2ecf20Sopenharmony_ci	 *   TEST $0xcc, %bl
1668c2ecf20Sopenharmony_ci	 *   LFENCE
1678c2ecf20Sopenharmony_ci	 *   JMP retbleed_return_thunk
1688c2ecf20Sopenharmony_ci	 *
1698c2ecf20Sopenharmony_ci	 * Executing the TEST instruction has a side effect of evicting any BTB
1708c2ecf20Sopenharmony_ci	 * prediction (potentially attacker controlled) attached to the RET, as
1718c2ecf20Sopenharmony_ci	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
1728c2ecf20Sopenharmony_ci	 */
1738c2ecf20Sopenharmony_ci	.byte	0xf6
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	/*
1768c2ecf20Sopenharmony_ci	 * As executed from retbleed_return_thunk, this is a plain RET.
1778c2ecf20Sopenharmony_ci	 *
1788c2ecf20Sopenharmony_ci	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
1798c2ecf20Sopenharmony_ci	 *
1808c2ecf20Sopenharmony_ci	 * We subsequently jump backwards and architecturally execute the RET.
1818c2ecf20Sopenharmony_ci	 * This creates a correct BTB prediction (type=ret), but in the
1828c2ecf20Sopenharmony_ci	 * meantime we suffer Straight Line Speculation (because the type was
1838c2ecf20Sopenharmony_ci	 * no branch) which is halted by the INT3.
1848c2ecf20Sopenharmony_ci	 *
1858c2ecf20Sopenharmony_ci	 * With SMT enabled and STIBP active, a sibling thread cannot poison
1868c2ecf20Sopenharmony_ci	 * RET's prediction to a type of its choice, but can evict the
1878c2ecf20Sopenharmony_ci	 * prediction due to competitive sharing. If the prediction is
1888c2ecf20Sopenharmony_ci	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
1898c2ecf20Sopenharmony_ci	 * which will be contained safely by the INT3.
1908c2ecf20Sopenharmony_ci	 */
1918c2ecf20Sopenharmony_ciSYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
1928c2ecf20Sopenharmony_ci	ret
1938c2ecf20Sopenharmony_ci	int3
1948c2ecf20Sopenharmony_ciSYM_CODE_END(retbleed_return_thunk)
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	/*
1978c2ecf20Sopenharmony_ci	 * Ensure the TEST decoding / BTB invalidation is complete.
1988c2ecf20Sopenharmony_ci	 */
1998c2ecf20Sopenharmony_ci	lfence
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	/*
2028c2ecf20Sopenharmony_ci	 * Jump back and execute the RET in the middle of the TEST instruction.
2038c2ecf20Sopenharmony_ci	 * INT3 is for SLS protection.
2048c2ecf20Sopenharmony_ci	 */
2058c2ecf20Sopenharmony_ci	jmp retbleed_return_thunk
2068c2ecf20Sopenharmony_ci	int3
2078c2ecf20Sopenharmony_ciSYM_FUNC_END(retbleed_untrain_ret)
2088c2ecf20Sopenharmony_ci__EXPORT_THUNK(retbleed_untrain_ret)
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci/*
2118c2ecf20Sopenharmony_ci * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
2128c2ecf20Sopenharmony_ci * above. On kernel entry, srso_untrain_ret() is executed which is a
2138c2ecf20Sopenharmony_ci *
2148c2ecf20Sopenharmony_ci * movabs $0xccccc30824648d48,%rax
2158c2ecf20Sopenharmony_ci *
2168c2ecf20Sopenharmony_ci * and when the return thunk executes the inner label srso_safe_ret()
2178c2ecf20Sopenharmony_ci * later, it is a stack manipulation and a RET which is mispredicted and
2188c2ecf20Sopenharmony_ci * thus a "safe" one to use.
2198c2ecf20Sopenharmony_ci */
/*
 * Layout: same padding scheme as retbleed_untrain_ret - after the 64-byte
 * alignment, 0xcc filler is emitted so that the inner label srso_safe_ret
 * lands exactly on the next 64-byte boundary.
 *
 * Byte stream starting at srso_untrain_ret:
 *   48 b8              - REX.W + B8: MOVABS $imm64, %rax (emitted with .byte)
 *   48 8d 64 24 08     - lea 8(%rsp), %rsp   \
 *   c3                 - ret                  > the 8 immediate bytes
 *   cc cc              - int3; int3          /
 * Read little-endian, those eight bytes are 0xccccc30824648d48.
 *
 * Entered at srso_untrain_ret, execution therefore runs MOVABS, LFENCE,
 * then CALL srso_safe_ret.  Entered at srso_safe_ret, the LEA drops the
 * return address pushed by the CALL and the RET returns to the caller of
 * the enclosing routine; the UD2 is not reached architecturally.
 */
2208c2ecf20Sopenharmony_ci	.align 64
2218c2ecf20Sopenharmony_ci	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
2228c2ecf20Sopenharmony_ciSYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
2238c2ecf20Sopenharmony_ci	.byte 0x48, 0xb8
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci/*
2268c2ecf20Sopenharmony_ci * This forces the function return instruction to speculate into a trap
2278c2ecf20Sopenharmony_ci * (UD2 in srso_return_thunk() below).  This RET will then mispredict
2288c2ecf20Sopenharmony_ci * and execution will continue at the return site read from the top of
2298c2ecf20Sopenharmony_ci * the stack.
2308c2ecf20Sopenharmony_ci */
2318c2ecf20Sopenharmony_ciSYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
2328c2ecf20Sopenharmony_ci	lea 8(%_ASM_SP), %_ASM_SP
2338c2ecf20Sopenharmony_ci	ret
2348c2ecf20Sopenharmony_ci	int3
2358c2ecf20Sopenharmony_ci	int3
2368c2ecf20Sopenharmony_ci	/* end of movabs */
2378c2ecf20Sopenharmony_ci	lfence
2388c2ecf20Sopenharmony_ci	call srso_safe_ret
2398c2ecf20Sopenharmony_ci	ud2
2408c2ecf20Sopenharmony_ciSYM_CODE_END(srso_safe_ret)
2418c2ecf20Sopenharmony_ciSYM_FUNC_END(srso_untrain_ret)
2428c2ecf20Sopenharmony_ci__EXPORT_THUNK(srso_untrain_ret)
2438c2ecf20Sopenharmony_ci
/*
 * srso_return_thunk - return by CALLing srso_safe_ret.
 *
 * srso_safe_ret advances the stack pointer past the return address pushed
 * by this CALL and then executes RET, so control goes back to this thunk's
 * caller.  The UD2 after the CALL is not reached architecturally.
 */
2448c2ecf20Sopenharmony_ciSYM_CODE_START(srso_return_thunk)
2458c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
2468c2ecf20Sopenharmony_ci	ANNOTATE_NOENDBR
2478c2ecf20Sopenharmony_ci	call srso_safe_ret
2488c2ecf20Sopenharmony_ci	ud2
2498c2ecf20Sopenharmony_ciSYM_CODE_END(srso_return_thunk)
2508c2ecf20Sopenharmony_ci
/*
 * entry_untrain_ret - single dispatch point for the untraining sequences.
 *
 * The body is one JMP chosen by ALTERNATIVE_2:
 *   - default:                 jmp retbleed_untrain_ret
 *   - X86_FEATURE_SRSO:        jmp srso_untrain_ret
 *   - X86_FEATURE_SRSO_ALIAS:  jmp srso_alias_untrain_ret
 *
 * Because it is a JMP rather than a CALL, nothing is pushed here; the
 * selected routine's own return goes back to entry_untrain_ret's caller.
 */
2518c2ecf20Sopenharmony_ciSYM_FUNC_START(entry_untrain_ret)
2528c2ecf20Sopenharmony_ci	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
2538c2ecf20Sopenharmony_ci		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
2548c2ecf20Sopenharmony_ci		      "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
2558c2ecf20Sopenharmony_ciSYM_FUNC_END(entry_untrain_ret)
2568c2ecf20Sopenharmony_ci__EXPORT_THUNK(entry_untrain_ret)
2578c2ecf20Sopenharmony_ci
/*
 * __x86_return_thunk - as assembled here, a plain RET followed by INT3.
 *
 * Presumably this is the "magical" name that the
 * -mfunction-return=thunk-extern comment at the top of the CONFIG_RETHUNK
 * section refers to.  It is exported with plain EXPORT_SYMBOL(), not via
 * __EXPORT_THUNK(), so no _ASM_NOKPROBE() is applied to it here.
 */
2588c2ecf20Sopenharmony_ciSYM_CODE_START(__x86_return_thunk)
2598c2ecf20Sopenharmony_ci	UNWIND_HINT_FUNC
2608c2ecf20Sopenharmony_ci	ANNOTATE_NOENDBR
2618c2ecf20Sopenharmony_ci	ANNOTATE_UNRET_SAFE
2628c2ecf20Sopenharmony_ci	ret
2638c2ecf20Sopenharmony_ci	int3
2648c2ecf20Sopenharmony_ciSYM_CODE_END(__x86_return_thunk)
2658c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__x86_return_thunk)
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci#endif /* CONFIG_RETHUNK */
268