/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/frame.h>
#include <asm/nops.h>

	.section .text..__x86.indirect_thunk

/*
 * RETPOLINE reg - branch to the address held in %\reg using CALL + RET
 * instead of an indirect JMP.
 *
 *  1. CALL pushes the address of .Lspec_trap_\@ and jumps to .Ldo_rop_\@.
 *  2. The MOV overwrites that pushed return address, at the top of the
 *     stack, with the contents of %\reg.
 *  3. RET (a macro supplied by the included headers) then returns to the
 *     address that was in %\reg.
 *
 * .Lspec_trap_\@ is a closed PAUSE/LFENCE loop located at the return
 * address the CALL pushed. Because of step 2 it is never reached
 * architecturally. NOTE(review): going by the label name it exists to
 * capture speculative execution of the RET - confirm against the
 * retpoline documentation.
 *
 * \@ is an assembler pseudo-variable (a per-expansion counter), so every
 * expansion of the macro gets its own pair of local labels.
 */
.macro RETPOLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call	.Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp	.Lspec_trap_\@
.Ldo_rop_\@:
	mov	%\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
	RET
.endm

/*
 * THUNK reg - emit the global entry point __x86_indirect_thunk_\reg.
 *
 * The entry point starts on a RETPOLINE_THUNK_SIZE boundary and its body
 * is an ALTERNATIVE_2 with three variants:
 *   - default: the RETPOLINE sequence defined above;
 *   - X86_FEATURE_RETPOLINE_LFENCE: LFENCE, then an indirect JMP through
 *     %\reg, with an INT3 behind the JMP;
 *   - ALT_NOT(X86_FEATURE_RETPOLINE): a plain indirect JMP through %\reg.
 * Which variant ends up in the running kernel is decided by ALTERNATIVE_2,
 * which is defined outside this file.
 */
.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm

/*
 * Despite being an
* assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

/*
 * __EXPORT_THUNK(sym) expands to _ASM_NOKPROBE(sym) followed by
 * EXPORT_SYMBOL(sym). EXPORT_THUNK(reg) applies it to the per-register
 * symbol __x86_indirect_thunk_<reg>.
 */
#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

/*
 * First pass over <asm/GEN-for-each-reg.h>: GEN(reg) expands to
 * "THUNK reg", so the thunk bodies are emitted between the start and end
 * markers of __x86_indirect_thunk_array. Both markers are aligned to
 * RETPOLINE_THUNK_SIZE.
 */
	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

/*
 * Second pass over the same header: GEN(reg) now expands to
 * EXPORT_THUNK(reg), which applies __EXPORT_THUNK to every thunk symbol.
 */
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
*/
#ifdef CONFIG_RETHUNK

/*
 * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
 * special addresses:
 *
 * - srso_alias_untrain_ret() is 2M aligned
 * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
 * and 20 in its virtual address are set (while those bits in the
 * srso_alias_untrain_ret() function are cleared).
 *
 * This guarantees that those two addresses will alias in the branch
 * target buffer of Zen3/4 generations, leading to any potential
 * poisoned entries at that BTB slot to get evicted.
 *
 * As a result, srso_alias_safe_ret() becomes a safe return.
 */
#ifdef CONFIG_CPU_SRSO
	/*
	 * NOTE(review): nothing in this file enforces the addresses described
	 * above; presumably the linker script places the two dedicated
	 * sections used below - confirm there.
	 */
	.section .text..__x86.rethunk_untrain

SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	UNWIND_HINT_FUNC
	ASM_NOP2
	lfence
	/* Leave through the alias return thunk, i.e. via srso_alias_safe_ret. */
	jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)

	.section .text..__x86.rethunk_safe
#else
/* dummy definition for alternatives */
SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_untrain_ret)
#endif

/*
 * Reached by the CALL in srso_alias_return_thunk below. The LEA drops the
 * return address that CALL pushed, so the RET returns to whoever jumped
 * to the thunk. Without CONFIG_CPU_SRSO no section switch happens above
 * and this function is emitted into the section that was already active.
 */
SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
	lea 8(%_ASM_SP), %_ASM_SP
	UNWIND_HINT_FUNC
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_safe_ret)

	.section .text..__x86.return_thunk

/*
 * The CALL pushes the address of the UD2; srso_alias_safe_ret discards
 * that slot before its RET, so the UD2 is never reached architecturally.
 */
SYM_CODE_START(srso_alias_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_alias_safe_ret
	ud2
SYM_CODE_END(srso_alias_return_thunk)

/*
 * Some generic notes on the untraining sequences:
 *
* They are interchangeable when it comes to flushing potentially wrong
 * RET predictions from the BTB.
 *
 * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
 * Retbleed sequence because the return sequence done there
 * (srso_safe_ret()) is longer and the return sequence must fully nest
 * (end before) the untraining sequence. Therefore, the untraining
 * sequence must fully overlap the return sequence.
 *
 * Regarding alignment - the instructions which need to be untrained,
 * must all start at a cacheline boundary for Zen1/2 generations. That
 * is, instruction sequences starting at srso_safe_ret() and
 * the respective instruction sequences at retbleed_return_thunk()
 * must start at a cacheline boundary.
 */

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at retbleed_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
*/
	/*
	 * Pad with INT3 (0xcc) so that retbleed_return_thunk, which lies
	 * (retbleed_return_thunk - retbleed_untrain_ret) bytes into the
	 * function, lands exactly on the next 64 byte boundary.
	 */
	.align 64
	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
SYM_FUNC_START_NOALIGN(retbleed_untrain_ret);

	/*
	 * As executed from retbleed_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP retbleed_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from retbleed_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the type was
	 * no branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(retbleed_return_thunk)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp retbleed_return_thunk
	int3
SYM_FUNC_END(retbleed_untrain_ret)
__EXPORT_THUNK(retbleed_untrain_ret)

/*
 * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
 * above. On kernel entry, srso_untrain_ret() is executed which is a
 *
 * movabs $0xccccc30824648d48,%rax
 *
 * and when the return thunk executes the inner label srso_safe_ret()
 * later, it is a stack manipulation and a RET which is mispredicted and
 * thus a "safe" one to use.
*/
	/*
	 * Pad with INT3 (0xcc) so that srso_safe_ret, which lies
	 * (srso_safe_ret - srso_untrain_ret) bytes into the function, lands
	 * exactly on the next 64 byte boundary.
	 */
	.align 64
	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	/*
	 * 0x48 0xb8 opens the MOVABS into %rax; the instructions from
	 * srso_safe_ret up to the "end of movabs" marker supply its
	 * immediate bytes.
	 */
	.byte 0x48, 0xb8

/*
 * This forces the function return instruction to speculate into a trap
 * (UD2 in srso_return_thunk() below). This RET will then mispredict
 * and execution will continue at the return site read from the top of
 * the stack.
 */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	lea 8(%_ASM_SP), %_ASM_SP
	ret
	int3
	int3
	/* end of movabs */
	lfence
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)

/*
 * The CALL pushes the address of the UD2; srso_safe_ret drops that slot
 * with its LEA and returns to whoever jumped to this thunk, so the UD2 is
 * not reached architecturally.
 */
SYM_CODE_START(srso_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_return_thunk)

/*
 * entry_untrain_ret - a single JMP to one of the untraining sequences.
 * ALTERNATIVE_2 lists retbleed_untrain_ret as the default, srso_untrain_ret
 * for X86_FEATURE_SRSO and srso_alias_untrain_ret for
 * X86_FEATURE_SRSO_ALIAS.
 */
SYM_FUNC_START(entry_untrain_ret)
	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
		      "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)

/*
 * __x86_return_thunk - a bare RET followed by an INT3.
 *
 * Unlike the other thunks in this file it is exported with plain
 * EXPORT_SYMBOL rather than __EXPORT_THUNK.
 * NOTE(review): the "magical name" note earlier in this file (the symbol
 * the compiler JMPs to under -mfunction-return=thunk-extern) presumably
 * refers to this symbol - confirm.
 */
SYM_CODE_START(__x86_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */