/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/frame.h>
#include <asm/nops.h>

	.section .text..__x86.indirect_thunk

/*
 * Classic retpoline for an indirect jump through \reg: the CALL pushes
 * the address of the speculation trap, the MOV then overwrites that
 * return address on the stack with the real target so the RET jumps
 * there, while any speculative return is caught in the PAUSE/LFENCE loop.
 */
.macro RETPOLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call	.Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp	.Lspec_trap_\@
.Ldo_rop_\@:
	mov	%\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
	RET
.endm

.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm

/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 */
#ifdef CONFIG_RETHUNK

/*
 * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
 * special addresses:
 *
 * - srso_alias_untrain_ret() is 2M aligned
 * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
 *   and 20 in its virtual address are set (while those bits in the
 *   srso_alias_untrain_ret() function are cleared).
 *
 * This guarantees that those two addresses will alias in the branch
 * target buffer of Zen3/4 generations, so that any potentially
 * poisoned entries at that BTB slot get evicted.
 *
 * As a result, srso_alias_safe_ret() becomes a safe return.
 */
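
/*
 * A rough sketch of what the placement above means in practice: since
 * srso_alias_untrain_ret() is 2M aligned, bits 0-20 of its address are
 * all clear, so the constraint effectively comes down to
 *
 *	srso_alias_safe_ret == srso_alias_untrain_ret | 0x104104
 *
 * where 0x104104 is bits 2, 8, 14 and 20 OR-ed together. The placement
 * itself is done at link time, via the handling of the
 * .text..__x86.rethunk_untrain and .text..__x86.rethunk_safe sections
 * in the kernel linker script.
 */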
#ifdef CONFIG_CPU_SRSO
	.section .text..__x86.rethunk_untrain

SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	UNWIND_HINT_FUNC
	ASM_NOP2
	lfence
	jmp	srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)

	.section .text..__x86.rethunk_safe
#else
/* dummy definition for alternatives */
SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_untrain_ret)
#endif

SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
	lea	8(%_ASM_SP), %_ASM_SP
	UNWIND_HINT_FUNC
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_safe_ret)

	.section .text..__x86.return_thunk

SYM_CODE_START(srso_alias_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call	srso_alias_safe_ret
	ud2
SYM_CODE_END(srso_alias_return_thunk)

/*
 * Some generic notes on the untraining sequences:
 *
 * They are interchangeable when it comes to flushing potentially wrong
 * RET predictions from the BTB.
 *
 * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
 * Retbleed sequence because the return sequence done there
 * (srso_safe_ret()) is longer and the return sequence must fully nest
 * (end before) the untraining sequence. Therefore, the untraining
 * sequence must fully overlap the return sequence.
 *
 * Regarding alignment - the instructions which need to be untrained
 * must all start at a cacheline boundary for Zen1/2 generations. That
 * is, instruction sequences starting at srso_safe_ret() and
 * the respective instruction sequences at retbleed_return_thunk()
 * must start at a cacheline boundary.
 */

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at retbleed_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
	.align 64
	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
SYM_FUNC_START_NOALIGN(retbleed_untrain_ret);

	/*
	 * As executed from retbleed_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP retbleed_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from retbleed_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the type was
	 * no branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
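
	/*
	 * Byte-level sketch of that overlap (the 0xc3/0xcc bytes come from
	 * the RET/INT3 emitted right below):
	 *
	 *	f6 c3 cc	test $0xcc, %bl		(from retbleed_untrain_ret)
	 *	   c3		ret			(from retbleed_return_thunk)
	 *	      cc	int3
	 *
	 * That is, 0xf6 is the TEST opcode, 0xc3 doubles as its ModRM byte
	 * (selecting %bl) and as RET, and 0xcc doubles as the imm8 and as INT3.
	 */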
SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(retbleed_return_thunk)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp	retbleed_return_thunk
	int3
SYM_FUNC_END(retbleed_untrain_ret)
__EXPORT_THUNK(retbleed_untrain_ret)

/*
 * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
 * above. On kernel entry, srso_untrain_ret() is executed which is a
 *
 * movabs $0xccccc30824648d48,%rax
 *
 * and when the return thunk executes the inner label srso_safe_ret()
 * later, it is a stack manipulation and a RET which is mispredicted and
 * thus a "safe" one to use.
 */
	.align 64
	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	.byte 0x48, 0xb8

/*
 * This forces the function return instruction to speculate into a trap
 * (UD2 in srso_return_thunk() below). This RET will then mispredict
 * and execution will continue at the return site read from the top of
 * the stack.
 */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	lea	8(%_ASM_SP), %_ASM_SP
	ret
	int3
	int3
	/* end of movabs */
	lfence
	call	srso_safe_ret
	ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)

SYM_CODE_START(srso_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call	srso_safe_ret
	ud2
SYM_CODE_END(srso_return_thunk)

SYM_FUNC_START(entry_untrain_ret)
	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
		      "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)

SYM_CODE_START(__x86_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */
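
/*
 * Byte-level sketch of the srso_untrain_ret()/srso_safe_ret() overlap
 * defined above:
 *
 *	48 b8			REX.W + MOVABS opcode	(srso_untrain_ret)
 *	48 8d 64 24 08		lea 8(%rsp), %rsp	(srso_safe_ret)
 *	c3			ret
 *	cc cc			int3; int3
 *
 * Read from srso_untrain_ret(), the eight bytes following "48 b8" form
 * the little-endian immediate 0xccccc30824648d48; read from
 * srso_safe_ret(), the same bytes are the stack adjustment and the RET
 * that performs the actual return.
 */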