1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#ifndef _ASM_X86_NOSPEC_BRANCH_H_ 4#define _ASM_X86_NOSPEC_BRANCH_H_ 5 6#include <linux/static_key.h> 7#include <linux/objtool.h> 8#include <linux/linkage.h> 9 10#include <asm/alternative.h> 11#include <asm/cpufeatures.h> 12#include <asm/msr-index.h> 13#include <asm/unwind_hints.h> 14#include <asm/percpu.h> 15 16#define RETPOLINE_THUNK_SIZE 32 17 18/* 19 * Fill the CPU return stack buffer. 20 * 21 * Each entry in the RSB, if used for a speculative 'ret', contains an 22 * infinite 'pause; lfence; jmp' loop to capture speculative execution. 23 * 24 * This is required in various cases for retpoline and IBRS-based 25 * mitigations for the Spectre variant 2 vulnerability. Sometimes to 26 * eliminate potentially bogus entries from the RSB, and sometimes 27 * purely to ensure that it doesn't get empty, which on some CPUs would 28 * allow predictions from other (unwanted!) sources to be used. 29 * 30 * We define a CPP macro such that it can be used from both .S files and 31 * inline assembly. It's possible to do a .macro and then include that 32 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. 33 */ 34 35#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ 36 37/* 38 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. 39 */ 40#define __FILL_RETURN_SLOT \ 41 ANNOTATE_INTRA_FUNCTION_CALL; \ 42 call 772f; \ 43 int3; \ 44772: 45 46/* 47 * Stuff the entire RSB. 48 * 49 * Google experimented with loop-unrolling and this turned out to be 50 * the optimal version — two calls, each with their own speculation 51 * trap should their return address end up getting used, in a loop. 52 */ 53#ifdef CONFIG_X86_64 54#define __FILL_RETURN_BUFFER(reg, nr) \ 55 mov $(nr/2), reg; \ 56771: \ 57 __FILL_RETURN_SLOT \ 58 __FILL_RETURN_SLOT \ 59 add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ 60 dec reg; \ 61 jnz 771b; \ 62 /* barrier for jnz misprediction */ \ 63 lfence; 64#else 65/* 66 * i386 doesn't unconditionally have LFENCE, as such it can't 67 * do a loop. 68 */ 69#define __FILL_RETURN_BUFFER(reg, nr) \ 70 .rept nr; \ 71 __FILL_RETURN_SLOT; \ 72 .endr; \ 73 add $(BITS_PER_LONG/8) * nr, %_ASM_SP; 74#endif 75 76/* 77 * Stuff a single RSB slot. 78 * 79 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be 80 * forced to retire before letting a RET instruction execute. 81 * 82 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed 83 * before this point. 84 */ 85#define __FILL_ONE_RETURN \ 86 __FILL_RETURN_SLOT \ 87 add $(BITS_PER_LONG/8), %_ASM_SP; \ 88 lfence; 89 90#ifdef __ASSEMBLY__ 91 92/* 93 * This should be used immediately before an indirect jump/call. It tells 94 * objtool the subsequent indirect jump/call is vouched safe for retpoline 95 * builds. 96 */ 97.macro ANNOTATE_RETPOLINE_SAFE 98 .Lannotate_\@: 99 .pushsection .discard.retpoline_safe 100 _ASM_PTR .Lannotate_\@ 101 .popsection 102.endm 103 104/* 105 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions 106 * vs RETBleed validation. 107 */ 108#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE 109 110/* 111 * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should 112 * eventually turn into it's own annotation. 113 */ 114.macro ANNOTATE_UNRET_END 115#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) 116 ANNOTATE_RETPOLINE_SAFE 117 nop 118#endif 119.endm 120 121/* 122 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple 123 * indirect jmp/call which may be susceptible to the Spectre variant 2 124 * attack. 125 */ 126.macro JMP_NOSPEC reg:req 127#ifdef CONFIG_RETPOLINE 128 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ 129 __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ 130 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE 131#else 132 jmp *%\reg 133#endif 134.endm 135 136.macro CALL_NOSPEC reg:req 137#ifdef CONFIG_RETPOLINE 138 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ 139 __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ 140 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE 141#else 142 call *%\reg 143#endif 144.endm 145 146 /* 147 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP 148 * monstrosity above, manually. 149 */ 150.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) 151 ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ 152 __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ 153 __stringify(__FILL_ONE_RETURN), \ftr2 154 155.Lskip_rsb_\@: 156.endm 157 158#ifdef CONFIG_CPU_UNRET_ENTRY 159#define CALL_UNTRAIN_RET "call entry_untrain_ret" 160#else 161#define CALL_UNTRAIN_RET "" 162#endif 163 164/* 165 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the 166 * return thunk isn't mapped into the userspace tables (then again, AMD 167 * typically has NO_MELTDOWN). 168 * 169 * While retbleed_untrain_ret() doesn't clobber anything but requires stack, 170 * entry_ibpb() will clobber AX, CX, DX. 171 * 172 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point 173 * where we have a stack but before any RET instruction. 174 */ 175.macro UNTRAIN_RET 176#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ 177 defined(CONFIG_CPU_SRSO) 178 ANNOTATE_UNRET_END 179 ALTERNATIVE_2 "", \ 180 CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ 181 "call entry_ibpb", X86_FEATURE_ENTRY_IBPB 182#endif 183.endm 184 185/* 186 * Macro to execute VERW instruction that mitigate transient data sampling 187 * attacks such as MDS. On affected systems a microcode update overloaded VERW 188 * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. 189 * 190 * Note: Only the memory operand variant of VERW clears the CPU buffers. 191 */ 192.macro CLEAR_CPU_BUFFERS 193 ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF 194 verw _ASM_RIP(mds_verw_sel) 195.Lskip_verw_\@: 196.endm 197 198#else /* __ASSEMBLY__ */ 199 200#define ANNOTATE_RETPOLINE_SAFE \ 201 "999:\n\t" \ 202 ".pushsection .discard.retpoline_safe\n\t" \ 203 _ASM_PTR " 999b\n\t" \ 204 ".popsection\n\t" 205 206#ifdef CONFIG_RETHUNK 207extern void __x86_return_thunk(void); 208#else 209static inline void __x86_return_thunk(void) {} 210#endif 211 212extern void retbleed_return_thunk(void); 213extern void srso_return_thunk(void); 214extern void srso_alias_return_thunk(void); 215 216extern void retbleed_untrain_ret(void); 217extern void srso_untrain_ret(void); 218extern void srso_alias_untrain_ret(void); 219 220extern void entry_untrain_ret(void); 221extern void entry_ibpb(void); 222 223#ifdef CONFIG_RETPOLINE 224 225typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; 226 227#define GEN(reg) \ 228 extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; 229#include <asm/GEN-for-each-reg.h> 230#undef GEN 231 232extern retpoline_thunk_t __x86_indirect_thunk_array[]; 233 234#ifdef CONFIG_X86_64 235 236/* 237 * Inline asm uses the %V modifier which is only in newer GCC 238 * which is ensured when CONFIG_RETPOLINE is defined. 239 */ 240# define CALL_NOSPEC \ 241 ALTERNATIVE_2( \ 242 ANNOTATE_RETPOLINE_SAFE \ 243 "call *%[thunk_target]\n", \ 244 "call __x86_indirect_thunk_%V[thunk_target]\n", \ 245 X86_FEATURE_RETPOLINE, \ 246 "lfence;\n" \ 247 ANNOTATE_RETPOLINE_SAFE \ 248 "call *%[thunk_target]\n", \ 249 X86_FEATURE_RETPOLINE_LFENCE) 250 251# define THUNK_TARGET(addr) [thunk_target] "r" (addr) 252 253#else /* CONFIG_X86_32 */ 254/* 255 * For i386 we use the original ret-equivalent retpoline, because 256 * otherwise we'll run out of registers. We don't care about CET 257 * here, anyway. 258 */ 259# define CALL_NOSPEC \ 260 ALTERNATIVE_2( \ 261 ANNOTATE_RETPOLINE_SAFE \ 262 "call *%[thunk_target]\n", \ 263 " jmp 904f;\n" \ 264 " .align 16\n" \ 265 "901: call 903f;\n" \ 266 "902: pause;\n" \ 267 " lfence;\n" \ 268 " jmp 902b;\n" \ 269 " .align 16\n" \ 270 "903: lea 4(%%esp), %%esp;\n" \ 271 " pushl %[thunk_target];\n" \ 272 " ret;\n" \ 273 " .align 16\n" \ 274 "904: call 901b;\n", \ 275 X86_FEATURE_RETPOLINE, \ 276 "lfence;\n" \ 277 ANNOTATE_RETPOLINE_SAFE \ 278 "call *%[thunk_target]\n", \ 279 X86_FEATURE_RETPOLINE_LFENCE) 280 281# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) 282#endif 283#else /* No retpoline for C / inline asm */ 284# define CALL_NOSPEC "call *%[thunk_target]\n" 285# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) 286#endif 287 288/* The Spectre V2 mitigation variants */ 289enum spectre_v2_mitigation { 290 SPECTRE_V2_NONE, 291 SPECTRE_V2_RETPOLINE, 292 SPECTRE_V2_LFENCE, 293 SPECTRE_V2_EIBRS, 294 SPECTRE_V2_EIBRS_RETPOLINE, 295 SPECTRE_V2_EIBRS_LFENCE, 296 SPECTRE_V2_IBRS, 297}; 298 299/* The indirect branch speculation control variants */ 300enum spectre_v2_user_mitigation { 301 SPECTRE_V2_USER_NONE, 302 SPECTRE_V2_USER_STRICT, 303 SPECTRE_V2_USER_STRICT_PREFERRED, 304 SPECTRE_V2_USER_PRCTL, 305 SPECTRE_V2_USER_SECCOMP, 306}; 307 308/* The Speculative Store Bypass disable variants */ 309enum ssb_mitigation { 310 SPEC_STORE_BYPASS_NONE, 311 SPEC_STORE_BYPASS_DISABLE, 312 SPEC_STORE_BYPASS_PRCTL, 313 SPEC_STORE_BYPASS_SECCOMP, 314}; 315 316extern char __indirect_thunk_start[]; 317extern char __indirect_thunk_end[]; 318 319static __always_inline 320void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) 321{ 322 asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) 323 : : "c" (msr), 324 "a" ((u32)val), 325 "d" ((u32)(val >> 32)), 326 [feature] "i" (feature) 327 : "memory"); 328} 329 330extern u64 x86_pred_cmd; 331 332static inline void indirect_branch_prediction_barrier(void) 333{ 334 alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); 335} 336 337/* The Intel SPEC CTRL MSR base value cache */ 338extern u64 x86_spec_ctrl_base; 339DECLARE_PER_CPU(u64, x86_spec_ctrl_current); 340extern void update_spec_ctrl_cond(u64 val); 341extern u64 spec_ctrl_current(void); 342 343/* 344 * With retpoline, we must use IBRS to restrict branch prediction 345 * before calling into firmware. 346 * 347 * (Implemented as CPP macros due to header hell.) 348 */ 349#define firmware_restrict_branch_speculation_start() \ 350do { \ 351 preempt_disable(); \ 352 alternative_msr_write(MSR_IA32_SPEC_CTRL, \ 353 spec_ctrl_current() | SPEC_CTRL_IBRS, \ 354 X86_FEATURE_USE_IBRS_FW); \ 355 alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ 356 X86_FEATURE_USE_IBPB_FW); \ 357} while (0) 358 359#define firmware_restrict_branch_speculation_end() \ 360do { \ 361 alternative_msr_write(MSR_IA32_SPEC_CTRL, \ 362 spec_ctrl_current(), \ 363 X86_FEATURE_USE_IBRS_FW); \ 364 preempt_enable(); \ 365} while (0) 366 367DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); 368DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); 369DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); 370 371DECLARE_STATIC_KEY_FALSE(mds_idle_clear); 372 373DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); 374 375extern u16 mds_verw_sel; 376 377#include <asm/segment.h> 378 379/** 380 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability 381 * 382 * This uses the otherwise unused and obsolete VERW instruction in 383 * combination with microcode which triggers a CPU buffer flush when the 384 * instruction is executed. 385 */ 386static __always_inline void mds_clear_cpu_buffers(void) 387{ 388 static const u16 ds = __KERNEL_DS; 389 390 /* 391 * Has to be the memory-operand variant because only that 392 * guarantees the CPU buffer flush functionality according to 393 * documentation. The register-operand variant does not. 394 * Works with any segment selector, but a valid writable 395 * data segment is the fastest variant. 396 * 397 * "cc" clobber is required because VERW modifies ZF. 398 */ 399 asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); 400} 401 402/** 403 * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability 404 * 405 * Clear CPU buffers if the corresponding static key is enabled 406 */ 407static inline void mds_idle_clear_cpu_buffers(void) 408{ 409 if (static_branch_likely(&mds_idle_clear)) 410 mds_clear_cpu_buffers(); 411} 412 413#endif /* __ASSEMBLY__ */ 414 415#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ 416