From 0cd01ac5dcb1e18eb18df0f0d05b5de76522a437 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Apr 2024 11:14:13 -0700 Subject: x86/bugs: Change commas to semicolons in 'spectre_v2' sysfs file Change the format of the 'spectre_v2' vulnerabilities sysfs file slightly by converting the commas to semicolons, so that mitigations for future variants can be grouped together and separated by commas. Signed-off-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b81..e8c7146f9ed7cb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2695,15 +2695,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2712,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,11 +2725,11 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } @@ -2748,9 +2748,9 @@ static ssize_t spectre_v2_show_state(char *buf) return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), spectre_v2_module_string()); } -- cgit 1.2.3-korg From 1e3ad78334a69b36e107232e337f9d693dcc9df2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Apr 2024 16:36:44 -0700 Subject: x86/syscall: Don't force use of indirect calls for system calls Make build a switch statement instead, and the compiler can either decide to generate an indirect jump, or - more likely these days due to mitigations - just a series of conditional branches. Yes, the conditional branches also have branch prediction, but the branch prediction is much more controlled, in that it just causes speculatively running the wrong system call (harmless), rather than speculatively running possibly wrong random less controlled code gadgets. This doesn't mitigate other indirect calls, but the system call indirection is the first and most easily triggered case. Signed-off-by: Linus Torvalds Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf --- arch/x86/entry/common.c | 6 +++--- arch/x86/entry/syscall_32.c | 21 +++++++++++++++++++-- arch/x86/entry/syscall_64.c | 19 +++++++++++++++++-- arch/x86/entry/syscall_x32.c | 10 +++++++--- arch/x86/include/asm/syscall.h | 10 ++++------ 5 files changed, 50 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6356060caaf311..cea0e2a23b4267 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f8b..c2235bae17ef66 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9f9..33b3f09e6f151e 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265d2..03de4a93213182 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f44e2f9ab65d77..3c28f26bfe2201 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include /* for TS_COMPAT */ #include +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. -- cgit 1.2.3-korg From 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:56:58 -0700 Subject: x86/bhi: Add support for clearing branch history at syscall entry Branch History Injection (BHI) attacks may allow a malicious application to influence indirect branch prediction in kernel by poisoning the branch history. eIBRS isolates indirect branch targets in ring0. The BHB can still influence the choice of indirect branch predictor entry, and although branch predictor entries are isolated between modes when eIBRS is enabled, the BHB itself is not isolated between modes. Alder Lake and new processors supports a hardware control BHI_DIS_S to mitigate BHI. For older processors Intel has released a software sequence to clear the branch history on parts that don't support BHI_DIS_S. Add support to execute the software sequence at syscall entry and VMexit to overwrite the branch history. For now, branch history is not cleared at interrupt entry, as malicious applications are not believed to have sufficient control over the registers, since previous register state is cleared at interrupt entry. Researchers continue to poke at this area and it may become necessary to clear at interrupt entry as well in the future. This mitigation is only defined here. It is enabled later. Signed-off-by: Pawan Gupta Co-developed-by: Daniel Sneddon Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- arch/x86/entry/common.c | 4 +-- arch/x86/entry/entry_64.S | 61 ++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_64_compat.S | 16 ++++++++++ arch/x86/include/asm/cpufeatures.h | 3 +- arch/x86/include/asm/nospec-branch.h | 12 +++++++ arch/x86/include/asm/syscall.h | 1 + arch/x86/kvm/vmx/vmenter.S | 2 ++ 7 files changed, 96 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index cea0e2a23b4267..6de50b80702e61 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8af2a26b24f6a9..1b5be07f86698a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index eabf48c4d4b4c3..c779046cc3fe79 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a38f8f9ba65729..9b94a86f46003b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -461,11 +461,12 @@ /* * Extended auxiliary flags: Linux defined - for features scattered in various - * CPUID levels like 0x80000022, etc. + * CPUID levels like 0x80000022, etc and Linux defined features. * * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 170c89ed22fcd3..aea40204278b64 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -326,6 +326,14 @@ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm +#else +#define CLEAR_BRANCH_HISTORY +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -368,6 +376,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 3c28f26bfe2201..2fc7bc3863ff6f 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -125,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) } bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2bfbf758d06110..0f3593e10c57d8 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX -- cgit 1.2.3-korg From 0f4a837615ff925ba62648d280a861adf1582df7 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:47:57 -0700 Subject: x86/bhi: Define SPEC_CTRL_BHI_DIS_S Newer processors supports a hardware control BHI_DIS_S to mitigate Branch History Injection (BHI). Setting BHI_DIS_S protects the kernel from userspace BHI attacks without having to manually overwrite the branch history. Define MSR_SPEC_CTRL bit BHI_DIS_S and its enumeration CPUID.BHI_CTRL. Mitigation is enabled later. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 5 ++++- arch/x86/kernel/cpu/scattered.c | 1 + arch/x86/kvm/reverse_cpuid.h | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9b94a86f46003b..40850907042996 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -467,6 +467,7 @@ */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 05956bd8bacf50..24615e8269988b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,10 +61,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index a515328d9d7d88..af5aa2c754c222 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561d0..da9880d74a0bc0 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) /* CPUID level 0x80000007 (EDX). */ @@ -126,6 +126,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } -- cgit 1.2.3-korg From be482ff9500999f56093738f9219bbabc729d163 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:03 -0700 Subject: x86/bhi: Enumerate Branch History Injection (BHI) bug Mitigation for BHI is selected based on the bug enumeration. Add bits needed to enumerate BHI bug. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/common.c | 24 ++++++++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 40850907042996..8edd7a28869a8d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -517,4 +517,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 24615e8269988b..e72c2b87295799 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -166,6 +166,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5c1e6d6be267af..754d91857d634a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1435,6 +1436,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(ia32_cap)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(ia32_cap & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; -- cgit 1.2.3-korg From ec9404e40e8f36421a2b66ecb76dc2209fe7f3ef Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:05 -0700 Subject: x86/bhi: Add BHI mitigation knob Branch history clearing software sequences and hardware control BHI_DIS_S were defined to mitigate Branch History Injection (BHI). Add cmdline spectre_bhi={on|off|auto} to control BHI mitigation: auto - Deploy the hardware mitigation BHI_DIS_S, if available. on - Deploy the hardware mitigation BHI_DIS_S, if available, otherwise deploy the software sequence at syscall entry and VMexit. off - Turn off BHI mitigation. The default is auto mode which does not deploy the software sequence mitigation. This is because of the hardening done in the syscall dispatch path, which is the likely target of BHI. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- Documentation/admin-guide/hw-vuln/spectre.rst | 45 +++++++++++-- Documentation/admin-guide/kernel-parameters.txt | 11 +++ arch/x86/Kconfig | 25 +++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 90 ++++++++++++++++++++++++- 5 files changed, 165 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index cce768afec6bed..7cb99b09827ce3 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, the BHB might be shared across privilege levels even in the presence of Enhanced IBRS. -Currently the only known real-world BHB attack vector is via -unprivileged eBPF. Therefore, it's highly recommended to not enable -unprivileged eBPF, especially when eIBRS is used (without retpolines). -For a full mitigation against BHB attacks, it's recommended to use -retpolines (or eIBRS combined with retpolines). +Previously the only known real-world BHB attack vector was via unprivileged +eBPF. Further research has found attacks that don't require unprivileged eBPF. +For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or +use the BHB clearing sequence. Attack scenarios ---------------- @@ -430,6 +429,21 @@ The possible values in this file are: 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB =========================== ======================================================= + - Branch History Injection (BHI) protection status: + +.. list-table:: + + * - BHI: Not affected + - System is not affected + * - BHI: Retpoline + - System is protected by retpoline + * - BHI: BHI_DIS_S + - System is protected by BHI_DIS_S + * - BHI: SW loop + - System is protected by software clearing sequence + * - BHI: Syscall hardening + - Syscalls are hardened against BHI + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -484,7 +498,11 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks. + some Spectre v2 variant attacks. The BHB can still influence the choice of + indirect branch predictor entry, and although branch predictor entries are + isolated between modes when eIBRS is enabled, the BHB itself is not isolated + between modes. Systems which support BHI_DIS_S will set it to protect against + BHI attacks. On Intel's enhanced IBRS systems, this includes cross-thread branch target injections on SMT systems (STIBP). In other words, Intel eIBRS enables @@ -638,6 +656,21 @@ kernel command line. spectre_v2=off. Spectre variant 1 mitigations cannot be disabled. + spectre_bhi= + + [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + regardless of this setting. This setting affects the deployment + of the HW BHI control and the SW BHB clearing sequence. + + on + unconditionally enable. + off + unconditionally disable. + auto + enable if hardware mitigation + control(BHI_DIS_S) is available. + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt Mitigation selection guide diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bb884c14b2f679..2dbe60c1db225d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6063,6 +6063,17 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst + spectre_bhi= [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + reglardless of this setting. This setting affects the + deployment of the HW BHI control and the SW BHB + clearing sequence. + + on - unconditionally enable. + off - unconditionally disable. + auto - (default) enable only if hardware mitigation + control(BHI_DIS_S) is available. + spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. The default operation protects the kernel from diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4fff6ed46e902c..29a7b69a131662 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2633,6 +2633,31 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also +choice + prompt "Clear branch history" + depends on CPU_SUP_INTEL + default SPECTRE_BHI_AUTO + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See + +config SPECTRE_BHI_ON + bool "on" + help + Equivalent to setting spectre_bhi=on command line parameter. +config SPECTRE_BHI_OFF + bool "off" + help + Equivalent to setting spectre_bhi=off command line parameter. +config SPECTRE_BHI_AUTO + bool "auto" + help + Equivalent to setting spectre_bhi=auto command line parameter. + +endchoice + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8edd7a28869a8d..a2ee9a00e4a7ee 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -468,6 +468,7 @@ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e8c7146f9ed7cb..1ab27502988874 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1607,6 +1607,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, + BHI_MITIGATION_AUTO, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : + IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : + BHI_MITIGATION_AUTO; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else if (!strcmp(str, "auto")) + bhi_mitigation = BHI_MITIGATION_AUTO; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return; + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (bhi_mitigation == BHI_MITIGATION_AUTO) + return; + + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1786,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -2733,6 +2804,21 @@ static char *pbrsb_eibrs_state(void) } } +static const char * const spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return "; BHI: Retpoline"; + + return "; BHI: Vulnerable (Syscall hardening enabled)"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } -- cgit 1.2.3-korg From 95a6ccbdc7199a14b71ad8901cb788ba7fb5167b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:09 -0700 Subject: x86/bhi: Mitigate KVM by default BHI mitigation mode spectre_bhi=auto does not deploy the software mitigation by default. In a cloud environment, it is a likely scenario where userspace is trusted but the guests are not trusted. Deploying system wide mitigation in such cases is not desirable. Update the auto mode to unconditionally mitigate against malicious guests. Deploy the software sequence at VMexit in auto mode also, when hardware mitigation is not available. Unlike the force =on mode, software sequence is not deployed at syscalls in auto mode. Suggested-by: Alexandre Chartre Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- Documentation/admin-guide/hw-vuln/spectre.rst | 7 +++++-- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 5 +++++ arch/x86/kernel/cpu/bugs.c | 9 ++++++++- arch/x86/kvm/vmx/vmenter.S | 2 +- 6 files changed, 23 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 7cb99b09827ce3..b70b1d8bd8e657 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,10 +439,12 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop + * - BHI: SW loop; KVM SW loop - System is protected by software clearing sequence * - BHI: Syscall hardening - Syscalls are hardened against BHI + * - BHI: Syscall hardening; KVM: SW loop + - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -669,7 +671,8 @@ kernel command line. unconditionally disable. auto enable if hardware mitigation - control(BHI_DIS_S) is available. + control(BHI_DIS_S) is available, otherwise + enable alternate mitigation in KVM. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2dbe60c1db225d..4fa46302f4368c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6071,8 +6071,9 @@ on - unconditionally enable. off - unconditionally disable. - auto - (default) enable only if hardware mitigation - control(BHI_DIS_S) is available. + auto - (default) enable hardware mitigation + (BHI_DIS_S) if available, otherwise enable + alternate mitigation in KVM. spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a2ee9a00e4a7ee..3c7434329661c6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -469,6 +469,7 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index aea40204278b64..ff5f1ecc7d1e65 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -330,8 +330,13 @@ .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP .endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm #else #define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT #endif #else /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1ab27502988874..295463707e6818 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1668,9 +1668,14 @@ static void __init bhi_select_mitigation(void) if (!IS_ENABLED(CONFIG_X86_64)) return; + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + if (bhi_mitigation == BHI_MITIGATION_AUTO) return; + /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); } @@ -2811,10 +2816,12 @@ static const char * const spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) - return "; BHI: SW loop"; + return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Syscall hardening, KVM: SW loop"; return "; BHI: Vulnerable (Syscall hardening enabled)"; } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0f3593e10c57d8..f6986dee6f8c7c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,7 +275,7 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host - CLEAR_BRANCH_HISTORY + CLEAR_BRANCH_HISTORY_VMEXIT /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX -- cgit 1.2.3-korg From ed2e8d49b54d677f3123668a21a57822d679651f Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:49:17 -0700 Subject: KVM: x86: Add BHI_NO Intel processors that aren't vulnerable to BHI will set MSR_IA32_ARCH_CAPABILITIES[BHI_NO] = 1;. Guests may use this BHI_NO bit to determine if they need to implement BHI mitigations or not. Allow this bit to be passed to the guests. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47d9f03b777837..984ea2089efc31 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { -- cgit 1.2.3-korg