aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/fpu/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/fpu/core.c')
-rw-r--r--arch/x86/kernel/fpu/core.c392
1 files changed, 315 insertions, 77 deletions
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 7ada7bd03a3272..8ea306b1bf8ec1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -6,8 +6,9 @@
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
*/
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
+#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>
@@ -15,15 +16,30 @@
#include <linux/hardirq.h>
#include <linux/pkeys.h>
+#include <linux/vmalloc.h>
+
+#include "context.h"
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>
+#ifdef CONFIG_X86_64
+DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+DEFINE_PER_CPU(u64, xfd_state);
+#endif
+
+/* The FPU state configuration data for kernel and user space */
+struct fpu_state_config fpu_kernel_cfg __ro_after_init;
+struct fpu_state_config fpu_user_cfg __ro_after_init;
+
/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
* depending on the FPU hardware format:
*/
-union fpregs_state init_fpstate __ro_after_init;
+struct fpstate init_fpstate __ro_after_init;
/*
* Track whether the kernel is using the FPU state
@@ -83,7 +99,7 @@ bool irq_fpu_usable(void)
EXPORT_SYMBOL(irq_fpu_usable);
/*
- * Save the FPU register state in fpu->state. The register state is
+ * Save the FPU register state in fpu->fpstate->regs. The register state is
* preserved.
*
* Must be called with fpregs_lock() held.
@@ -99,19 +115,19 @@ EXPORT_SYMBOL(irq_fpu_usable);
void save_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
- os_xsave(&fpu->state.xsave);
+ os_xsave(fpu->fpstate);
/*
* AVX512 state is tracked here because its use is
* known to slow the max clock speed of the core.
*/
- if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+ if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies;
return;
}
if (likely(use_fxsr())) {
- fxsave(&fpu->state.fxsave);
+ fxsave(&fpu->fpstate->regs.fxsave);
return;
}
@@ -119,12 +135,11 @@ void save_fpregs_to_fpstate(struct fpu *fpu)
* Legacy FPU register saving, FNSAVE always clears FPU registers,
* so we have to reload them from the memory state.
*/
- asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
- frstor(&fpu->state.fsave);
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
+ frstor(&fpu->fpstate->regs.fsave);
}
-EXPORT_SYMBOL(save_fpregs_to_fpstate);
-void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
+void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
{
/*
* AMD K7/K8 and later CPUs up to Zen don't save/restore
@@ -141,15 +156,181 @@ void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
}
if (use_xsave()) {
- os_xrstor(&fpstate->xsave, mask);
+ /*
+ * Dynamically enabled features are enabled in XCR0, but
+ * usage requires also that the corresponding bits in XFD
+ * are cleared. If the bits are set then using a related
+ * instruction will raise #NM. This allows to do the
+ * allocation of the larger FPU buffer lazy from #NM or if
+ * the task has no permission to kill it which would happen
+ * via #UD if the feature is disabled in XCR0.
+ *
+ * XFD state is following the same life time rules as
+ * XSTATE and to restore state correctly XFD has to be
+ * updated before XRSTORS otherwise the component would
+ * stay in or go into init state even if the bits are set
+ * in fpstate::regs::xsave::xfeatures.
+ */
+ xfd_update_state(fpstate);
+
+ /*
+ * Restoring state always needs to modify all features
+ * which are in @mask even if the current task cannot use
+ * extended features.
+ *
+ * So fpstate->xfeatures cannot be used here, because then
+ * a feature for which the task has no permission but was
+ * used by the previous task would not go into init state.
+ */
+ mask = fpu_kernel_cfg.max_features & mask;
+
+ os_xrstor(fpstate, mask);
} else {
if (use_fxsr())
- fxrstor(&fpstate->fxsave);
+ fxrstor(&fpstate->regs.fxsave);
else
- frstor(&fpstate->fsave);
+ frstor(&fpstate->regs.fsave);
}
}
-EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate);
+
+void fpu_reset_from_exception_fixup(void)
+{
+ restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);
+}
+
+#if IS_ENABLED(CONFIG_KVM)
+static void __fpstate_reset(struct fpstate *fpstate);
+
+bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
+{
+ struct fpstate *fpstate;
+ unsigned int size;
+
+ size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
+ fpstate = vzalloc(size);
+ if (!fpstate)
+ return false;
+
+ __fpstate_reset(fpstate);
+ fpstate_init_user(fpstate);
+ fpstate->is_valloc = true;
+ fpstate->is_guest = true;
+
+ gfpu->fpstate = fpstate;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
+
+void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
+{
+ struct fpstate *fps = gfpu->fpstate;
+
+ if (!fps)
+ return;
+
+ if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
+ return;
+
+ gfpu->fpstate = NULL;
+ vfree(fps);
+}
+EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
+
+int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
+{
+ struct fpstate *guest_fps = guest_fpu->fpstate;
+ struct fpu *fpu = &current->thread.fpu;
+ struct fpstate *cur_fps = fpu->fpstate;
+
+ fpregs_lock();
+ if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))
+ save_fpregs_to_fpstate(fpu);
+
+ /* Swap fpstate */
+ if (enter_guest) {
+ fpu->__task_fpstate = cur_fps;
+ fpu->fpstate = guest_fps;
+ guest_fps->in_use = true;
+ } else {
+ guest_fps->in_use = false;
+ fpu->fpstate = fpu->__task_fpstate;
+ fpu->__task_fpstate = NULL;
+ }
+
+ cur_fps = fpu->fpstate;
+
+ if (!cur_fps->is_confidential) {
+ /* Includes XFD update */
+ restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);
+ } else {
+ /*
+ * XSTATE is restored by firmware from encrypted
+ * memory. Make sure XFD state is correct while
+ * running with guest fpstate
+ */
+ xfd_update_state(cur_fps);
+ }
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+
+void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u32 pkru)
+{
+ struct fpstate *kstate = gfpu->fpstate;
+ union fpregs_state *ustate = buf;
+ struct membuf mb = { .p = buf, .left = size };
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+ __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ } else {
+ memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+ sizeof(ustate->fxsave));
+ /* Make it restorable on a XSAVE enabled host */
+ ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
+ }
+}
+EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);
+
+int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
+ u64 xcr0, u32 *vpkru)
+{
+ struct fpstate *kstate = gfpu->fpstate;
+ const union fpregs_state *ustate = buf;
+ struct pkru_state *xpkru;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+ if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
+ return -EINVAL;
+ if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
+ return -EINVAL;
+ memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
+ return 0;
+ }
+
+ if (ustate->xsave.header.xfeatures & ~xcr0)
+ return -EINVAL;
+
+ ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
+ if (ret)
+ return ret;
+
+ /* Retrieve PKRU if not in init state */
+ if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
+ xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
+ *vpkru = xpkru->pkru;
+ }
+
+ /* Ensure that XCOMP_BV is set up for XSAVES */
+ xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
+#endif /* CONFIG_KVM */
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
@@ -203,52 +384,88 @@ void fpu_sync_fpstate(struct fpu *fpu)
fpregs_unlock();
}
-static inline void fpstate_init_xstate(struct xregs_state *xsave)
+static inline unsigned int init_fpstate_copy_size(void)
{
- /*
- * XRSTORS requires these bits set in xcomp_bv, or it will
- * trigger #GP:
- */
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
+ if (!use_xsave())
+ return fpu_kernel_cfg.default_size;
+
+ /* XSAVE(S) just needs the legacy and the xstate header part */
+ return sizeof(init_fpstate.regs.xsave);
}
-static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+static inline void fpstate_init_fxstate(struct fpstate *fpstate)
{
- fx->cwd = 0x37f;
- fx->mxcsr = MXCSR_DEFAULT;
+ fpstate->regs.fxsave.cwd = 0x37f;
+ fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
}
/*
* Legacy x87 fpstate state init:
*/
-static inline void fpstate_init_fstate(struct fregs_state *fp)
+static inline void fpstate_init_fstate(struct fpstate *fpstate)
{
- fp->cwd = 0xffff037fu;
- fp->swd = 0xffff0000u;
- fp->twd = 0xffffffffu;
- fp->fos = 0xffff0000u;
+ fpstate->regs.fsave.cwd = 0xffff037fu;
+ fpstate->regs.fsave.swd = 0xffff0000u;
+ fpstate->regs.fsave.twd = 0xffffffffu;
+ fpstate->regs.fsave.fos = 0xffff0000u;
}
-void fpstate_init(union fpregs_state *state)
+/*
+ * Used in two places:
+ * 1) Early boot to setup init_fpstate for non XSAVE systems
+ * 2) fpu_init_fpstate_user() which is invoked from KVM
+ */
+void fpstate_init_user(struct fpstate *fpstate)
{
- if (!static_cpu_has(X86_FEATURE_FPU)) {
- fpstate_init_soft(&state->soft);
+ if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
+ fpstate_init_soft(&fpstate->regs.soft);
return;
}
- memset(state, 0, fpu_kernel_xstate_size);
+ xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);
- if (static_cpu_has(X86_FEATURE_XSAVES))
- fpstate_init_xstate(&state->xsave);
- if (static_cpu_has(X86_FEATURE_FXSR))
- fpstate_init_fxstate(&state->fxsave);
+ if (cpu_feature_enabled(X86_FEATURE_FXSR))
+ fpstate_init_fxstate(fpstate);
else
- fpstate_init_fstate(&state->fsave);
+ fpstate_init_fstate(fpstate);
+}
+
+static void __fpstate_reset(struct fpstate *fpstate)
+{
+ /* Initialize sizes and feature masks */
+ fpstate->size = fpu_kernel_cfg.default_size;
+ fpstate->user_size = fpu_user_cfg.default_size;
+ fpstate->xfeatures = fpu_kernel_cfg.default_features;
+ fpstate->user_xfeatures = fpu_user_cfg.default_features;
+ fpstate->xfd = init_fpstate.xfd;
+}
+
+void fpstate_reset(struct fpu *fpu)
+{
+ /* Set the fpstate pointer to the default fpstate */
+ fpu->fpstate = &fpu->__fpstate;
+ __fpstate_reset(fpu->fpstate);
+
+ /* Initialize the permission related info in fpu */
+ fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
+ fpu->perm.__state_size = fpu_kernel_cfg.default_size;
+ fpu->perm.__user_state_size = fpu_user_cfg.default_size;
+}
+
+static inline void fpu_inherit_perms(struct fpu *dst_fpu)
+{
+ if (fpu_state_size_dynamic()) {
+ struct fpu *src_fpu = &current->group_leader->thread.fpu;
+
+ spin_lock_irq(&current->sighand->siglock);
+ /* Fork also inherits the permissions of the parent */
+ dst_fpu->perm = src_fpu->perm;
+ spin_unlock_irq(&current->sighand->siglock);
+ }
}
-EXPORT_SYMBOL_GPL(fpstate_init);
/* Clone current's FPU state on fork */
-int fpu_clone(struct task_struct *dst)
+int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
{
struct fpu *src_fpu = &current->thread.fpu;
struct fpu *dst_fpu = &dst->thread.fpu;
@@ -256,30 +473,51 @@ int fpu_clone(struct task_struct *dst)
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
+ fpstate_reset(dst_fpu);
+
if (!cpu_feature_enabled(X86_FEATURE_FPU))
return 0;
/*
- * Don't let 'init optimized' areas of the XSAVE area
- * leak into the child task:
+ * Enforce reload for user space tasks and prevent kernel threads
+ * from trying to save the FPU registers on context switch.
+ */
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
+
+ /*
+ * No FPU state inheritance for kernel threads and IO
+ * worker threads.
+ */
+ if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
+ /* Clear out the minimal state */
+ memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
+ init_fpstate_copy_size());
+ return 0;
+ }
+
+ /*
+ * If a new feature is added, ensure all dynamic features are
+ * caller-saved from here!
*/
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
/*
- * If the FPU registers are not owned by current just memcpy() the
- * state. Otherwise save the FPU registers directly into the
- * child's FPU context, without any memory-to-memory copying.
+ * Save the default portion of the current FPU state into the
+ * clone. Assume all dynamic features to be defined as caller-
+ * saved, which enables skipping both the expansion of fpstate
+ * and the copying of any dynamic state.
+ *
+ * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because
+ * copying is not valid when current uses non-default states.
*/
fpregs_lock();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
- memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
-
- else
- save_fpregs_to_fpstate(dst_fpu);
+ fpregs_restore_userregs();
+ save_fpregs_to_fpstate(dst_fpu);
+ if (!(clone_flags & CLONE_THREAD))
+ fpu_inherit_perms(dst_fpu);
fpregs_unlock();
- set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
-
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -287,6 +525,16 @@ int fpu_clone(struct task_struct *dst)
}
/*
+ * Whitelist the FPU register state embedded into task_struct for hardened
+ * usercopy.
+ */
+void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
+ *size = fpu_kernel_cfg.default_size;
+}
+
+/*
* Drops current FPU state: deactivates the fpregs and
* the fpstate. NOTE: it still leaves previous contents
* in the fpregs in the eager-FPU case.
@@ -319,28 +567,19 @@ void fpu__drop(struct fpu *fpu)
static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
{
if (use_xsave())
- os_xrstor(&init_fpstate.xsave, features_mask);
+ os_xrstor(&init_fpstate, features_mask);
else if (use_fxsr())
- fxrstor(&init_fpstate.fxsave);
+ fxrstor(&init_fpstate.regs.fxsave);
else
- frstor(&init_fpstate.fsave);
+ frstor(&init_fpstate.regs.fsave);
pkru_write_default();
}
-static inline unsigned int init_fpstate_copy_size(void)
-{
- if (!use_xsave())
- return fpu_kernel_xstate_size;
-
- /* XSAVE(S) just needs the legacy and the xstate header part */
- return sizeof(init_fpstate.xsave);
-}
-
/*
* Reset current->fpu memory state to the init values.
*/
-static void fpu_reset_fpstate(void)
+static void fpu_reset_fpregs(void)
{
struct fpu *fpu = &current->thread.fpu;
@@ -359,7 +598,7 @@ static void fpu_reset_fpstate(void)
* user space as PKRU is eagerly written in switch_to() and
* flush_thread().
*/
- memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size());
+ memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
set_thread_flag(TIF_NEED_FPU_LOAD);
fpregs_unlock();
}
@@ -375,7 +614,7 @@ void fpu__clear_user_states(struct fpu *fpu)
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
- fpu_reset_fpstate();
+ fpu_reset_fpregs();
fpregs_unlock();
return;
}
@@ -385,12 +624,11 @@ void fpu__clear_user_states(struct fpu *fpu)
* corresponding registers.
*/
if (xfeatures_mask_supervisor() &&
- !fpregs_state_valid(fpu, smp_processor_id())) {
- os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
- }
+ !fpregs_state_valid(fpu, smp_processor_id()))
+ os_xrstor_supervisor(fpu->fpstate);
/* Reset user states in registers. */
- restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user());
+ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
/*
* Now all FPU registers have their desired values. Inform the FPU
@@ -405,7 +643,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
- fpu_reset_fpstate();
+ fpstate_reset(&current->thread.fpu);
+ fpu_reset_fpregs();
}
/*
* Load FPU context before returning to userspace.
@@ -445,7 +684,6 @@ void fpregs_mark_activate(void)
fpu->last_cpu = smp_processor_id();
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
-EXPORT_SYMBOL_GPL(fpregs_mark_activate);
/*
* x87 math exception handling:
@@ -468,11 +706,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* fully reproduce the context of the exception.
*/
if (boot_cpu_has(X86_FEATURE_FXSR)) {
- cwd = fpu->state.fxsave.cwd;
- swd = fpu->state.fxsave.swd;
+ cwd = fpu->fpstate->regs.fxsave.cwd;
+ swd = fpu->fpstate->regs.fxsave.swd;
} else {
- cwd = (unsigned short)fpu->state.fsave.cwd;
- swd = (unsigned short)fpu->state.fsave.swd;
+ cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
+ swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
}
err = swd & ~cwd;
@@ -486,7 +724,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
unsigned short mxcsr = MXCSR_DEFAULT;
if (boot_cpu_has(X86_FEATURE_XMM))
- mxcsr = fpu->state.fxsave.mxcsr;
+ mxcsr = fpu->fpstate->regs.fxsave.mxcsr;
err = ~(mxcsr >> 7) & mxcsr;
}