author		Sebastian Andrzej Siewior <bigeasy@linutronix.de>	2017-11-30 13:40:10 +0100
committer	Sebastian Andrzej Siewior <bigeasy@linutronix.de>	2019-07-11 17:28:19 +0200
commit		a1b268eb96731f1efdb51bd084b7abadb549cdbc (patch)
tree		74f325369ac5eac9d666158fd485bbbf53155f9e
parent		d856a3de0de252dbea548eedb5e77c44d2d8eb04 (diff)
download	linux-rt-devel-a1b268eb96731f1efdb51bd084b7abadb549cdbc.tar.gz
crypto: limit more FPU-enabled sections
Those crypto drivers use SSE/AVX/… for their crypto work, and in order to
do so in the kernel they need to enable the "FPU" in kernel mode, which
disables preemption.
There are two problems with the way they are used:

- the while loop which processes X bytes may create latency spikes and
  should be avoided or limited.

- the cipher-walk-next part may allocate/free memory and may use
  kmap_atomic().

The whole kernel_fpu_begin()/end() processing probably isn't that cheap,
so it most likely makes sense to process as much of the data as possible
in one go. The new *_fpu_sched_rt() schedules only if an RT task is
pending. We should probably measure the performance of these ciphers in
pure SW mode and with these optimisations to see whether it makes sense
to keep them for RT.
This kernel_fpu_resched() makes the code more preemptible, which might
hurt performance.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
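For illustration only (not part of this patch): a minimal sketch of how a SIMD
processing loop could use the new kernel_fpu_resched() to bound the
preempt-disabled FPU section. process_chunk(), CHUNK and the dst/src/len
variables are hypothetical placeholders, not kernel symbols.

	/*
	 * Hypothetical usage sketch; process_chunk() and CHUNK are made up.
	 * kernel_fpu_resched() must be called between kernel_fpu_begin() and
	 * kernel_fpu_end(); it drops and re-acquires the FPU section only
	 * when a reschedule is actually pending.
	 */
	kernel_fpu_begin();
	while (len > 0) {
		unsigned int n = min_t(unsigned int, len, CHUNK);

		process_chunk(dst, src, n);	/* the SSE/AVX work */
		dst += n;
		src += n;
		len -= n;

		kernel_fpu_resched();		/* preemption point between chunks */
	}
	kernel_fpu_end();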
-rw-r--r--	arch/x86/crypto/chacha_glue.c		11
-rw-r--r--	arch/x86/include/asm/fpu/api.h		1
-rw-r--r--	arch/x86/kernel/fpu/core.c		12
3 files changed, 15 insertions, 9 deletions
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 1ce0019c059c39..0e5f93c87fe0d6 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -127,7 +127,6 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
 				  struct chacha_ctx *ctx, u8 *iv)
 {
 	u32 *state, state_buf[16 + 2] __aligned(8);
-	int next_yield = 4096; /* bytes until next FPU yield */
 	int err = 0;
 
 	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
@@ -140,20 +139,14 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
 
 		if (nbytes < walk->total) {
 			nbytes = round_down(nbytes, walk->stride);
-			next_yield -= nbytes;
 		}
 
 		chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
 			      nbytes, ctx->nrounds);
 
-		if (next_yield <= 0) {
-			/* temporarily allow preemption */
-			kernel_fpu_end();
-			kernel_fpu_begin();
-			next_yield = 4096;
-		}
-
+		kernel_fpu_end();
 		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+		kernel_fpu_begin();
 	}
 
 	return err;
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b774c52e5411fc..1786e4d9f70e8f 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -23,6 +23,7 @@ extern void kernel_fpu_begin(void);
 extern void kernel_fpu_end(void);
 extern bool irq_fpu_usable(void);
 extern void fpregs_mark_activate(void);
+extern void kernel_fpu_resched(void);
 
 /*
  * Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 649fbc3fcf9f52..c47d2b6a118ea4 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -134,6 +134,18 @@ void kernel_fpu_end(void)
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
 
+void kernel_fpu_resched(void)
+{
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	if (should_resched(PREEMPT_OFFSET)) {
+		kernel_fpu_end();
+		cond_resched();
+		kernel_fpu_begin();
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
 /*
  * Save the FPU state (mark it for reload if necessary):
  *