From 9bafaa9375cbf892033f188d8cb624ae328754b5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 25 Dec 2021 01:50:07 +0100 Subject: MAINTAINERS: add git tree for random.c This is handy not just for humans, but also so that the 0-day bot can automatically test posted mailing list patches against the right tree. Signed-off-by: Jason A. Donenfeld --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index dd36acc87ce627..42a62d47c37499 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15989,6 +15989,7 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" M: Jason A. Donenfeld +T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git S: Maintained F: drivers/char/random.c -- cgit 1.2.3-korg From 2b6c6e3d9ce3aa0e547ac25d60e06fe035cd9f79 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Dec 2021 17:44:49 +0000 Subject: random: document add_hwgenerator_randomness() with other input functions The section at the top of random.c which documents the input functions available does not document add_hwgenerator_randomness() which might lead a reader to overlook it. Add a brief note about it. Signed-off-by: Mark Brown [Jason: reorganize position of function in doc comment and also document add_bootloader_randomness() while we're at it.] Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 605969ed0f965c..c81485e2f12642 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -202,6 +202,9 @@ * unsigned int value); * void add_interrupt_randomness(int irq, int irq_flags); * void add_disk_randomness(struct gendisk *disk); + * void add_hwgenerator_randomness(const char *buffer, size_t count, + * size_t entropy); + * void add_bootloader_randomness(const void *buf, unsigned int size); * * add_device_randomness() is for adding data to the random pool that * is likely to differ between two devices (or possibly even per boot). @@ -228,6 +231,14 @@ * particular randomness source. They do this by keeping track of the * first and second order deltas of the event timings. * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or + * add_device_randomness(), depending on whether or not the configuration + * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * * Ensuring unpredictability at system startup * ============================================ * -- cgit 1.2.3-korg From 703f7066f40599c290babdb79dd61319264987e9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Dec 2021 13:17:33 +0100 Subject: random: remove unused irq_flags argument from add_interrupt_randomness() Since commit ee3e00e9e7101 ("random: use registers from interrupted code for CPU's w/o a cycle counter") the irq_flags argument is no longer used. Remove unused irq_flags. Cc: Borislav Petkov Cc: Dave Hansen Cc: Dexuan Cui Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: Ingo Molnar Cc: K. Y. Srinivasan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Wei Liu Cc: linux-hyperv@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Wei Liu Signed-off-by: Jason A. Donenfeld --- arch/x86/kernel/cpu/mshyperv.c | 2 +- drivers/char/random.c | 4 ++-- drivers/hv/vmbus_drv.c | 2 +- include/linux/random.h | 2 +- kernel/irq/handle.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index ff55df60228f72..2a0f8367891183 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -79,7 +79,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); - add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR); ack_APIC_irq(); set_irq_regs(old_regs); diff --git a/drivers/char/random.c b/drivers/char/random.c index c81485e2f12642..4b0753eb8bfb84 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -200,7 +200,7 @@ * void add_device_randomness(const void *buf, unsigned int size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); - * void add_interrupt_randomness(int irq, int irq_flags); + * void add_interrupt_randomness(int irq); * void add_disk_randomness(struct gendisk *disk); * void add_hwgenerator_randomness(const char *buffer, size_t count, * size_t entropy); @@ -1253,7 +1253,7 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *ptr; } -void add_interrupt_randomness(int irq, int irq_flags) +void add_interrupt_randomness(int irq) { struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 392c1ac4f8193b..7ae04ccb104388 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1381,7 +1381,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } - add_interrupt_randomness(vmbus_interrupt, 0); + add_interrupt_randomness(vmbus_interrupt); } static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4e0..c45b2693e51fb8 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +extern void add_interrupt_randomness(int irq) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 27182003b879cc..68c76c3caaf558 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -197,7 +197,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) retval = __handle_irq_event_percpu(desc, &flags); - add_interrupt_randomness(desc->irq_data.irq, flags); + add_interrupt_randomness(desc->irq_data.irq); if (!irq_settings_no_debug(desc)) note_interrupt(desc, retval); -- cgit 1.2.3-korg From 5320eb42dec7a7ef3ab7da3c5c0d7f889a5181e5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Dec 2021 13:17:34 +0100 Subject: irq: remove unused flags argument from __handle_irq_event_percpu() The __IRQF_TIMER bit from the flags argument was used in add_interrupt_randomness() to distinguish the timer interrupt from other interrupts. This is no longer the case. Remove the flags argument from __handle_irq_event_percpu(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. Donenfeld --- kernel/irq/chip.c | 4 +--- kernel/irq/handle.c | 9 ++------- kernel/irq/internals.h | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f895265d754870..c0932466308829 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -575,8 +575,6 @@ EXPORT_SYMBOL_GPL(handle_simple_irq); */ void handle_untracked_irq(struct irq_desc *desc) { - unsigned int flags = 0; - raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) @@ -593,7 +591,7 @@ void handle_untracked_irq(struct irq_desc *desc) irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock(&desc->lock); - __handle_irq_event_percpu(desc, &flags); + __handle_irq_event_percpu(desc); raw_spin_lock(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 68c76c3caaf558..9489f93b3db344 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -136,7 +136,7 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags) +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; unsigned int irq = desc->irq_data.irq; @@ -174,10 +174,6 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags } __irq_wake_thread(desc, action); - - fallthrough; /* to add to randomness */ - case IRQ_HANDLED: - *flags |= action->flags; break; default: @@ -193,9 +189,8 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval; - unsigned int flags = 0; - retval = __handle_irq_event_percpu(desc, &flags); + retval = __handle_irq_event_percpu(desc); add_interrupt_randomness(desc->irq_data.irq); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 54363527feea4d..99cbdf55a8bda0 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -103,7 +103,7 @@ extern int __irq_get_irqchip_state(struct irq_data *data, extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event(struct irq_desc *desc); -- cgit 1.2.3-korg From 5d73d1e320c3fd94ea15ba5f79301da9a8bcc7de Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Dec 2021 16:41:56 -0600 Subject: random: fix data race on crng_node_pool extract_crng() and crng_backtrack_protect() load crng_node_pool with a plain load, which causes undefined behavior if do_numa_crng_init() modifies it concurrently. Fix this by using READ_ONCE(). Note: as per the previous discussion https://lore.kernel.org/lkml/20211219025139.31085-1-ebiggers@kernel.org/T/#u, READ_ONCE() is believed to be sufficient here, and it was requested that it be used here instead of smp_load_acquire(). Also change do_numa_crng_init() to set crng_node_pool using cmpxchg_release() instead of mb() + cmpxchg(), as the former is sufficient here but is more lightweight. Fixes: 1e7f583af67b ("random: make /dev/urandom scalable for silly userspace programs") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Paul E. McKenney Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4b0753eb8bfb84..5e3046606640c8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -854,8 +854,8 @@ static void do_numa_crng_init(struct work_struct *work) crng_initialize_secondary(crng); pool[i] = crng; } - mb(); - if (cmpxchg(&crng_node_pool, NULL, pool)) { + /* pairs with READ_ONCE() in select_crng() */ + if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { for_each_node(i) kfree(pool[i]); kfree(pool); @@ -868,8 +868,26 @@ static void numa_crng_init(void) { schedule_work(&numa_crng_init_work); } + +static struct crng_state *select_crng(void) +{ + struct crng_state **pool; + int nid = numa_node_id(); + + /* pairs with cmpxchg_release() in do_numa_crng_init() */ + pool = READ_ONCE(crng_node_pool); + if (pool && pool[nid]) + return pool[nid]; + + return &primary_crng; +} #else static void numa_crng_init(void) {} + +static struct crng_state *select_crng(void) +{ + return &primary_crng; +} #endif /* @@ -1016,15 +1034,7 @@ static void _extract_crng(struct crng_state *crng, static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) { - struct crng_state *crng = NULL; - -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) -#endif - crng = &primary_crng; - _extract_crng(crng, out); + _extract_crng(select_crng(), out); } /* @@ -1053,15 +1063,7 @@ static void _crng_backtrack_protect(struct crng_state *crng, static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) { - struct crng_state *crng = NULL; - -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) -#endif - crng = &primary_crng; - _crng_backtrack_protect(crng, tmp, used); + _crng_backtrack_protect(select_crng(), tmp, used); } static ssize_t extract_crng_user(void __user *buf, size_t nbytes) -- cgit 1.2.3-korg From 009ba8568be497c640cab7571f7bfd18345d7b24 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Dec 2021 16:41:57 -0600 Subject: random: fix data race on crng init time _extract_crng() does plain loads of crng->init_time and crng_global_init_time, which causes undefined behavior if crng_reseed() and RNDRESEEDCRNG modify these corrently. Use READ_ONCE() and WRITE_ONCE() to make the behavior defined. Don't fix the race on crng->init_time by protecting it with crng->lock, since it's not a problem for duplicate reseedings to occur. I.e., the lockless access with READ_ONCE() is fine. Fixes: d848e5f8e1eb ("random: add new ioctl RNDRESEEDCRNG") Fixes: e192be9d9a30 ("random: replace non-blocking pool with a Chacha20-based CRNG") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Paul E. McKenney Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5e3046606640c8..13c968b950c588 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -991,7 +991,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) crng->state[i+4] ^= buf.key[i] ^ rv; } memzero_explicit(&buf, sizeof(buf)); - crng->init_time = jiffies; + WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); @@ -1017,12 +1017,15 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]) { - unsigned long v, flags; + unsigned long v, flags, init_time; - if (crng_ready() && - (time_after(crng_global_init_time, crng->init_time) || - time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))) - crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); + if (crng_ready()) { + init_time = READ_ONCE(crng->init_time); + if (time_after(READ_ONCE(crng_global_init_time), init_time) || + time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(crng, crng == &primary_crng ? + &input_pool : NULL); + } spin_lock_irqsave(&crng->lock, flags); if (arch_get_random_long(&v)) crng->state[14] ^= v; @@ -1962,7 +1965,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) if (crng_init < 2) return -ENODATA; crng_reseed(&primary_crng, &input_pool); - crng_global_init_time = jiffies - 1; + WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: return -EINVAL; -- cgit 1.2.3-korg From 6048fdcc5f269c7f31d774c295ce59081b36e6f9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 22 Dec 2021 14:56:58 +0100 Subject: lib/crypto: blake2s: include as built-in In preparation for using blake2s in the RNG, we change the way that it is wired-in to the build system. Instead of using ifdefs to select the right symbol, we use weak symbols. And because ARM doesn't need the generic implementation, we make the generic one default only if an arch library doesn't need it already, and then have arch libraries that do need it opt-in. So that the arch libraries can remain tristate rather than bool, we then split the shash part from the glue code. Acked-by: Herbert Xu Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Jason A. Donenfeld --- arch/arm/crypto/Makefile | 4 +- arch/arm/crypto/blake2s-core.S | 8 ++-- arch/arm/crypto/blake2s-glue.c | 73 +------------------------------------ arch/arm/crypto/blake2s-shash.c | 75 ++++++++++++++++++++++++++++++++++++++ arch/x86/crypto/Makefile | 4 +- arch/x86/crypto/blake2s-glue.c | 68 +++------------------------------- arch/x86/crypto/blake2s-shash.c | 77 +++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 3 +- drivers/net/Kconfig | 1 - include/crypto/internal/blake2s.h | 6 +-- lib/crypto/Kconfig | 23 +++--------- lib/crypto/Makefile | 9 ++--- lib/crypto/blake2s-generic.c | 6 ++- lib/crypto/blake2s.c | 6 --- 14 files changed, 189 insertions(+), 174 deletions(-) create mode 100644 arch/arm/crypto/blake2s-shash.c create mode 100644 arch/x86/crypto/blake2s-shash.c diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index eafa898ba6a737..0274f81cc8ea0e 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o @@ -31,7 +32,8 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) -blake2s-arm-y := blake2s-core.o blake2s-glue.o +blake2s-arm-y := blake2s-shash.o +libblake2s-arm-y:= blake2s-core.o blake2s-glue.o blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S index 86345751bbf3a3..df40e46601f100 100644 --- a/arch/arm/crypto/blake2s-core.S +++ b/arch/arm/crypto/blake2s-core.S @@ -167,8 +167,8 @@ .endm // -// void blake2s_compress_arch(struct blake2s_state *state, -// const u8 *block, size_t nblocks, u32 inc); +// void blake2s_compress(struct blake2s_state *state, +// const u8 *block, size_t nblocks, u32 inc); // // Only the first three fields of struct blake2s_state are used: // u32 h[8]; (inout) @@ -176,7 +176,7 @@ // u32 f[2]; (in) // .align 5 -ENTRY(blake2s_compress_arch) +ENTRY(blake2s_compress) push {r0-r2,r4-r11,lr} // keep this an even number .Lnext_block: @@ -303,4 +303,4 @@ ENTRY(blake2s_compress_arch) str r3, [r12], #4 bne 1b b .Lcopy_block_done -ENDPROC(blake2s_compress_arch) +ENDPROC(blake2s_compress) diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c index f2cc1e5fc9ec18..0238a70d9581e9 100644 --- a/arch/arm/crypto/blake2s-glue.c +++ b/arch/arm/crypto/blake2s-glue.c @@ -1,78 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2s digest algorithm, ARM scalar implementation - * - * Copyright 2020 Google LLC - */ #include -#include - #include /* defined in blake2s-core.S */ -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_arm(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_arm, \ - .final = crypto_blake2s_final_arm, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_arm_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_arm_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)) : 0; -} - -static void __exit blake2s_arm_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - crypto_unregister_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)); -} - -module_init(blake2s_arm_mod_init); -module_exit(blake2s_arm_mod_exit); - -MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-arm"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-arm"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-arm"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-arm"); +EXPORT_SYMBOL(blake2s_compress); diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c new file mode 100644 index 00000000000000..17c1c3bfe2f505 --- /dev/null +++ b/arch/arm/crypto/blake2s-shash.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BLAKE2s digest algorithm, ARM scalar implementation + * + * Copyright 2020 Google LLC + */ + +#include +#include + +#include + +static int crypto_blake2s_update_arm(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, blake2s_compress); +} + +static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, blake2s_compress); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_arm, \ + .final = crypto_blake2s_final_arm, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_arm_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_arm_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)) : 0; +} + +static void __exit blake2s_arm_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)); +} + +module_init(blake2s_arm_mod_init); +module_exit(blake2s_arm_mod_exit); + +MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-arm"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-arm"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-arm"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-arm"); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index f307c93fc90a7a..c3af959648e620 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -62,7 +62,9 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o -blake2s-x86_64-y := blake2s-core.o blake2s-glue.o +blake2s-x86_64-y := blake2s-shash.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o +libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index a40365ab301eef..69853c13e8fb0a 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -28,9 +27,8 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); -void blake2s_compress_arch(struct blake2s_state *state, - const u8 *block, size_t nblocks, - const u32 inc) +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) { /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); @@ -56,49 +54,12 @@ void blake2s_compress_arch(struct blake2s_state *state, block += blocks * BLAKE2S_BLOCK_SIZE; } while (nblocks); } -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_x86(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_x86, \ - .final = crypto_blake2s_final_x86, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), -}; +EXPORT_SYMBOL(blake2s_compress); static int __init blake2s_mod_init(void) { - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return 0; - - static_branch_enable(&blake2s_use_ssse3); + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && @@ -109,26 +70,9 @@ static int __init blake2s_mod_init(void) XFEATURE_MASK_AVX512, NULL)) static_branch_enable(&blake2s_use_avx512); - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_algs, - ARRAY_SIZE(blake2s_algs)) : 0; -} - -static void __exit blake2s_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; } module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-x86"); MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c new file mode 100644 index 00000000000000..f9e2fecdb761b8 --- /dev/null +++ b/arch/x86/crypto/blake2s-shash.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +static int crypto_blake2s_update_x86(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, blake2s_compress); +} + +static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, blake2s_compress); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_x86, \ + .final = crypto_blake2s_final_x86, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_mod_init(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; +} + +static void __exit blake2s_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); + +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 285f82647d2b76..55718de561375e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1919,9 +1919,10 @@ config CRYPTO_STATS config CRYPTO_HASH_INFO bool -source "lib/crypto/Kconfig" source "drivers/crypto/Kconfig" source "crypto/asymmetric_keys/Kconfig" source "certs/Kconfig" endif # if CRYPTO + +source "lib/crypto/Kconfig" diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 6cccc3dc00bcf3..b2a4f998c180e9 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -81,7 +81,6 @@ config WIREGUARD select CRYPTO select CRYPTO_LIB_CURVE25519 select CRYPTO_LIB_CHACHA20POLY1305 - select CRYPTO_LIB_BLAKE2S select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT select CRYPTO_POLY1305_X86_64 if X86 && 64BIT select CRYPTO_BLAKE2S_X86 if X86 && 64BIT diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 8e50d487500f2c..d39cfa0d333e92 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -11,11 +11,11 @@ #include #include -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc); -void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, - size_t nblocks, const u32 inc); +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc); bool blake2s_selftest(void); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 545ccbddf6a1da..8620f38e117c07 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -comment "Crypto library routines" - config CRYPTO_LIB_AES tristate @@ -9,14 +7,14 @@ config CRYPTO_LIB_ARC4 tristate config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - tristate + bool help Declares whether the architecture provides an arch-specific accelerated implementation of the Blake2s library interface, either builtin or as a module. config CRYPTO_LIB_BLAKE2S_GENERIC - tristate + def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S help This symbol can be depended upon by arch implementations of the Blake2s library interface that require the generic code as a @@ -24,15 +22,6 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. -config CRYPTO_LIB_BLAKE2S - tristate "BLAKE2s hash function library" - depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S - select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n - help - Enable the Blake2s library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_ARCH_HAVE_LIB_CHACHA tristate help @@ -51,7 +40,7 @@ config CRYPTO_LIB_CHACHA_GENERIC of CRYPTO_LIB_CHACHA. config CRYPTO_LIB_CHACHA - tristate "ChaCha library interface" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -76,7 +65,7 @@ config CRYPTO_LIB_CURVE25519_GENERIC of CRYPTO_LIB_CURVE25519. config CRYPTO_LIB_CURVE25519 - tristate "Curve25519 scalar multiplication library" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n help @@ -111,7 +100,7 @@ config CRYPTO_LIB_POLY1305_GENERIC of CRYPTO_LIB_POLY1305. config CRYPTO_LIB_POLY1305 - tristate "Poly1305 library interface" + tristate depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n help @@ -120,7 +109,7 @@ config CRYPTO_LIB_POLY1305 is available and enabled. config CRYPTO_LIB_CHACHA20POLY1305 - tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_CHACHA diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 73205ed269bad6..ed43a41f2dcc8b 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -10,11 +10,10 @@ libaes-y := aes.o obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o -libblake2s-generic-y += blake2s-generic.o - -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o -libblake2s-y += blake2s.o +# blake2s is used by the /dev/random driver which is always builtin +obj-y += libblake2s.o +libblake2s-y := blake2s.o +libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 04ff8df245136d..75ccb3e633e650 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -37,7 +37,11 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) + __weak __alias(blake2s_compress_generic); + +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc) { u32 m[16]; diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 4055aa593ec498..93f2ae0513702c 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,12 +16,6 @@ #include #include -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) -# define blake2s_compress blake2s_compress_arch -#else -# define blake2s_compress blake2s_compress_generic -#endif - void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { __blake2s_update(state, in, inlen, blake2s_compress); -- cgit 1.2.3-korg From 9f9eff85a008b095eafc5f4ecbaf5aca689271c1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 21 Dec 2021 16:31:27 +0100 Subject: random: use BLAKE2s instead of SHA1 in extraction This commit addresses one of the lower hanging fruits of the RNG: its usage of SHA1. BLAKE2s is generally faster, and certainly more secure, than SHA1, which has [1] been [2] really [3] very [4] broken [5]. Additionally, the current construction in the RNG doesn't use the full SHA1 function, as specified, and allows overwriting the IV with RDRAND output in an undocumented way, even in the case when RDRAND isn't set to "trusted", which means potential malicious IV choices. And its short length means that keeping only half of it secret when feeding back into the mixer gives us only 2^80 bits of forward secrecy. In other words, not only is the choice of hash function dated, but the use of it isn't really great either. This commit aims to fix both of these issues while also keeping the general structure and semantics as close to the original as possible. Specifically: a) Rather than overwriting the hash IV with RDRAND, we put it into BLAKE2's documented "salt" and "personal" fields, which were specifically created for this type of usage. b) Since this function feeds the full hash result back into the entropy collector, we only return from it half the length of the hash, just as it was done before. This increases the construction's forward secrecy from 2^80 to a much more comfortable 2^128. c) Rather than using the raw "sha1_transform" function alone, we instead use the full proper BLAKE2s function, with finalization. This also has the advantage of supplying 16 bytes at a time rather than SHA1's 10 bytes, which, in addition to having a faster compression function to begin with, means faster extraction in general. On an Intel i7-11850H, this commit makes initial seeding around 131% faster. BLAKE2s itself has the nice property of internally being based on the ChaCha permutation, which the RNG is already using for expansion, so there shouldn't be any issue with newness, funkiness, or surprising CPU behavior, since it's based on something already in use. [1] https://eprint.iacr.org/2005/010.pdf [2] https://www.iacr.org/archive/crypto2005/36210017/36210017.pdf [3] https://eprint.iacr.org/2015/967.pdf [4] https://shattered.io/static/shattered.pdf [5] https://www.usenix.org/system/files/sec20-leurent.pdf Reviewed-by: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 71 ++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 13c968b950c588..99cce575a79c92 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,8 +1,7 @@ /* * random.c -- A strong random number generator * - * Copyright (C) 2017 Jason A. Donenfeld . All - * Rights Reserved. + * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * * Copyright Matt Mackall , 2003, 2004, 2005 * @@ -78,12 +77,12 @@ * an *estimate* of how many bits of randomness have been stored into * the random number generator's internal state. * - * When random bytes are desired, they are obtained by taking the SHA - * hash of the contents of the "entropy pool". The SHA hash avoids + * When random bytes are desired, they are obtained by taking the BLAKE2s + * hash of the contents of the "entropy pool". The BLAKE2s hash avoids * exposing the internal state of the entropy pool. It is believed to * be computationally infeasible to derive any useful information - * about the input of SHA from its output. Even if it is possible to - * analyze SHA in some clever way, as long as the amount of data + * about the input of BLAKE2s from its output. Even if it is possible to + * analyze BLAKE2s in some clever way, as long as the amount of data * returned from the generator is less than the inherent entropy in * the pool, the output data is totally unpredictable. For this * reason, the routine decreases its internal estimate of how many @@ -93,7 +92,7 @@ * If this estimate goes to zero, the routine can still generate * random numbers; however, an attacker may (at least in theory) be * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of SHA, which is + * outputs. This requires successful cryptanalysis of BLAKE2s, which is * not believed to be feasible, but there is a remote possibility. * Nonetheless, these numbers should be useful for the vast majority * of purposes. @@ -347,7 +346,7 @@ #include #include #include -#include +#include #include #include @@ -367,10 +366,7 @@ #define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) #define OUTPUT_POOL_SHIFT 10 #define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) -#define EXTRACT_SIZE 10 - - -#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) +#define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) /* * To allow fractional bits to be tracked, the entropy_count field is @@ -406,7 +402,7 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; * Thanks to Colin Plumb for suggesting this. * * The mixing operation is much less sensitive than the output hash, - * where we use SHA-1. All that we want of mixing operation is that + * where we use BLAKE2s. All that we want of mixing operation is that * it be a good non-cryptographic hash; i.e. it not produce collisions * when fed "random" data of the sort we expect to see. As long as * the pool state differs for different inputs, we have preserved the @@ -1384,56 +1380,49 @@ retry: */ static void extract_buf(struct entropy_store *r, __u8 *out) { - int i; - union { - __u32 w[5]; - unsigned long l[LONGS(20)]; - } hash; - __u32 workspace[SHA1_WORKSPACE_WORDS]; + struct blake2s_state state __aligned(__alignof__(unsigned long)); + u8 hash[BLAKE2S_HASH_SIZE]; + unsigned long *salt; unsigned long flags; + blake2s_init(&state, sizeof(hash)); + /* * If we have an architectural hardware random number - * generator, use it for SHA's initial vector + * generator, use it for BLAKE2's salt & personal fields. */ - sha1_init(hash.w); - for (i = 0; i < LONGS(20); i++) { + for (salt = (unsigned long *)&state.h[4]; + salt < (unsigned long *)&state.h[8]; ++salt) { unsigned long v; if (!arch_get_random_long(&v)) break; - hash.l[i] = v; + *salt ^= v; } - /* Generate a hash across the pool, 16 words (512 bits) at a time */ + /* Generate a hash across the pool */ spin_lock_irqsave(&r->lock, flags); - for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha1_transform(hash.w, (__u8 *)(r->pool + i), workspace); + blake2s_update(&state, (const u8 *)r->pool, + r->poolinfo->poolwords * sizeof(*r->pool)); + blake2s_final(&state, hash); /* final zeros out state */ /* * We mix the hash back into the pool to prevent backtracking * attacks (where the attacker knows the state of the pool * plus the current outputs, and attempts to find previous - * ouputs), unless the hash function can be inverted. By - * mixing at least a SHA1 worth of hash data back, we make + * outputs), unless the hash function can be inverted. By + * mixing at least a hash worth of hash data back, we make * brute-forcing the feedback as hard as brute-forcing the * hash. */ - __mix_pool_bytes(r, hash.w, sizeof(hash.w)); + __mix_pool_bytes(r, hash, sizeof(hash)); spin_unlock_irqrestore(&r->lock, flags); - memzero_explicit(workspace, sizeof(workspace)); - - /* - * In case the hash function has some recognizable output - * pattern, we fold it in half. Thus, we always feed back - * twice as much data as we output. + /* Note that EXTRACT_SIZE is half of hash size here, because above + * we've dumped the full length back into mixer. By reducing the + * amount that we emit, we retain a level of forward secrecy. */ - hash.w[0] ^= hash.w[3]; - hash.w[1] ^= hash.w[4]; - hash.w[2] ^= rol32(hash.w[2], 16); - - memcpy(out, &hash, EXTRACT_SIZE); - memzero_explicit(&hash, sizeof(hash)); + memcpy(out, hash, EXTRACT_SIZE); + memzero_explicit(hash, sizeof(hash)); } static ssize_t _extract_entropy(struct entropy_store *r, void *buf, -- cgit 1.2.3-korg From 0d9488ffbf2faddebc6bac055bfa6c93b94056a3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 24 Dec 2021 19:17:58 +0100 Subject: random: do not sign extend bytes for rotation when mixing By using `char` instead of `unsigned char`, certain platforms will sign extend the byte when `w = rol32(*bytes++, input_rotate)` is called, meaning that bit 7 is overrepresented when mixing. This isn't a real problem (unless the mixer itself is already broken) since it's still invertible, but it's not quite correct either. Fix this by using an explicit unsigned type. Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 99cce575a79c92..82db125aaed7bf 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -546,7 +546,7 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, unsigned long i, tap1, tap2, tap3, tap4, tap5; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; - const char *bytes = in; + const unsigned char *bytes = in; __u32 w; tap1 = r->poolinfo->tap1; -- cgit 1.2.3-korg From f7e67b8e803185d0aabe7f29d25a35c8be724a78 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 29 Dec 2021 22:10:03 +0100 Subject: random: fix crash on multiple early calls to add_bootloader_randomness() Currently, if CONFIG_RANDOM_TRUST_BOOTLOADER is enabled, multiple calls to add_bootloader_randomness() are broken and can cause a NULL pointer dereference, as noted by Ivan T. Ivanov. This is not only a hypothetical problem, as qemu on arm64 may provide bootloader entropy via EFI and via devicetree. On the first call to add_hwgenerator_randomness(), crng_fast_load() is executed, and if the seed is long enough, crng_init will be set to 1. On subsequent calls to add_bootloader_randomness() and then to add_hwgenerator_randomness(), crng_fast_load() will be skipped. Instead, wait_event_interruptible() and then credit_entropy_bits() will be called. If the entropy count for that second seed is large enough, that proceeds to crng_reseed(). However, both wait_event_interruptible() and crng_reseed() depends (at least in numa_crng_init()) on workqueues. Therefore, test whether system_wq is already initialized, which is a sufficient indicator that workqueue_init_early() has progressed far enough. If we wind up hitting the !system_wq case, we later want to do what would have been done there when wqs are up, so set a flag, and do that work later from the rand_initialize() call. Reported-by: Ivan T. Ivanov Fixes: 18b915ac6b0a ("efi/random: Treat EFI_RNG_PROTOCOL output as bootloader randomness") Cc: stable@vger.kernel.org Signed-off-by: Dominik Brodowski [Jason: added crng_need_done state and related logic.] Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 56 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 82db125aaed7bf..144e8841bff44a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -468,6 +468,7 @@ static struct crng_state primary_crng = { * its value (from 0->1->2). */ static int crng_init = 0; +static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; @@ -835,6 +836,36 @@ static void __init crng_initialize_primary(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } +static void crng_finalize_init(struct crng_state *crng) +{ + if (crng != &primary_crng || crng_init >= 2) + return; + if (!system_wq) { + /* We can't call numa_crng_init until we have workqueues, + * so mark this for processing later. */ + crng_need_final_init = true; + return; + } + + invalidate_batched_entropy(); + numa_crng_init(); + crng_init = 2; + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + if (urandom_warning.missed) { + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } +} + #ifdef CONFIG_NUMA static void do_numa_crng_init(struct work_struct *work) { @@ -989,25 +1020,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } - } + crng_finalize_init(crng); } static void _extract_crng(struct crng_state *crng, @@ -1780,6 +1793,8 @@ static void __init init_std_data(struct entropy_store *r) int __init rand_initialize(void) { init_std_data(&input_pool); + if (crng_need_final_init) + crng_finalize_init(&primary_crng); crng_initialize_primary(&primary_crng); crng_global_init_time = jiffies; if (ratelimit_disable) { @@ -2288,7 +2303,8 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_interruptible(random_write_wait, kthread_should_stop() || + wait_event_interruptible(random_write_wait, + !system_wq || kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); -- cgit 1.2.3-korg From 9c3ddde3f811aabbb83778a2a615bf141b4909ef Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Dec 2021 22:10:04 +0100 Subject: random: do not re-init if crng_reseed completes before primary init If the bootloader supplies sufficient material and crng_reseed() is called very early on, but not too early that wqs aren't available yet, then we might transition to crng_init==2 before rand_initialize()'s call to crng_initialize_primary() made. Then, when crng_initialize_primary() is called, if we're trusting the CPU's RDRAND instructions, we'll needlessly reinitialize the RNG and emit a message about it. This is mostly harmless, as numa_crng_init() will allocate and then free what it just allocated, and excessive calls to invalidate_batched_entropy() aren't so harmful. But it is funky and the extra message is confusing, so avoid the re-initialization all together by checking for crng_init < 2 in crng_initialize_primary(), just as we already do in crng_reseed(). Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 144e8841bff44a..916cf791ed0e36 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -827,7 +827,7 @@ static void __init crng_initialize_primary(struct crng_state *crng) { chacha_init_consts(crng->state); _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); - if (crng_init_try_arch_early(crng) && trust_cpu) { + if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; -- cgit 1.2.3-korg From 73c7733f122e8d0107f88655a12011f68f69e74b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Dec 2021 22:10:05 +0100 Subject: random: do not throw away excess input to crng_fast_load When crng_fast_load() is called by add_hwgenerator_randomness(), we currently will advance to crng_init==1 once we've acquired 64 bytes, and then throw away the rest of the buffer. Usually, that is not a problem: When add_hwgenerator_randomness() gets called via EFI or DT during setup_arch(), there won't be any IRQ randomness. Therefore, the 64 bytes passed by EFI exactly matches what is needed to advance to crng_init==1. Usually, DT seems to pass 64 bytes as well -- with one notable exception being kexec, which hands over 128 bytes of entropy to the kexec'd kernel. In that case, we'll advance to crng_init==1 once 64 of those bytes are consumed by crng_fast_load(), but won't continue onward feeding in bytes to progress to crng_init==2. This commit fixes the issue by feeding any leftover bytes into the next phase in add_hwgenerator_randomness(). [linux@dominikbrodowski.net: rewrite commit message] Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 916cf791ed0e36..21166188b7e19c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -919,12 +919,14 @@ static struct crng_state *select_crng(void) /* * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. + * path. So we can't afford to dilly-dally. Returns the number of + * bytes processed from cp. */ -static int crng_fast_load(const char *cp, size_t len) +static size_t crng_fast_load(const char *cp, size_t len) { unsigned long flags; char *p; + size_t ret = 0; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -935,7 +937,7 @@ static int crng_fast_load(const char *cp, size_t len) p = (unsigned char *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; - cp++; crng_init_cnt++; len--; + cp++; crng_init_cnt++; len--; ret++; } spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { @@ -943,7 +945,7 @@ static int crng_fast_load(const char *cp, size_t len) crng_init = 1; pr_notice("fast init done\n"); } - return 1; + return ret; } /* @@ -1294,7 +1296,7 @@ void add_interrupt_randomness(int irq) if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool))) { + sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; } @@ -2295,8 +2297,11 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, struct entropy_store *poolp = &input_pool; if (unlikely(crng_init == 0)) { - crng_fast_load(buffer, count); - return; + size_t ret = crng_fast_load(buffer, count); + count -= ret; + buffer += ret; + if (!count || crng_init == 0) + return; } /* Suspend writing if we're above the trickle threshold. -- cgit 1.2.3-korg From 57826feeedb63b091f807ba8325d736775d39afd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Dec 2021 22:10:06 +0100 Subject: random: mix bootloader randomness into pool If we're trusting bootloader randomness, crng_fast_load() is called by add_hwgenerator_randomness(), which sets us to crng_init==1. However, usually it is only called once for an initial 64-byte push, so bootloader entropy will not mix any bytes into the input pool. So it's conceivable that crng_init==1 when crng_initialize_primary() is called later, but then the input pool is empty. When that happens, the crng state key will be overwritten with extracted output from the empty input pool. That's bad. In contrast, if we're not trusting bootloader randomness, we call crng_slow_load() *and* we call mix_pool_bytes(), so that later crng_initialize_primary() isn't drawing on nothing. In order to prevent crng_initialize_primary() from extracting an empty pool, have the trusted bootloader case mirror that of the untrusted bootloader case, mixing the input into the pool. [linux@dominikbrodowski.net: rewrite commit message] Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 21166188b7e19c..9d4e1907e4b116 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2298,6 +2298,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, if (unlikely(crng_init == 0)) { size_t ret = crng_fast_load(buffer, count); + mix_pool_bytes(poolp, buffer, ret); count -= ret; buffer += ret; if (!count || crng_init == 0) -- cgit 1.2.3-korg From 161212c7fd1d9069b232785c75492e50941e2ea8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 29 Dec 2021 22:10:07 +0100 Subject: random: harmonize "crng init done" messages We print out "crng init done" for !TRUST_CPU, so we should also print out the same for TRUST_CPU. Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9d4e1907e4b116..9b5eb6cf82ce28 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -831,7 +831,7 @@ static void __init crng_initialize_primary(struct crng_state *crng) invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; - pr_notice("crng done (trusting CPU's manufacturer)\n"); + pr_notice("crng init done (trusting CPU's manufacturer)\n"); } crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -- cgit 1.2.3-korg From 7b87324112df2e1f9b395217361626362dcfb9fb Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 30 Dec 2021 15:59:26 +0100 Subject: random: use IS_ENABLED(CONFIG_NUMA) instead of ifdefs Rather than an awkward combination of ifdefs and __maybe_unused, we can ensure more source gets parsed, regardless of the configuration, by using IS_ENABLED for the CONFIG_NUMA conditional code. This makes things cleaner and easier to follow. I've confirmed that on !CONFIG_NUMA, we don't wind up with excess code by accident; the generated object file is the same. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9b5eb6cf82ce28..54086e9da05b01 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -759,7 +759,6 @@ static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -#ifdef CONFIG_NUMA /* * Hack to deal with crazy userspace progams when they are all trying * to access /dev/urandom in parallel. The programs are almost @@ -767,7 +766,6 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); * their brain damage. */ static struct crng_state **crng_node_pool __read_mostly; -#endif static void invalidate_batched_entropy(void); static void numa_crng_init(void); @@ -815,7 +813,7 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) return arch_init; } -static void __maybe_unused crng_initialize_secondary(struct crng_state *crng) +static void crng_initialize_secondary(struct crng_state *crng) { chacha_init_consts(crng->state); _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); @@ -866,7 +864,6 @@ static void crng_finalize_init(struct crng_state *crng) } } -#ifdef CONFIG_NUMA static void do_numa_crng_init(struct work_struct *work) { int i; @@ -893,29 +890,24 @@ static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); static void numa_crng_init(void) { - schedule_work(&numa_crng_init_work); + if (IS_ENABLED(CONFIG_NUMA)) + schedule_work(&numa_crng_init_work); } static struct crng_state *select_crng(void) { - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; + if (IS_ENABLED(CONFIG_NUMA)) { + struct crng_state **pool; + int nid = numa_node_id(); - return &primary_crng; -} -#else -static void numa_crng_init(void) {} + /* pairs with cmpxchg_release() in do_numa_crng_init() */ + pool = READ_ONCE(crng_node_pool); + if (pool && pool[nid]) + return pool[nid]; + } -static struct crng_state *select_crng(void) -{ return &primary_crng; } -#endif /* * crng_fast_load() can be called by code in the interrupt service -- cgit 1.2.3-korg From 96562f286884e2db89c74215b199a1084b5fb7f7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 31 Dec 2021 09:26:08 +0100 Subject: random: early initialization of ChaCha constants Previously, the ChaCha constants for the primary pool were only initialized in crng_initialize_primary(), called by rand_initialize(). However, some randomness is actually extracted from the primary pool beforehand, e.g. by kmem_cache_create(). Therefore, statically initialize the ChaCha constants for the primary pool. Cc: Herbert Xu Cc: "David S. Miller" Cc: Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 5 ++++- include/crypto/chacha.h | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 54086e9da05b01..4de0feb697812f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -457,6 +457,10 @@ struct crng_state { static struct crng_state primary_crng = { .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), + .state[0] = CHACHA_CONSTANT_EXPA, + .state[1] = CHACHA_CONSTANT_ND_3, + .state[2] = CHACHA_CONSTANT_2_BY, + .state[3] = CHACHA_CONSTANT_TE_K, }; /* @@ -823,7 +827,6 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - chacha_init_consts(crng->state); _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index dabaee6987186b..b3ea73b8194434 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) hchacha_block_generic(state, out, nrounds); } +enum chacha_constants { /* expand 32-byte k */ + CHACHA_CONSTANT_EXPA = 0x61707865U, + CHACHA_CONSTANT_ND_3 = 0x3320646eU, + CHACHA_CONSTANT_2_BY = 0x79622d32U, + CHACHA_CONSTANT_TE_K = 0x6b206574U +}; + static inline void chacha_init_consts(u32 *state) { - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ + state[0] = CHACHA_CONSTANT_EXPA; + state[1] = CHACHA_CONSTANT_ND_3; + state[2] = CHACHA_CONSTANT_2_BY; + state[3] = CHACHA_CONSTANT_TE_K; } void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); -- cgit 1.2.3-korg From 2ee25b6968b1b3c66ffa408de23d023c1bce81cf Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 30 Dec 2021 17:50:52 +0100 Subject: random: avoid superfluous call to RDRAND in CRNG extraction RDRAND is not fast. RDRAND is actually quite slow. We've known this for a while, which is why functions like get_random_u{32,64} were converted to use batching of our ChaCha-based CRNG instead. Yet CRNG extraction still includes a call to RDRAND, in the hot path of every call to get_random_bytes(), /dev/urandom, and getrandom(2). This call to RDRAND here seems quite superfluous. CRNG is already extracting things based on a 256-bit key, based on good entropy, which is then reseeded periodically, updated, backtrack-mutated, and so forth. The CRNG extraction construction is something that we're already relying on to be secure and solid. If it's not, that's a serious problem, and it's unlikely that mixing in a measly 32 bits from RDRAND is going to alleviate things. And in the case where the CRNG doesn't have enough entropy yet, we're already initializing the ChaCha key row with RDRAND in crng_init_try_arch_early(). Removing the call to RDRAND improves performance on an i7-11850H by 370%. In other words, the vast majority of the work done by extract_crng() prior to this commit was devoted to fetching 32 bits of RDRAND. Reviewed-by: Theodore Ts'o Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4de0feb697812f..17ec609487953b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1023,7 +1023,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]) { - unsigned long v, flags, init_time; + unsigned long flags, init_time; if (crng_ready()) { init_time = READ_ONCE(crng->init_time); @@ -1033,8 +1033,6 @@ static void _extract_crng(struct crng_state *crng, &input_pool : NULL); } spin_lock_irqsave(&crng->lock, flags); - if (arch_get_random_long(&v)) - crng->state[14] ^= v; chacha20_block(&crng->state[0], out); if (crng->state[12] == 0) crng->state[13]++; -- cgit 1.2.3-korg From 6c8e11e08a5b74bb8a5cdd5cbc1e5143df0fba72 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 3 Jan 2022 16:59:31 +0100 Subject: random: don't reset crng_init_cnt on urandom_read() At the moment, urandom_read() (used for /dev/urandom) resets crng_init_cnt to zero when it is called at crng_init<2. This is inconsistent: We do it for /dev/urandom reads, but not for the equivalent getrandom(GRND_INSECURE). (And worse, as Jason pointed out, we're only doing this as long as maxwarn>0.) crng_init_cnt is only read in crng_fast_load(); it is relevant at crng_init==0 for determining when to switch to crng_init==1 (and where in the RNG state array to write). As far as I understand: - crng_init==0 means "we have nothing, we might just be returning the same exact numbers on every boot on every machine, we don't even have non-cryptographic randomness; we should shove every bit of entropy we can get into the RNG immediately" - crng_init==1 means "well we have something, it might not be cryptographic, but at least we're not gonna return the same data every time or whatever, it's probably good enough for TCP and ASLR and stuff; we now have time to build up actual cryptographic entropy in the input pool" - crng_init==2 means "this is supposed to be cryptographically secure now, but we'll keep adding more entropy just to be sure". The current code means that if someone is pulling data from /dev/urandom fast enough at crng_init==0, we'll keep resetting crng_init_cnt, and we'll never make forward progress to crng_init==1. It seems to be intended to prevent an attacker from bruteforcing the contents of small individual RNG inputs on the way from crng_init==0 to crng_init==1, but that's misguided; crng_init==1 isn't supposed to provide proper cryptographic security anyway, RNG users who care about getting secure RNG output have to wait until crng_init==2. This code was inconsistent, and it probably made things worse - just get rid of it. Signed-off-by: Jann Horn Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 17ec609487953b..227fb780273877 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1831,7 +1831,6 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - unsigned long flags; static int maxwarn = 10; if (!crng_ready() && maxwarn > 0) { @@ -1839,9 +1838,6 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (__ratelimit(&urandom_warning)) pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", current->comm, nbytes); - spin_lock_irqsave(&primary_crng.lock, flags); - crng_init_cnt = 0; - spin_unlock_irqrestore(&primary_crng.lock, flags); } return urandom_read_nowarn(file, buf, nbytes, ppos); -- cgit 1.2.3-korg