diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-03-29 11:25:32 +0100 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-03-29 11:25:32 +0100 |
commit | 3c0a8b560f5e39cee7f630b084f2612b78f6f22d (patch) | |
tree | b99249b4c7f5175a82723de082331b59333dd441 /queue-5.4 | |
parent | 1358e3c7660521fd3f674a4a4ceffe20cf17c8fe (diff) | |
download | stable-queue-3c0a8b560f5e39cee7f630b084f2612b78f6f22d.tar.gz |
5.4-stable patches
added patches:
xen-events-close-evtchn-after-mapping-cleanup.patch
Diffstat (limited to 'queue-5.4')
-rw-r--r-- | queue-5.4/series | 1 | ||||
-rw-r--r-- | queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch | 158 |
2 files changed, 159 insertions, 0 deletions
diff --git a/queue-5.4/series b/queue-5.4/series index 926cd06d48..bbf4e77879 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -92,3 +92,4 @@ fs-aio-check-iocb_aio_rw-before-the-struct-aio_kiocb-conversion.patch objtool-is_fentry_call-crashes-if-call-has-no-destination.patch objtool-add-support-for-intra-function-calls.patch x86-speculation-support-intra-function-call-validation.patch +xen-events-close-evtchn-after-mapping-cleanup.patch diff --git a/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch b/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch new file mode 100644 index 0000000000..e4b5907e9d --- /dev/null +++ b/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch @@ -0,0 +1,158 @@ +From fa765c4b4aed2d64266b694520ecb025c862c5a9 Mon Sep 17 00:00:00 2001 +From: Maximilian Heyne <mheyne@amazon.de> +Date: Wed, 24 Jan 2024 16:31:28 +0000 +Subject: xen/events: close evtchn after mapping cleanup + +From: Maximilian Heyne <mheyne@amazon.de> + +commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream. + +shutdown_pirq and startup_pirq are not taking the +irq_mapping_update_lock because they can't due to lock inversion. Both +are called with the irq_desc->lock being taken. The lock order, +however, is first irq_mapping_update_lock and then irq_desc->lock. + +This opens multiple races: +- shutdown_pirq can be interrupted by a function that allocates an event + channel: + + CPU0 CPU1 + shutdown_pirq { + xen_evtchn_close(e) + __startup_pirq { + EVTCHNOP_bind_pirq + -> returns just freed evtchn e + set_evtchn_to_irq(e, irq) + } + xen_irq_info_cleanup() { + set_evtchn_to_irq(e, -1) + } + } + + Assume here that event channel e refers to the same event channel + number. + After this race the evtchn_to_irq mapping for e is invalid (-1). + +- __startup_pirq races with __unbind_from_irq in a similar way. 
Because + __startup_pirq doesn't take irq_mapping_update_lock it can grab the + evtchn that __unbind_from_irq is currently freeing and cleaning up. In + this case even though the event channel is allocated, its mapping can + be unset in evtchn_to_irq. + +The fix is to first clean up the mappings and then close the event +channel. In this way, when an event channel gets allocated its +potential previous evtchn_to_irq mappings are guaranteed to be unset already. +This is also the reverse order of the allocation where first the event +channel is allocated and then the mappings are set up. + +On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal +[un]bind interfaces"), we hit a BUG like the following during probing of NVMe +devices. The issue is that during nvme_setup_io_queues, pci_free_irq +is called for every device which results in a call to shutdown_pirq. +With many nvme devices it's therefore likely to hit this race during +boot because there will be multiple calls to shutdown_pirq and +startup_pirq potentially running in parallel. + + ------------[ cut here ]------------ + blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled + kernel BUG at drivers/xen/events/events_base.c:499! 
+ invalid opcode: 0000 [#1] SMP PTI + CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 + Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 + Workqueue: nvme-reset-wq nvme_reset_work + RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 + Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 + RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 + RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff + RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 + R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed + R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 + FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + ? show_trace_log_lvl+0x1c1/0x2d9 + ? show_trace_log_lvl+0x1c1/0x2d9 + ? set_affinity_irq+0xdc/0x1c0 + ? __die_body.cold+0x8/0xd + ? die+0x2b/0x50 + ? do_trap+0x90/0x110 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? do_error_trap+0x65/0x80 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? exc_invalid_op+0x4e/0x70 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? asm_exc_invalid_op+0x12/0x20 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? bind_evtchn_to_cpu+0xc5/0xf0 + set_affinity_irq+0xdc/0x1c0 + irq_do_set_affinity+0x1d7/0x1f0 + irq_setup_affinity+0xd6/0x1a0 + irq_startup+0x8a/0xf0 + __setup_irq+0x639/0x6d0 + ? nvme_suspend+0x150/0x150 + request_threaded_irq+0x10c/0x180 + ? nvme_suspend+0x150/0x150 + pci_request_irq+0xa8/0xf0 + ? 
__blk_mq_free_request+0x74/0xa0 + queue_request_irq+0x6f/0x80 + nvme_create_queue+0x1af/0x200 + nvme_create_io_queues+0xbd/0xf0 + nvme_setup_io_queues+0x246/0x320 + ? nvme_irq_check+0x30/0x30 + nvme_reset_work+0x1c8/0x400 + process_one_work+0x1b0/0x350 + worker_thread+0x49/0x310 + ? process_one_work+0x350/0x350 + kthread+0x11b/0x140 + ? __kthread_bind_mask+0x60/0x60 + ret_from_fork+0x22/0x30 + Modules linked in: + ---[ end trace a11715de1eee1873 ]--- + +Fixes: d46a78b05c0e ("xen: implement pirq type event channels") +Cc: stable@vger.kernel.org +Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com> +Signed-off-by: Maximilian Heyne <mheyne@amazon.de> +Reviewed-by: Juergen Gross <jgross@suse.com> +Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de +Signed-off-by: Juergen Gross <jgross@suse.com> +[apanyaki: backport to v5.4-stable] +Signed-off-by: Andrew Paniakin <apanyaki@amazon.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/xen/events/events_base.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -825,8 +825,8 @@ static void shutdown_pirq(struct irq_dat + return; + + do_mask(info, EVT_MASK_REASON_EXPLICIT); +- xen_evtchn_close(evtchn); + xen_irq_info_cleanup(info); ++ xen_evtchn_close(evtchn); + } + + static void enable_pirq(struct irq_data *data) +@@ -869,8 +869,6 @@ static void __unbind_from_irq(unsigned i + if (VALID_EVTCHN(evtchn)) { + unsigned int cpu = cpu_from_irq(irq); + +- xen_evtchn_close(evtchn); +- + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; +@@ -883,6 +881,7 @@ static void __unbind_from_irq(unsigned i + } + + xen_irq_info_cleanup(info); ++ xen_evtchn_close(evtchn); + } + + xen_free_irq(irq); |