aboutsummaryrefslogtreecommitdiffstats
path: root/queue-5.4
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-03-29 11:25:32 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-03-29 11:25:32 +0100
commit3c0a8b560f5e39cee7f630b084f2612b78f6f22d (patch)
treeb99249b4c7f5175a82723de082331b59333dd441 /queue-5.4
parent1358e3c7660521fd3f674a4a4ceffe20cf17c8fe (diff)
downloadstable-queue-3c0a8b560f5e39cee7f630b084f2612b78f6f22d.tar.gz
5.4-stable patches
added patches: xen-events-close-evtchn-after-mapping-cleanup.patch
Diffstat (limited to 'queue-5.4')
-rw-r--r--queue-5.4/series1
-rw-r--r--queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch158
2 files changed, 159 insertions, 0 deletions
diff --git a/queue-5.4/series b/queue-5.4/series
index 926cd06d48..bbf4e77879 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -92,3 +92,4 @@ fs-aio-check-iocb_aio_rw-before-the-struct-aio_kiocb-conversion.patch
objtool-is_fentry_call-crashes-if-call-has-no-destination.patch
objtool-add-support-for-intra-function-calls.patch
x86-speculation-support-intra-function-call-validation.patch
+xen-events-close-evtchn-after-mapping-cleanup.patch
diff --git a/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch b/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch
new file mode 100644
index 0000000000..e4b5907e9d
--- /dev/null
+++ b/queue-5.4/xen-events-close-evtchn-after-mapping-cleanup.patch
@@ -0,0 +1,158 @@
+From fa765c4b4aed2d64266b694520ecb025c862c5a9 Mon Sep 17 00:00:00 2001
+From: Maximilian Heyne <mheyne@amazon.de>
+Date: Wed, 24 Jan 2024 16:31:28 +0000
+Subject: xen/events: close evtchn after mapping cleanup
+
+From: Maximilian Heyne <mheyne@amazon.de>
+
+commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream.
+
+shutdown_pirq and startup_pirq are not taking the
+irq_mapping_update_lock because they can't due to lock inversion. Both
+are called with the irq_desc->lock being taking. The lock order,
+however, is first irq_mapping_update_lock and then irq_desc->lock.
+
+This opens multiple races:
+- shutdown_pirq can be interrupted by a function that allocates an event
+ channel:
+
+ CPU0 CPU1
+ shutdown_pirq {
+ xen_evtchn_close(e)
+ __startup_pirq {
+ EVTCHNOP_bind_pirq
+ -> returns just freed evtchn e
+ set_evtchn_to_irq(e, irq)
+ }
+ xen_irq_info_cleanup() {
+ set_evtchn_to_irq(e, -1)
+ }
+ }
+
+ Assume here event channel e refers here to the same event channel
+ number.
+ After this race the evtchn_to_irq mapping for e is invalid (-1).
+
+- __startup_pirq races with __unbind_from_irq in a similar way. Because
+ __startup_pirq doesn't take irq_mapping_update_lock it can grab the
+ evtchn that __unbind_from_irq is currently freeing and cleaning up. In
+ this case even though the event channel is allocated, its mapping can
+ be unset in evtchn_to_irq.
+
+The fix is to first cleanup the mappings and then close the event
+channel. In this way, when an event channel gets allocated it's
+potential previous evtchn_to_irq mappings are guaranteed to be unset already.
+This is also the reverse order of the allocation where first the event
+channel is allocated and then the mappings are setup.
+
+On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
+[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
+devices. The issue is that during nvme_setup_io_queues, pci_free_irq
+is called for every device which results in a call to shutdown_pirq.
+With many nvme devices it's therefore likely to hit this race during
+boot because there will be multiple calls to shutdown_pirq and
+startup_pirq are running potentially in parallel.
+
+ ------------[ cut here ]------------
+ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
+ kernel BUG at drivers/xen/events/events_base.c:499!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
+ Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
+ Workqueue: nvme-reset-wq nvme_reset_work
+ RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
+ Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
+ RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
+ RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
+ RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
+ RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
+ R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
+ R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
+ FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ ? show_trace_log_lvl+0x1c1/0x2d9
+ ? show_trace_log_lvl+0x1c1/0x2d9
+ ? set_affinity_irq+0xdc/0x1c0
+ ? __die_body.cold+0x8/0xd
+ ? die+0x2b/0x50
+ ? do_trap+0x90/0x110
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? do_error_trap+0x65/0x80
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? exc_invalid_op+0x4e/0x70
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? asm_exc_invalid_op+0x12/0x20
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? bind_evtchn_to_cpu+0xc5/0xf0
+ set_affinity_irq+0xdc/0x1c0
+ irq_do_set_affinity+0x1d7/0x1f0
+ irq_setup_affinity+0xd6/0x1a0
+ irq_startup+0x8a/0xf0
+ __setup_irq+0x639/0x6d0
+ ? nvme_suspend+0x150/0x150
+ request_threaded_irq+0x10c/0x180
+ ? nvme_suspend+0x150/0x150
+ pci_request_irq+0xa8/0xf0
+ ? __blk_mq_free_request+0x74/0xa0
+ queue_request_irq+0x6f/0x80
+ nvme_create_queue+0x1af/0x200
+ nvme_create_io_queues+0xbd/0xf0
+ nvme_setup_io_queues+0x246/0x320
+ ? nvme_irq_check+0x30/0x30
+ nvme_reset_work+0x1c8/0x400
+ process_one_work+0x1b0/0x350
+ worker_thread+0x49/0x310
+ ? process_one_work+0x350/0x350
+ kthread+0x11b/0x140
+ ? __kthread_bind_mask+0x60/0x60
+ ret_from_fork+0x22/0x30
+ Modules linked in:
+ ---[ end trace a11715de1eee1873 ]---
+
+Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
+Cc: stable@vger.kernel.org
+Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com>
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de
+Signed-off-by: Juergen Gross <jgross@suse.com>
+[apanyaki: backport to v5.4-stable]
+Signed-off-by: Andrew Paniakin <apanyaki@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -825,8 +825,8 @@ static void shutdown_pirq(struct irq_dat
+ return;
+
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
+- xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
++ xen_evtchn_close(evtchn);
+ }
+
+ static void enable_pirq(struct irq_data *data)
+@@ -869,8 +869,6 @@ static void __unbind_from_irq(unsigned i
+ if (VALID_EVTCHN(evtchn)) {
+ unsigned int cpu = cpu_from_irq(irq);
+
+- xen_evtchn_close(evtchn);
+-
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+@@ -883,6 +881,7 @@ static void __unbind_from_irq(unsigned i
+ }
+
+ xen_irq_info_cleanup(info);
++ xen_evtchn_close(evtchn);
+ }
+
+ xen_free_irq(irq);