aboutsummaryrefslogtreecommitdiffstats
path: root/queue-5.4
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-04-05 12:07:05 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-04-05 12:07:05 +0200
commita685af9cc132bd469bbde3728411ee68213be7bf (patch)
tree1f5067f0b89a17174f796d12a28dd83dc96a3602 /queue-5.4
parentb30efd7d6a93b735de02caaaa2d59728b975dbe4 (diff)
downloadstable-queue-a685af9cc132bd469bbde3728411ee68213be7bf.tar.gz
5.4-stable patches
added patches: bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
Diffstat (limited to 'queue-5.4')
-rw-r--r--queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch74
-rw-r--r--queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch58
-rw-r--r--queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch125
-rw-r--r--queue-5.4/series3
4 files changed, 260 insertions, 0 deletions
diff --git a/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch b/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
new file mode 100644
index 0000000000..90eaa4ea1b
--- /dev/null
+++ b/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
@@ -0,0 +1,74 @@
+From ff91059932401894e6c86341915615c5eb0eca48 Mon Sep 17 00:00:00 2001
+From: Jakub Sitnicki <jakub@cloudflare.com>
+Date: Tue, 2 Apr 2024 12:46:21 +0200
+Subject: bpf, sockmap: Prevent lock inversion deadlock in map delete elem
+
+From: Jakub Sitnicki <jakub@cloudflare.com>
+
+commit ff91059932401894e6c86341915615c5eb0eca48 upstream.
+
+syzkaller started using corpuses where a BPF tracing program deletes
+elements from a sockmap/sockhash map. Because BPF tracing programs can be
+invoked from any interrupt context, locks taken during a map_delete_elem
+operation must be hardirq-safe. Otherwise a deadlock due to lock inversion
+is possible, as reported by lockdep:
+
+ CPU0 CPU1
+ ---- ----
+ lock(&htab->buckets[i].lock);
+ local_irq_disable();
+ lock(&host->lock);
+ lock(&htab->buckets[i].lock);
+ <Interrupt>
+ lock(&host->lock);
+
+Locks in sockmap are hardirq-unsafe by design. We expect elements to be
+deleted from sockmap/sockhash only in task (normal) context with interrupts
+enabled, or in softirq context.
+
+Detect when a map_delete_elem operation is invoked from a context which is
+_not_ hardirq-unsafe, that is, with interrupts disabled, and bail out with
+an error.
+
+Note that map updates are not affected by this issue. BPF verifier does not
+allow updating sockmap/sockhash from a BPF tracing program today.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Reported-by: xingwei lee <xrivendell7@gmail.com>
+Reported-by: yue sun <samsun1006219@gmail.com>
+Reported-by: syzbot+bc922f476bd65abbd466@syzkaller.appspotmail.com
+Reported-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=d4066896495db380182e
+Closes: https://syzkaller.appspot.com/bug?extid=bc922f476bd65abbd466
+Link: https://lore.kernel.org/bpf/20240402104621.1050319-1-jakub@cloudflare.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -321,6 +321,9 @@ static int __sock_map_delete(struct bpf_
+ struct sock *sk;
+ int err = 0;
+
++ if (irqs_disabled())
++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+ raw_spin_lock_bh(&stab->lock);
+ sk = *psk;
+ if (!sk_test || sk_test == sk)
+@@ -654,6 +657,9 @@ static int sock_hash_delete_elem(struct
+ struct bpf_htab_elem *elem;
+ int ret = -ENOENT;
+
++ if (irqs_disabled())
++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+ hash = sock_hash_bucket_hash(key, key_size);
+ bucket = sock_hash_select_bucket(htab, hash);
+
diff --git a/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch b/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
new file mode 100644
index 0000000000..00d63e40ff
--- /dev/null
+++ b/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
@@ -0,0 +1,58 @@
+From 24225011d81b471acc0e1e315b7d9905459a6304 Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Wed, 3 Apr 2024 15:22:04 +0800
+Subject: netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get()
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit 24225011d81b471acc0e1e315b7d9905459a6304 upstream.
+
+nft_unregister_flowtable_type() within nf_flow_inet_module_exit() can run
+concurrently with __nft_flowtable_type_get() within nf_tables_newflowtable(),
+and there is no protection when iterating over the nf_tables_flowtables
+list in __nft_flowtable_type_get(). Therefore, there is a potential
+data race on nf_tables_flowtables list entries.
+
+Use list_for_each_entry_rcu() to iterate over nf_tables_flowtables list
+in __nft_flowtable_type_get(), and use rcu_read_lock() in the caller
+nft_flowtable_type_get() to protect the entire type query process.
+
+Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend")
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6041,11 +6041,12 @@ static int nf_tables_flowtable_parse_hoo
+ return err;
+ }
+
++/* call under rcu_read_lock */
+ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+ {
+ const struct nf_flowtable_type *type;
+
+- list_for_each_entry(type, &nf_tables_flowtables, list) {
++ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
+ if (family == type->family)
+ return type;
+ }
+@@ -6057,9 +6058,13 @@ nft_flowtable_type_get(struct net *net,
+ {
+ const struct nf_flowtable_type *type;
+
++ rcu_read_lock();
+ type = __nft_flowtable_type_get(family);
+- if (type != NULL && try_module_get(type->owner))
++ if (type != NULL && try_module_get(type->owner)) {
++ rcu_read_unlock();
+ return type;
++ }
++ rcu_read_unlock();
+
+ lockdep_nfnl_nft_mutex_not_held();
+ #ifdef CONFIG_MODULES
diff --git a/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch b/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
new file mode 100644
index 0000000000..3b1b92a842
--- /dev/null
+++ b/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
@@ -0,0 +1,125 @@
+From 24cea9677025e0de419989ecb692acd4bb34cac2 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 2 Apr 2024 18:04:36 +0200
+Subject: netfilter: nf_tables: flush pending destroy work before exit_net release
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 24cea9677025e0de419989ecb692acd4bb34cac2 upstream.
+
+Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy
+work before netlink notifier") to address a race between exit_net and
+the destroy workqueue.
+
+The trace below shows an element to be released via destroy workqueue
+while exit_net path (triggered via module removal) has already released
+the set that is used in such transaction.
+
+[ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465
+[ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359
+[ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables]
+[ 1360.547984] Call Trace:
+[ 1360.547991] <TASK>
+[ 1360.547998] dump_stack_lvl+0x53/0x70
+[ 1360.548014] print_report+0xc4/0x610
+[ 1360.548026] ? __virt_addr_valid+0xba/0x160
+[ 1360.548040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
+[ 1360.548054] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548176] kasan_report+0xae/0xe0
+[ 1360.548189] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548312] nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548447] ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables]
+[ 1360.548577] ? _raw_spin_unlock_irq+0x18/0x30
+[ 1360.548591] process_one_work+0x2f1/0x670
+[ 1360.548610] worker_thread+0x4d3/0x760
+[ 1360.548627] ? __pfx_worker_thread+0x10/0x10
+[ 1360.548640] kthread+0x16b/0x1b0
+[ 1360.548653] ? __pfx_kthread+0x10/0x10
+[ 1360.548665] ret_from_fork+0x2f/0x50
+[ 1360.548679] ? __pfx_kthread+0x10/0x10
+[ 1360.548690] ret_from_fork_asm+0x1a/0x30
+[ 1360.548707] </TASK>
+
+[ 1360.548719] Allocated by task 192061:
+[ 1360.548726] kasan_save_stack+0x20/0x40
+[ 1360.548739] kasan_save_track+0x14/0x30
+[ 1360.548750] __kasan_kmalloc+0x8f/0xa0
+[ 1360.548760] __kmalloc_node+0x1f1/0x450
+[ 1360.548771] nf_tables_newset+0x10c7/0x1b50 [nf_tables]
+[ 1360.548883] nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink]
+[ 1360.548909] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink]
+[ 1360.548927] netlink_unicast+0x367/0x4f0
+[ 1360.548935] netlink_sendmsg+0x34b/0x610
+[ 1360.548944] ____sys_sendmsg+0x4d4/0x510
+[ 1360.548953] ___sys_sendmsg+0xc9/0x120
+[ 1360.548961] __sys_sendmsg+0xbe/0x140
+[ 1360.548971] do_syscall_64+0x55/0x120
+[ 1360.548982] entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+[ 1360.548994] Freed by task 192222:
+[ 1360.548999] kasan_save_stack+0x20/0x40
+[ 1360.549009] kasan_save_track+0x14/0x30
+[ 1360.549019] kasan_save_free_info+0x3b/0x60
+[ 1360.549028] poison_slab_object+0x100/0x180
+[ 1360.549036] __kasan_slab_free+0x14/0x30
+[ 1360.549042] kfree+0xb6/0x260
+[ 1360.549049] __nft_release_table+0x473/0x6a0 [nf_tables]
+[ 1360.549131] nf_tables_exit_net+0x170/0x240 [nf_tables]
+[ 1360.549221] ops_exit_list+0x50/0xa0
+[ 1360.549229] free_exit_list+0x101/0x140
+[ 1360.549236] unregister_pernet_operations+0x107/0x160
+[ 1360.549245] unregister_pernet_subsys+0x1c/0x30
+[ 1360.549254] nf_tables_module_exit+0x43/0x80 [nf_tables]
+[ 1360.549345] __do_sys_delete_module+0x253/0x370
+[ 1360.549352] do_syscall_64+0x55/0x120
+[ 1360.549360] entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+(gdb) list *__nft_release_table+0x473
+0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354).
+11349 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+11350 list_del(&flowtable->list);
+11351 nft_use_dec(&table->use);
+11352 nf_tables_flowtable_destroy(flowtable);
+11353 }
+11354 list_for_each_entry_safe(set, ns, &table->sets, list) {
+11355 list_del(&set->list);
+11356 nft_use_dec(&table->use);
+11357 if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+11358 nft_map_deactivate(&ctx, set);
+(gdb)
+
+[ 1360.549372] Last potentially related work creation:
+[ 1360.549376] kasan_save_stack+0x20/0x40
+[ 1360.549384] __kasan_record_aux_stack+0x9b/0xb0
+[ 1360.549392] __queue_work+0x3fb/0x780
+[ 1360.549399] queue_work_on+0x4f/0x60
+[ 1360.549407] nft_rhash_remove+0x33b/0x340 [nf_tables]
+[ 1360.549516] nf_tables_commit+0x1c6a/0x2620 [nf_tables]
+[ 1360.549625] nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink]
+[ 1360.549647] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink]
+[ 1360.549671] netlink_unicast+0x367/0x4f0
+[ 1360.549680] netlink_sendmsg+0x34b/0x610
+[ 1360.549690] ____sys_sendmsg+0x4d4/0x510
+[ 1360.549697] ___sys_sendmsg+0xc9/0x120
+[ 1360.549706] __sys_sendmsg+0xbe/0x140
+[ 1360.549715] do_syscall_64+0x55/0x120
+[ 1360.549725] entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -8476,6 +8476,7 @@ static void __exit nf_tables_module_exit
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+ nft_chain_filter_fini();
+ nft_chain_route_fini();
++ nf_tables_trans_destroy_flush_work();
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
+ cancel_work_sync(&trans_destroy_work);
diff --git a/queue-5.4/series b/queue-5.4/series
index e6e4aab934..caa725abfe 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -146,3 +146,6 @@ vfio-pci-create-persistent-intx-handler.patch
vfio-platform-create-persistent-irq-handlers.patch
revert-x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch
mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
+netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
+netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
+bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch