diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-04-05 12:07:05 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-04-05 12:07:05 +0200 |
commit | a685af9cc132bd469bbde3728411ee68213be7bf (patch) | |
tree | 1f5067f0b89a17174f796d12a28dd83dc96a3602 /queue-5.4 | |
parent | b30efd7d6a93b735de02caaaa2d59728b975dbe4 (diff) | |
download | stable-queue-a685af9cc132bd469bbde3728411ee68213be7bf.tar.gz |
5.4-stable patches
added patches:
bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
Diffstat (limited to 'queue-5.4')
4 files changed, 260 insertions, 0 deletions
diff --git a/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch b/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch new file mode 100644 index 0000000000..90eaa4ea1b --- /dev/null +++ b/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch @@ -0,0 +1,74 @@ +From ff91059932401894e6c86341915615c5eb0eca48 Mon Sep 17 00:00:00 2001 +From: Jakub Sitnicki <jakub@cloudflare.com> +Date: Tue, 2 Apr 2024 12:46:21 +0200 +Subject: bpf, sockmap: Prevent lock inversion deadlock in map delete elem + +From: Jakub Sitnicki <jakub@cloudflare.com> + +commit ff91059932401894e6c86341915615c5eb0eca48 upstream. + +syzkaller started using corpuses where a BPF tracing program deletes +elements from a sockmap/sockhash map. Because BPF tracing programs can be +invoked from any interrupt context, locks taken during a map_delete_elem +operation must be hardirq-safe. Otherwise a deadlock due to lock inversion +is possible, as reported by lockdep: + + CPU0 CPU1 + ---- ---- + lock(&htab->buckets[i].lock); + local_irq_disable(); + lock(&host->lock); + lock(&htab->buckets[i].lock); + <Interrupt> + lock(&host->lock); + +Locks in sockmap are hardirq-unsafe by design. We expect elements to be +deleted from sockmap/sockhash only in task (normal) context with interrupts +enabled, or in softirq context. + +Detect when map_delete_elem operation is invoked from a context which is +_not_ hardirq-unsafe, that is interrupts are disabled, and bail out with an +error. + +Note that map updates are not affected by this issue. BPF verifier does not +allow updating sockmap/sockhash from a BPF tracing program today.
+ +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Reported-by: xingwei lee <xrivendell7@gmail.com> +Reported-by: yue sun <samsun1006219@gmail.com> +Reported-by: syzbot+bc922f476bd65abbd466@syzkaller.appspotmail.com +Reported-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com +Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Tested-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com +Acked-by: John Fastabend <john.fastabend@gmail.com> +Closes: https://syzkaller.appspot.com/bug?extid=d4066896495db380182e +Closes: https://syzkaller.appspot.com/bug?extid=bc922f476bd65abbd466 +Link: https://lore.kernel.org/bpf/20240402104621.1050319-1-jakub@cloudflare.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/core/sock_map.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -321,6 +321,9 @@ static int __sock_map_delete(struct bpf_ + struct sock *sk; + int err = 0; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + raw_spin_lock_bh(&stab->lock); + sk = *psk; + if (!sk_test || sk_test == sk) +@@ -654,6 +657,9 @@ static int sock_hash_delete_elem(struct + struct bpf_htab_elem *elem; + int ret = -ENOENT; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + hash = sock_hash_bucket_hash(key, key_size); + bucket = sock_hash_select_bucket(htab, hash); + diff --git a/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch b/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch new file mode 100644 index 0000000000..00d63e40ff --- /dev/null +++ b/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch @@ -0,0 +1,58 @@ +From 24225011d81b471acc0e1e315b7d9905459a6304 Mon Sep 17 00:00:00 2001 +From: Ziyang Xuan 
<william.xuanziyang@huawei.com> +Date: Wed, 3 Apr 2024 15:22:04 +0800 +Subject: netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get() + +From: Ziyang Xuan <william.xuanziyang@huawei.com> + +commit 24225011d81b471acc0e1e315b7d9905459a6304 upstream. + +nft_unregister_flowtable_type() within nf_flow_inet_module_exit() can run +concurrently with __nft_flowtable_type_get() within nf_tables_newflowtable(). +And there is not any protection when iterating over nf_tables_flowtables +list in __nft_flowtable_type_get(). Therefore, there is a potential +data-race of nf_tables_flowtables list entry. + +Use list_for_each_entry_rcu() to iterate over nf_tables_flowtables list +in __nft_flowtable_type_get(), and use rcu_read_lock() in the caller +nft_flowtable_type_get() to protect the entire type query process. + +Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") +Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/netfilter/nf_tables_api.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -6041,11 +6041,12 @@ static int nf_tables_flowtable_parse_hoo + return err; + } + ++/* call under rcu_read_lock */ + static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) + { + const struct nf_flowtable_type *type; + +- list_for_each_entry(type, &nf_tables_flowtables, list) { ++ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { + if (family == type->family) + return type; + } +@@ -6057,9 +6058,13 @@ nft_flowtable_type_get(struct net *net, + { + const struct nf_flowtable_type *type; + ++ rcu_read_lock(); + type = __nft_flowtable_type_get(family); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++
rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES diff --git a/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch b/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch new file mode 100644 index 0000000000..3b1b92a842 --- /dev/null +++ b/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch @@ -0,0 +1,125 @@ +From 24cea9677025e0de419989ecb692acd4bb34cac2 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 2 Apr 2024 18:04:36 +0200 +Subject: netfilter: nf_tables: flush pending destroy work before exit_net release + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +commit 24cea9677025e0de419989ecb692acd4bb34cac2 upstream. + +Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy +work before netlink notifier") to address a race between exit_net and +the destroy workqueue. + +The trace below shows an element to be released via destroy workqueue +while exit_net path (triggered via module removal) has already released +the set that is used in such transaction. + +[ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] +[ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465 +[ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359 +[ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables] +[ 1360.547984] Call Trace: +[ 1360.547991] <TASK> +[ 1360.547998] dump_stack_lvl+0x53/0x70 +[ 1360.548014] print_report+0xc4/0x610 +[ 1360.548026] ? __virt_addr_valid+0xba/0x160 +[ 1360.548040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 +[ 1360.548054] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] +[ 1360.548176] kasan_report+0xae/0xe0 +[ 1360.548189] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] +[ 1360.548312] nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] +[ 1360.548447] ? 
__pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables] +[ 1360.548577] ? _raw_spin_unlock_irq+0x18/0x30 +[ 1360.548591] process_one_work+0x2f1/0x670 +[ 1360.548610] worker_thread+0x4d3/0x760 +[ 1360.548627] ? __pfx_worker_thread+0x10/0x10 +[ 1360.548640] kthread+0x16b/0x1b0 +[ 1360.548653] ? __pfx_kthread+0x10/0x10 +[ 1360.548665] ret_from_fork+0x2f/0x50 +[ 1360.548679] ? __pfx_kthread+0x10/0x10 +[ 1360.548690] ret_from_fork_asm+0x1a/0x30 +[ 1360.548707] </TASK> + +[ 1360.548719] Allocated by task 192061: +[ 1360.548726] kasan_save_stack+0x20/0x40 +[ 1360.548739] kasan_save_track+0x14/0x30 +[ 1360.548750] __kasan_kmalloc+0x8f/0xa0 +[ 1360.548760] __kmalloc_node+0x1f1/0x450 +[ 1360.548771] nf_tables_newset+0x10c7/0x1b50 [nf_tables] +[ 1360.548883] nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink] +[ 1360.548909] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] +[ 1360.548927] netlink_unicast+0x367/0x4f0 +[ 1360.548935] netlink_sendmsg+0x34b/0x610 +[ 1360.548944] ____sys_sendmsg+0x4d4/0x510 +[ 1360.548953] ___sys_sendmsg+0xc9/0x120 +[ 1360.548961] __sys_sendmsg+0xbe/0x140 +[ 1360.548971] do_syscall_64+0x55/0x120 +[ 1360.548982] entry_SYSCALL_64_after_hwframe+0x55/0x5d + +[ 1360.548994] Freed by task 192222: +[ 1360.548999] kasan_save_stack+0x20/0x40 +[ 1360.549009] kasan_save_track+0x14/0x30 +[ 1360.549019] kasan_save_free_info+0x3b/0x60 +[ 1360.549028] poison_slab_object+0x100/0x180 +[ 1360.549036] __kasan_slab_free+0x14/0x30 +[ 1360.549042] kfree+0xb6/0x260 +[ 1360.549049] __nft_release_table+0x473/0x6a0 [nf_tables] +[ 1360.549131] nf_tables_exit_net+0x170/0x240 [nf_tables] +[ 1360.549221] ops_exit_list+0x50/0xa0 +[ 1360.549229] free_exit_list+0x101/0x140 +[ 1360.549236] unregister_pernet_operations+0x107/0x160 +[ 1360.549245] unregister_pernet_subsys+0x1c/0x30 +[ 1360.549254] nf_tables_module_exit+0x43/0x80 [nf_tables] +[ 1360.549345] __do_sys_delete_module+0x253/0x370 +[ 1360.549352] do_syscall_64+0x55/0x120 +[ 1360.549360] entry_SYSCALL_64_after_hwframe+0x55/0x5d + +(gdb) list 
*__nft_release_table+0x473 +0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354). +11349 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { +11350 list_del(&flowtable->list); +11351 nft_use_dec(&table->use); +11352 nf_tables_flowtable_destroy(flowtable); +11353 } +11354 list_for_each_entry_safe(set, ns, &table->sets, list) { +11355 list_del(&set->list); +11356 nft_use_dec(&table->use); +11357 if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) +11358 nft_map_deactivate(&ctx, set); +(gdb) + +[ 1360.549372] Last potentially related work creation: +[ 1360.549376] kasan_save_stack+0x20/0x40 +[ 1360.549384] __kasan_record_aux_stack+0x9b/0xb0 +[ 1360.549392] __queue_work+0x3fb/0x780 +[ 1360.549399] queue_work_on+0x4f/0x60 +[ 1360.549407] nft_rhash_remove+0x33b/0x340 [nf_tables] +[ 1360.549516] nf_tables_commit+0x1c6a/0x2620 [nf_tables] +[ 1360.549625] nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink] +[ 1360.549647] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] +[ 1360.549671] netlink_unicast+0x367/0x4f0 +[ 1360.549680] netlink_sendmsg+0x34b/0x610 +[ 1360.549690] ____sys_sendmsg+0x4d4/0x510 +[ 1360.549697] ___sys_sendmsg+0xc9/0x120 +[ 1360.549706] __sys_sendmsg+0xbe/0x140 +[ 1360.549715] do_syscall_64+0x55/0x120 +[ 1360.549725] entry_SYSCALL_64_after_hwframe+0x55/0x5d + +Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/netfilter/nf_tables_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -8476,6 +8476,7 @@ static void __exit nf_tables_module_exit + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + nft_chain_filter_fini(); + nft_chain_route_fini(); ++ nf_tables_trans_destroy_flush_work(); + unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); + cancel_work_sync(&trans_destroy_work); diff 
--git a/queue-5.4/series b/queue-5.4/series index e6e4aab934..caa725abfe 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -146,3 +146,6 @@ vfio-pci-create-persistent-intx-handler.patch vfio-platform-create-persistent-irq-handlers.patch revert-x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch +netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch +netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch +bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch |