diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-02-20 15:51:37 +0100 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-02-20 15:51:37 +0100 |
commit | c4c746d2b3a1884470bb152879a59546f32d97b6 (patch) | |
tree | 2c989aa5b96c560e52e3b037cce5fc8f38512ef5 /queue-5.4 | |
parent | 8e6df488109d1a8c344a30516897860472e8c599 (diff) | |
download | stable-queue-c4c746d2b3a1884470bb152879a59546f32d97b6.tar.gz |
5.4-stable patches
added patches:
netfilter-ipset-fix-performance-regression-in-swap-operation.patch
netfilter-ipset-missing-gc-cancellations-fixed.patch
Diffstat (limited to 'queue-5.4')
-rw-r--r-- | queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch | 287 | ||||
-rw-r--r-- | queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch | 72 | ||||
-rw-r--r-- | queue-5.4/series | 2 |
3 files changed, 361 insertions, 0 deletions
diff --git a/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch b/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch new file mode 100644 index 0000000000..5c0006ad8a --- /dev/null +++ b/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch @@ -0,0 +1,287 @@ +From 97f7cf1cd80eeed3b7c808b7c12463295c751001 Mon Sep 17 00:00:00 2001 +From: Jozsef Kadlecsik <kadlec@netfilter.org> +Date: Mon, 29 Jan 2024 10:57:01 +0100 +Subject: netfilter: ipset: fix performance regression in swap operation + +From: Jozsef Kadlecsik <kadlec@netfilter.org> + +commit 97f7cf1cd80eeed3b7c808b7c12463295c751001 upstream. + +The patch "netfilter: ipset: fix race condition between swap/destroy +and kernel side add/del/test", commit 28628fa9 fixes a race condition. +But the synchronize_rcu() added to the swap function unnecessarily slows +it down: it can safely be moved to destroy and use call_rcu() instead. + +Eric Dumazet pointed out that simply calling the destroy functions as +rcu callback does not work: sets with timeout use garbage collectors +which need cancelling at destroy which can wait. Therefore the destroy +functions are split into two: cancelling garbage collectors safely at +executing the command received by netlink and moving the remaining +part only into the rcu callback. + +Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ +Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") +Reported-by: Ale Crismani <ale.crismani@automattic.com> +Reported-by: David Wang <00107082@163.com> +Tested-by: David Wang <00107082@163.com> +Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/netfilter/ipset/ip_set.h | 4 +++ + net/netfilter/ipset/ip_set_bitmap_gen.h | 14 +++++++++--- + net/netfilter/ipset/ip_set_core.c | 37 ++++++++++++++++++++++++-------- + net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++++-- + net/netfilter/ipset/ip_set_list_set.c | 13 ++++++++--- + 5 files changed, 65 insertions(+), 18 deletions(-) + +--- a/include/linux/netfilter/ipset/ip_set.h ++++ b/include/linux/netfilter/ipset/ip_set.h +@@ -188,6 +188,8 @@ struct ip_set_type_variant { + /* Return true if "b" set is the same as "a" + * according to the create set parameters */ + bool (*same_set)(const struct ip_set *a, const struct ip_set *b); ++ /* Cancel ongoing garbage collectors before destroying the set*/ ++ void (*cancel_gc)(struct ip_set *set); + /* Region-locking is used */ + bool region_lock; + }; +@@ -236,6 +238,8 @@ extern void ip_set_type_unregister(struc + + /* A generic IP set */ + struct ip_set { ++ /* For call_cru in destroy */ ++ struct rcu_head rcu; + /* The name of the set */ + char name[IPSET_MAXNAMELEN]; + /* Lock protecting the set data */ +--- a/net/netfilter/ipset/ip_set_bitmap_gen.h ++++ b/net/netfilter/ipset/ip_set_bitmap_gen.h +@@ -28,6 +28,7 @@ + #define mtype_del IPSET_TOKEN(MTYPE, _del) + #define mtype_list IPSET_TOKEN(MTYPE, _list) + #define mtype_gc IPSET_TOKEN(MTYPE, _gc) ++#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) + #define mtype MTYPE + + #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) +@@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) + { + struct mtype *map = set->data; + +- if (SET_WITH_TIMEOUT(set)) +- del_timer_sync(&map->gc); +- + if (set->dsize && set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); + ip_set_free(map->members); +@@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) + add_timer(&map->gc); + } + ++static void ++mtype_cancel_gc(struct ip_set *set) ++{ ++ struct mtype *map = set->data; ++ ++ if (SET_WITH_TIMEOUT(set)) ++ del_timer_sync(&map->gc); ++} ++ + static const struct ip_set_type_variant mtype = { + .kadt = mtype_kadt, + .uadt = mtype_uadt, +@@ -301,6 +308,7 @@ static const struct ip_set_type_variant + .head = mtype_head, + .list = mtype_list, + .same_set = mtype_same_set, ++ .cancel_gc = mtype_cancel_gc, + }; + + #endif /* __IP_SET_BITMAP_IP_GEN_H */ +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1034,6 +1034,14 @@ ip_set_destroy_set(struct ip_set *set) + kfree(set); + } + ++static void ++ip_set_destroy_set_rcu(struct rcu_head *head) ++{ ++ struct ip_set *set = container_of(head, struct ip_set, rcu); ++ ++ ip_set_destroy_set(set); ++} ++ + static int ip_set_destroy(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], +@@ -1047,8 +1055,6 @@ static int ip_set_destroy(struct net *ne + if (unlikely(protocol_min_failed(attr))) + return -IPSET_ERR_PROTOCOL; + +- /* Must wait for flush to be really finished in list:set */ +- rcu_barrier(); + + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. +@@ -1060,8 +1066,10 @@ static int ip_set_destroy(struct net *ne + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ +- read_lock_bh(&ip_set_ref_lock); + if (!attr[IPSET_ATTR_SETNAME]) { ++ /* Must wait for flush to be really finished in list:set */ ++ rcu_barrier(); ++ read_lock_bh(&ip_set_ref_lock); + for (i = 0; i < inst->ip_set_max; i++) { + s = ip_set(inst, i); + if (s && (s->ref || s->ref_netlink)) { +@@ -1075,12 +1083,17 @@ static int ip_set_destroy(struct net *ne + s = ip_set(inst, i); + if (s) { + ip_set(inst, i) = NULL; ++ /* Must cancel garbage collectors */ ++ s->variant->cancel_gc(s); + ip_set_destroy_set(s); + } + } + /* Modified by ip_set_destroy() only, which is serialized */ + inst->is_destroyed = false; + } else { ++ u16 features = 0; ++ ++ read_lock_bh(&ip_set_ref_lock); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); + if (!s) { +@@ -1090,10 +1103,16 @@ static int ip_set_destroy(struct net *ne + ret = -IPSET_ERR_BUSY; + goto out; + } ++ features = s->type->features; + ip_set(inst, i) = NULL; + read_unlock_bh(&ip_set_ref_lock); +- +- ip_set_destroy_set(s); ++ if (features & IPSET_TYPE_NAME) { ++ /* Must wait for flush to be really finished */ ++ rcu_barrier(); ++ } ++ /* Must cancel garbage collectors */ ++ s->variant->cancel_gc(s); ++ call_rcu(&s->rcu, ip_set_destroy_set_rcu); + } + return 0; + out: +@@ -1252,9 +1271,6 @@ static int ip_set_swap(struct net *net, + ip_set(inst, to_id) = from; + write_unlock_bh(&ip_set_ref_lock); + +- /* Make sure all readers of the old set pointers are completed. */ +- synchronize_rcu(); +- + return 0; + } + +@@ -2267,8 +2283,11 @@ ip_set_fini(void) + { + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); +- + unregister_pernet_subsys(&ip_set_net_ops); ++ ++ /* Wait for call_rcu() in destroy */ ++ rcu_barrier(); ++ + pr_debug("these are the famous last words\n"); + } + +--- a/net/netfilter/ipset/ip_set_hash_gen.h ++++ b/net/netfilter/ipset/ip_set_hash_gen.h +@@ -235,6 +235,7 @@ htable_size(u8 hbits) + #undef mtype_gc_do + #undef mtype_gc + #undef mtype_gc_init ++#undef mtype_cancel_gc + #undef mtype_variant + #undef mtype_data_match + +@@ -279,6 +280,7 @@ htable_size(u8 hbits) + #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) + #define mtype_gc IPSET_TOKEN(MTYPE, _gc) + #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) ++#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) + #define mtype_variant IPSET_TOKEN(MTYPE, _variant) + #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) + +@@ -464,9 +466,6 @@ mtype_destroy(struct ip_set *set) + struct htype *h = set->data; + struct list_head *l, *lt; + +- if (SET_WITH_TIMEOUT(set)) +- cancel_delayed_work_sync(&h->gc.dwork); +- + mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + list_for_each_safe(l, lt, &h->ad) { + list_del(l); +@@ -613,6 +612,15 @@ mtype_gc_init(struct htable_gc *gc) + queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); + } + ++static void ++mtype_cancel_gc(struct ip_set *set) ++{ ++ struct htype *h = set->data; ++ ++ if (SET_WITH_TIMEOUT(set)) ++ cancel_delayed_work_sync(&h->gc.dwork); ++} ++ + static int + mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); +@@ -1433,6 +1441,7 @@ static const struct ip_set_type_variant + .uref = mtype_uref, + .resize = mtype_resize, + .same_set = mtype_same_set, ++ .cancel_gc = mtype_cancel_gc, + .region_lock = true, + }; + +--- a/net/netfilter/ipset/ip_set_list_set.c ++++ b/net/netfilter/ipset/ip_set_list_set.c +@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) + struct list_set *map = set->data; + struct set_elem *e, *n; + +- if (SET_WITH_TIMEOUT(set)) +- del_timer_sync(&map->gc); +- + list_for_each_entry_safe(e, n, &map->members, list) { + list_del(&e->list); + ip_set_put_byindex(map->net, e->id); +@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a + a->extensions == b->extensions; + } + ++static void ++list_set_cancel_gc(struct ip_set *set) ++{ ++ struct list_set *map = set->data; ++ ++ if (SET_WITH_TIMEOUT(set)) ++ del_timer_sync(&map->gc); ++} ++ + static const struct ip_set_type_variant set_variant = { + .kadt = list_set_kadt, + .uadt = list_set_uadt, +@@ -558,6 +564,7 @@ static const struct ip_set_type_variant + .head = list_set_head, + .list = list_set_list, + .same_set = list_set_same_set, ++ .cancel_gc = list_set_cancel_gc, + }; + + static void diff --git a/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch b/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch new file mode 100644 index 0000000000..2de96d9830 --- /dev/null +++ b/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch @@ -0,0 +1,72 @@ +From 27c5a095e2518975e20a10102908ae8231699879 Mon Sep 17 00:00:00 2001 +From: Jozsef Kadlecsik <kadlec@netfilter.org> +Date: Sun, 4 Feb 2024 16:26:42 +0100 +Subject: netfilter: ipset: Missing gc cancellations fixed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jozsef Kadlecsik <kadlec@netfilter.org> + +commit 27c5a095e2518975e20a10102908ae8231699879 upstream. + +The patch fdb8e12cc2cc ("netfilter: ipset: fix performance regression +in swap operation") missed to add the calls to gc cancellations +at the error path of create operations and at module unload. Also, +because the half of the destroy operations now executed by a +function registered by call_rcu(), neither NFNL_SUBSYS_IPSET mutex +or rcu read lock is held and therefore the checking of them results +false warnings. + +Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") +Reported-by: syzbot+52bbc0ad036f6f0d4a25@syzkaller.appspotmail.com +Reported-by: Brad Spengler <spender@grsecurity.net> +Reported-by: Стас Ничипорович <stasn77@gmail.com> +Tested-by: Brad Spengler <spender@grsecurity.net> +Tested-by: Стас Ничипорович <stasn77@gmail.com> +Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/netfilter/ipset/ip_set_core.c | 2 ++ + net/netfilter/ipset/ip_set_hash_gen.h | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1006,6 +1006,7 @@ static int ip_set_create(struct net *net + return ret; + + cleanup: ++ set->variant->cancel_gc(set); + set->variant->destroy(set); + put_out: + module_put(set->type->me); +@@ -2236,6 +2237,7 @@ ip_set_net_exit(struct net *net) + set = ip_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; ++ set->variant->cancel_gc(set); + ip_set_destroy_set(set); + } + } +--- a/net/netfilter/ipset/ip_set_hash_gen.h ++++ b/net/netfilter/ipset/ip_set_hash_gen.h +@@ -446,7 +446,7 @@ mtype_ahash_destroy(struct ip_set *set, + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { +- n = __ipset_dereference(hbucket(t, i)); ++ n = (__force struct hbucket *)hbucket(t, i); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) +@@ -466,7 +466,7 @@ mtype_destroy(struct ip_set *set) + struct htype *h = set->data; + struct list_head *l, *lt; + +- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); ++ mtype_ahash_destroy(set, (__force struct htable *)h->table, true); + list_for_each_safe(l, lt, &h->ad) { + list_del(l); + kfree(l); diff --git a/queue-5.4/series b/queue-5.4/series index fd80d498f6..d62af7e616 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -250,3 +250,5 @@ bus-moxtet-add-spi-device-table.patch arch-mm-remove-stale-mentions-of-disconigmem.patch mips-fix-max_mapnr-being-uninitialized-on-early-stag.patch kvm-arm64-vgic-its-avoid-potential-uaf-in-lpi-transl.patch +netfilter-ipset-fix-performance-regression-in-swap-operation.patch +netfilter-ipset-missing-gc-cancellations-fixed.patch |