aboutsummaryrefslogtreecommitdiffstats
path: root/queue-5.4
diff options
context:
space:
mode:
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2024-02-20 15:51:37 +0100
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2024-02-20 15:51:37 +0100
commit: c4c746d2b3a1884470bb152879a59546f32d97b6 (patch)
tree: 2c989aa5b96c560e52e3b037cce5fc8f38512ef5 /queue-5.4
parent: 8e6df488109d1a8c344a30516897860472e8c599 (diff)
download: stable-queue-c4c746d2b3a1884470bb152879a59546f32d97b6.tar.gz
5.4-stable patches
added patches: netfilter-ipset-fix-performance-regression-in-swap-operation.patch netfilter-ipset-missing-gc-cancellations-fixed.patch
Diffstat (limited to 'queue-5.4')
-rw-r--r-- queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch 287
-rw-r--r-- queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch 72
-rw-r--r-- queue-5.4/series 2
3 files changed, 361 insertions, 0 deletions
diff --git a/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch b/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch
new file mode 100644
index 0000000000..5c0006ad8a
--- /dev/null
+++ b/queue-5.4/netfilter-ipset-fix-performance-regression-in-swap-operation.patch
@@ -0,0 +1,287 @@
+From 97f7cf1cd80eeed3b7c808b7c12463295c751001 Mon Sep 17 00:00:00 2001
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+Date: Mon, 29 Jan 2024 10:57:01 +0100
+Subject: netfilter: ipset: fix performance regression in swap operation
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+commit 97f7cf1cd80eeed3b7c808b7c12463295c751001 upstream.
+
+The patch "netfilter: ipset: fix race condition between swap/destroy
+and kernel side add/del/test", commit 28628fa9 fixes a race condition.
+But the synchronize_rcu() added to the swap function unnecessarily slows
+it down: it can safely be moved to destroy and use call_rcu() instead.
+
+Eric Dumazet pointed out that simply calling the destroy functions as
+rcu callbacks does not work: sets with timeout use garbage collectors
+which need cancelling at destroy, and the cancellation can wait. Therefore
+the destroy functions are split into two: the garbage collectors are
+cancelled safely while executing the command received via netlink, and
+only the remaining part is moved into the rcu callback.
+
+Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/
+Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test")
+Reported-by: Ale Crismani <ale.crismani@automattic.com>
+Reported-by: David Wang <00107082@163.com>
+Tested-by: David Wang <00107082@163.com>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netfilter/ipset/ip_set.h | 4 +++
+ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 +++++++++---
+ net/netfilter/ipset/ip_set_core.c | 37 ++++++++++++++++++++++++--------
+ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++++--
+ net/netfilter/ipset/ip_set_list_set.c | 13 ++++++++---
+ 5 files changed, 65 insertions(+), 18 deletions(-)
+
+--- a/include/linux/netfilter/ipset/ip_set.h
++++ b/include/linux/netfilter/ipset/ip_set.h
+@@ -188,6 +188,8 @@ struct ip_set_type_variant {
+ /* Return true if "b" set is the same as "a"
+ * according to the create set parameters */
+ bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Cancel ongoing garbage collectors before destroying the set */
++ void (*cancel_gc)(struct ip_set *set);
+ /* Region-locking is used */
+ bool region_lock;
+ };
+@@ -236,6 +238,8 @@ extern void ip_set_type_unregister(struc
+
+ /* A generic IP set */
+ struct ip_set {
+ /* For call_rcu in destroy */
++ struct rcu_head rcu;
+ /* The name of the set */
+ char name[IPSET_MAXNAMELEN];
+ /* Lock protecting the set data */
+--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
++++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
+@@ -28,6 +28,7 @@
+ #define mtype_del IPSET_TOKEN(MTYPE, _del)
+ #define mtype_list IPSET_TOKEN(MTYPE, _list)
+ #define mtype_gc IPSET_TOKEN(MTYPE, _gc)
++#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
+ #define mtype MTYPE
+
+ #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
+@@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set)
+ {
+ struct mtype *map = set->data;
+
+- if (SET_WITH_TIMEOUT(set))
+- del_timer_sync(&map->gc);
+-
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map->members);
+@@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t)
+ add_timer(&map->gc);
+ }
+
++static void
++mtype_cancel_gc(struct ip_set *set)
++{
++ struct mtype *map = set->data;
++
++ if (SET_WITH_TIMEOUT(set))
++ del_timer_sync(&map->gc);
++}
++
+ static const struct ip_set_type_variant mtype = {
+ .kadt = mtype_kadt,
+ .uadt = mtype_uadt,
+@@ -301,6 +308,7 @@ static const struct ip_set_type_variant
+ .head = mtype_head,
+ .list = mtype_list,
+ .same_set = mtype_same_set,
++ .cancel_gc = mtype_cancel_gc,
+ };
+
+ #endif /* __IP_SET_BITMAP_IP_GEN_H */
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -1034,6 +1034,14 @@ ip_set_destroy_set(struct ip_set *set)
+ kfree(set);
+ }
+
++static void
++ip_set_destroy_set_rcu(struct rcu_head *head)
++{
++ struct ip_set *set = container_of(head, struct ip_set, rcu);
++
++ ip_set_destroy_set(set);
++}
++
+ static int ip_set_destroy(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+@@ -1047,8 +1055,6 @@ static int ip_set_destroy(struct net *ne
+ if (unlikely(protocol_min_failed(attr)))
+ return -IPSET_ERR_PROTOCOL;
+
+- /* Must wait for flush to be really finished in list:set */
+- rcu_barrier();
+
+ /* Commands are serialized and references are
+ * protected by the ip_set_ref_lock.
+@@ -1060,8 +1066,10 @@ static int ip_set_destroy(struct net *ne
+ * counter, so if it's already zero, we can proceed
+ * without holding the lock.
+ */
+- read_lock_bh(&ip_set_ref_lock);
+ if (!attr[IPSET_ATTR_SETNAME]) {
++ /* Must wait for flush to be really finished in list:set */
++ rcu_barrier();
++ read_lock_bh(&ip_set_ref_lock);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (s && (s->ref || s->ref_netlink)) {
+@@ -1075,12 +1083,17 @@ static int ip_set_destroy(struct net *ne
+ s = ip_set(inst, i);
+ if (s) {
+ ip_set(inst, i) = NULL;
++ /* Must cancel garbage collectors */
++ s->variant->cancel_gc(s);
+ ip_set_destroy_set(s);
+ }
+ }
+ /* Modified by ip_set_destroy() only, which is serialized */
+ inst->is_destroyed = false;
+ } else {
++ u16 features = 0;
++
++ read_lock_bh(&ip_set_ref_lock);
+ s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+ &i);
+ if (!s) {
+@@ -1090,10 +1103,16 @@ static int ip_set_destroy(struct net *ne
+ ret = -IPSET_ERR_BUSY;
+ goto out;
+ }
++ features = s->type->features;
+ ip_set(inst, i) = NULL;
+ read_unlock_bh(&ip_set_ref_lock);
+-
+- ip_set_destroy_set(s);
++ if (features & IPSET_TYPE_NAME) {
++ /* Must wait for flush to be really finished */
++ rcu_barrier();
++ }
++ /* Must cancel garbage collectors */
++ s->variant->cancel_gc(s);
++ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
+ }
+ return 0;
+ out:
+@@ -1252,9 +1271,6 @@ static int ip_set_swap(struct net *net,
+ ip_set(inst, to_id) = from;
+ write_unlock_bh(&ip_set_ref_lock);
+
+- /* Make sure all readers of the old set pointers are completed. */
+- synchronize_rcu();
+-
+ return 0;
+ }
+
+@@ -2267,8 +2283,11 @@ ip_set_fini(void)
+ {
+ nf_unregister_sockopt(&so_set);
+ nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+-
+ unregister_pernet_subsys(&ip_set_net_ops);
++
++ /* Wait for call_rcu() in destroy */
++ rcu_barrier();
++
+ pr_debug("these are the famous last words\n");
+ }
+
+--- a/net/netfilter/ipset/ip_set_hash_gen.h
++++ b/net/netfilter/ipset/ip_set_hash_gen.h
+@@ -235,6 +235,7 @@ htable_size(u8 hbits)
+ #undef mtype_gc_do
+ #undef mtype_gc
+ #undef mtype_gc_init
++#undef mtype_cancel_gc
+ #undef mtype_variant
+ #undef mtype_data_match
+
+@@ -279,6 +280,7 @@ htable_size(u8 hbits)
+ #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
+ #define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+ #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
++#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
+ #define mtype_variant IPSET_TOKEN(MTYPE, _variant)
+ #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
+
+@@ -464,9 +466,6 @@ mtype_destroy(struct ip_set *set)
+ struct htype *h = set->data;
+ struct list_head *l, *lt;
+
+- if (SET_WITH_TIMEOUT(set))
+- cancel_delayed_work_sync(&h->gc.dwork);
+-
+ mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+@@ -613,6 +612,15 @@ mtype_gc_init(struct htable_gc *gc)
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
+ }
+
++static void
++mtype_cancel_gc(struct ip_set *set)
++{
++ struct htype *h = set->data;
++
++ if (SET_WITH_TIMEOUT(set))
++ cancel_delayed_work_sync(&h->gc.dwork);
++}
++
+ static int
+ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+@@ -1433,6 +1441,7 @@ static const struct ip_set_type_variant
+ .uref = mtype_uref,
+ .resize = mtype_resize,
+ .same_set = mtype_same_set,
++ .cancel_gc = mtype_cancel_gc,
+ .region_lock = true,
+ };
+
+--- a/net/netfilter/ipset/ip_set_list_set.c
++++ b/net/netfilter/ipset/ip_set_list_set.c
+@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
+ struct list_set *map = set->data;
+ struct set_elem *e, *n;
+
+- if (SET_WITH_TIMEOUT(set))
+- del_timer_sync(&map->gc);
+-
+ list_for_each_entry_safe(e, n, &map->members, list) {
+ list_del(&e->list);
+ ip_set_put_byindex(map->net, e->id);
+@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a
+ a->extensions == b->extensions;
+ }
+
++static void
++list_set_cancel_gc(struct ip_set *set)
++{
++ struct list_set *map = set->data;
++
++ if (SET_WITH_TIMEOUT(set))
++ del_timer_sync(&map->gc);
++}
++
+ static const struct ip_set_type_variant set_variant = {
+ .kadt = list_set_kadt,
+ .uadt = list_set_uadt,
+@@ -558,6 +564,7 @@ static const struct ip_set_type_variant
+ .head = list_set_head,
+ .list = list_set_list,
+ .same_set = list_set_same_set,
++ .cancel_gc = list_set_cancel_gc,
+ };
+
+ static void
diff --git a/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch b/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch
new file mode 100644
index 0000000000..2de96d9830
--- /dev/null
+++ b/queue-5.4/netfilter-ipset-missing-gc-cancellations-fixed.patch
@@ -0,0 +1,72 @@
+From 27c5a095e2518975e20a10102908ae8231699879 Mon Sep 17 00:00:00 2001
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+Date: Sun, 4 Feb 2024 16:26:42 +0100
+Subject: netfilter: ipset: Missing gc cancellations fixed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+commit 27c5a095e2518975e20a10102908ae8231699879 upstream.
+
+The patch fdb8e12cc2cc ("netfilter: ipset: fix performance regression
+in swap operation") failed to add the gc cancellation calls
+to the error path of the create operation and to module unload. Also,
+because half of the destroy operation is now executed by a
+function registered by call_rcu(), neither the NFNL_SUBSYS_IPSET mutex
+nor the rcu read lock is held there, and therefore checking them results
+in false warnings.
+
+Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation")
+Reported-by: syzbot+52bbc0ad036f6f0d4a25@syzkaller.appspotmail.com
+Reported-by: Brad Spengler <spender@grsecurity.net>
+Reported-by: Стас Ничипорович <stasn77@gmail.com>
+Tested-by: Brad Spengler <spender@grsecurity.net>
+Tested-by: Стас Ничипорович <stasn77@gmail.com>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/ipset/ip_set_core.c | 2 ++
+ net/netfilter/ipset/ip_set_hash_gen.h | 4 ++--
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -1006,6 +1006,7 @@ static int ip_set_create(struct net *net
+ return ret;
+
+ cleanup:
++ set->variant->cancel_gc(set);
+ set->variant->destroy(set);
+ put_out:
+ module_put(set->type->me);
+@@ -2236,6 +2237,7 @@ ip_set_net_exit(struct net *net)
+ set = ip_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
++ set->variant->cancel_gc(set);
+ ip_set_destroy_set(set);
+ }
+ }
+--- a/net/netfilter/ipset/ip_set_hash_gen.h
++++ b/net/netfilter/ipset/ip_set_hash_gen.h
+@@ -446,7 +446,7 @@ mtype_ahash_destroy(struct ip_set *set,
+ u32 i;
+
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+- n = __ipset_dereference(hbucket(t, i));
++ n = (__force struct hbucket *)hbucket(t, i);
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+@@ -466,7 +466,7 @@ mtype_destroy(struct ip_set *set)
+ struct htype *h = set->data;
+ struct list_head *l, *lt;
+
+- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
++ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
diff --git a/queue-5.4/series b/queue-5.4/series
index fd80d498f6..d62af7e616 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -250,3 +250,5 @@ bus-moxtet-add-spi-device-table.patch
arch-mm-remove-stale-mentions-of-disconigmem.patch
mips-fix-max_mapnr-being-uninitialized-on-early-stag.patch
kvm-arm64-vgic-its-avoid-potential-uaf-in-lpi-transl.patch
+netfilter-ipset-fix-performance-regression-in-swap-operation.patch
+netfilter-ipset-missing-gc-cancellations-fixed.patch