diff options
author | Sasha Levin <sashal@kernel.org> | 2024-04-15 04:56:18 -0400 |
---|---|---|
committer | Sasha Levin <sashal@kernel.org> | 2024-04-15 04:56:18 -0400 |
commit | 26151b8eb2bb8b0d67b7a1948bf5c5a30b2e38ce (patch) | |
tree | 5c47fc692522c4f6f072e29be31931cfbd372a45 | |
parent | 7f87a3cd95e1c51fd624a4d3ed5bb7cc0da611e1 (diff) | |
download | stable-queue-26151b8eb2bb8b0d67b7a1948bf5c5a30b2e38ce.tar.gz |
Fixes for 5.4
Signed-off-by: Sasha Levin <sashal@kernel.org>
16 files changed, 1650 insertions, 0 deletions
diff --git a/queue-5.4/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch b/queue-5.4/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch new file mode 100644 index 0000000000..63ac8b4889 --- /dev/null +++ b/queue-5.4/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch @@ -0,0 +1,147 @@ +From 9728e96aea3c32621dbcf8d7b8945a6548a50f61 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 23 Jan 2024 09:08:53 -0800 +Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. + +From: Kuniyuki Iwashima <kuniyu@amazon.com> + +[ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] + +When touching unix_sk(sk)->inflight, we are always under +spin_lock(&unix_gc_lock). + +Let's convert unix_sk(sk)->inflight to the normal unsigned long. + +Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Reviewed-by: Simon Horman <horms@kernel.org> +Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/af_unix.h | 2 +- + net/unix/af_unix.c | 4 ++-- + net/unix/garbage.c | 17 ++++++++--------- + net/unix/scm.c | 8 +++++--- + 4 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/include/net/af_unix.h b/include/net/af_unix.h +index 6cb5026cf7272..e7612295b2626 100644 +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -52,7 +52,7 @@ struct unix_sock { + struct mutex iolock, bindlock; + struct sock *peer; + struct list_head link; +- atomic_long_t inflight; ++ unsigned long inflight; + spinlock_t lock; + unsigned long gc_flags; + #define UNIX_GC_CANDIDATE 0 +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 9b1dd845bca19..53335989a6f0c 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -809,11 +809,11 @@ static struct sock *unix_create1(struct net *net, struct socket 
*sock, int kern) + sk->sk_write_space = unix_write_space; + sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; + sk->sk_destruct = unix_sock_destructor; +- u = unix_sk(sk); ++ u = unix_sk(sk); ++ u->inflight = 0; + u->path.dentry = NULL; + u->path.mnt = NULL; + spin_lock_init(&u->lock); +- atomic_long_set(&u->inflight, 0); + INIT_LIST_HEAD(&u->link); + mutex_init(&u->iolock); /* single task reading lock */ + mutex_init(&u->bindlock); /* single task binding lock */ +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 9121a4d5436d5..675fbe594dbb3 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), + + static void dec_inflight(struct unix_sock *usk) + { +- atomic_long_dec(&usk->inflight); ++ usk->inflight--; + } + + static void inc_inflight(struct unix_sock *usk) + { +- atomic_long_inc(&usk->inflight); ++ usk->inflight++; + } + + static void inc_inflight_move_tail(struct unix_sock *u) + { +- atomic_long_inc(&u->inflight); ++ u->inflight++; ++ + /* If this still might be part of a cycle, move it to the end + * of the list, so that it's checked even if it was already + * passed over +@@ -237,14 +238,12 @@ void unix_gc(void) + */ + list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + long total_refs; +- long inflight_refs; + + total_refs = file_count(u->sk.sk_socket->file); +- inflight_refs = atomic_long_read(&u->inflight); + +- BUG_ON(inflight_refs < 1); +- BUG_ON(total_refs < inflight_refs); +- if (total_refs == inflight_refs) { ++ BUG_ON(!u->inflight); ++ BUG_ON(total_refs < u->inflight); ++ if (total_refs == u->inflight) { + list_move_tail(&u->link, &gc_candidates); + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); + __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); +@@ -271,7 +270,7 @@ void unix_gc(void) + /* Move cursor to after the current position. 
*/ + list_move(&cursor, &u->link); + +- if (atomic_long_read(&u->inflight) > 0) { ++ if (u->inflight) { + list_move_tail(&u->link, &not_cycle_list); + __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + scan_children(&u->sk, inc_inflight_move_tail, NULL); +diff --git a/net/unix/scm.c b/net/unix/scm.c +index 51b623de3be5f..785e8c4669e23 100644 +--- a/net/unix/scm.c ++++ b/net/unix/scm.c +@@ -51,12 +51,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) + if (s) { + struct unix_sock *u = unix_sk(s); + +- if (atomic_long_inc_return(&u->inflight) == 1) { ++ if (!u->inflight) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); + } else { + BUG_ON(list_empty(&u->link)); + } ++ u->inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); + } +@@ -73,10 +74,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) + if (s) { + struct unix_sock *u = unix_sk(s); + +- BUG_ON(!atomic_long_read(&u->inflight)); ++ BUG_ON(!u->inflight); + BUG_ON(list_empty(&u->link)); + +- if (atomic_long_dec_and_test(&u->inflight)) ++ u->inflight--; ++ if (!u->inflight) + list_del_init(&u->link); + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); +-- +2.43.0 + diff --git a/queue-5.4/af_unix-fix-garbage-collector-racing-against-connect.patch b/queue-5.4/af_unix-fix-garbage-collector-racing-against-connect.patch new file mode 100644 index 0000000000..81d607cf17 --- /dev/null +++ b/queue-5.4/af_unix-fix-garbage-collector-racing-against-connect.patch @@ -0,0 +1,122 @@ +From 3a08abbc29ff0325ab2e8cc9eff1d2278778cdd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 22:09:39 +0200 +Subject: af_unix: Fix garbage collector racing against connect() + +From: Michal Luczaj <mhal@rbox.co> + +[ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ] + +Garbage collector does not take into account 
the risk of embryo getting +enqueued during the garbage collection. If such embryo has a peer that +carries SCM_RIGHTS, two consecutive passes of scan_children() may see a +different set of children. Leading to an incorrectly elevated inflight +count, and then a dangling pointer within the gc_inflight_list. + +sockets are AF_UNIX/SOCK_STREAM +S is an unconnected socket +L is a listening in-flight socket bound to addr, not in fdtable +V's fd will be passed via sendmsg(), gets inflight count bumped + +connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc() +---------------- ------------------------- ----------- + +NS = unix_create1() +skb1 = sock_wmalloc(NS) +L = unix_find_other(addr) +unix_state_lock(L) +unix_peer(S) = NS + // V count=1 inflight=0 + + NS = unix_peer(S) + skb2 = sock_alloc() + skb_queue_tail(NS, skb2[V]) + + // V became in-flight + // V count=2 inflight=1 + + close(V) + + // V count=1 inflight=1 + // GC candidate condition met + + for u in gc_inflight_list: + if (total_refs == inflight_refs) + add u to gc_candidates + + // gc_candidates={L, V} + + for u in gc_candidates: + scan_children(u, dec_inflight) + + // embryo (skb1) was not + // reachable from L yet, so V's + // inflight remains unchanged +__skb_queue_tail(L, skb1) +unix_state_unlock(L) + for u in gc_candidates: + if (u.inflight) + scan_children(u, inc_inflight_move_tail) + + // V count=1 inflight=2 (!) + +If there is a GC-candidate listening socket, lock/unlock its state. This +makes GC wait until the end of any ongoing connect() to that socket. After +flipping the lock, a possibly SCM-laden embryo is already enqueued. And if +there is another embryo coming, it can not possibly carry SCM_RIGHTS. At +this point, unix_inflight() can not happen because unix_gc_lock is already +taken. Inflight graph remains unaffected. 
+ +Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.") +Signed-off-by: Michal Luczaj <mhal@rbox.co> +Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/unix/garbage.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 675fbe594dbb3..58525311e903a 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -235,11 +235,22 @@ void unix_gc(void) + * receive queues. Other, non candidate sockets _can_ be + * added to queue, so we must make sure only to touch + * candidates. ++ * ++ * Embryos, though never candidates themselves, affect which ++ * candidates are reachable by the garbage collector. Before ++ * being added to a listener's queue, an embryo may already ++ * receive data carrying SCM_RIGHTS, potentially making the ++ * passed socket a candidate that is not yet reachable by the ++ * collector. It becomes reachable once the embryo is ++ * enqueued. Therefore, we must ensure that no SCM-laden ++ * embryo appears in a (candidate) listener's queue between ++ * consecutive scan_children() calls. 
+ */ + list_for_each_entry_safe(u, next, &gc_inflight_list, link) { ++ struct sock *sk = &u->sk; + long total_refs; + +- total_refs = file_count(u->sk.sk_socket->file); ++ total_refs = file_count(sk->sk_socket->file); + + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); +@@ -247,6 +258,11 @@ void unix_gc(void) + list_move_tail(&u->link, &gc_candidates); + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); + __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); ++ ++ if (sk->sk_state == TCP_LISTEN) { ++ unix_state_lock(sk); ++ unix_state_unlock(sk); ++ } + } + } + +-- +2.43.0 + diff --git a/queue-5.4/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch b/queue-5.4/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch new file mode 100644 index 0000000000..8addf5180f --- /dev/null +++ b/queue-5.4/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch @@ -0,0 +1,166 @@ +From decfdb7f1f82d138f5300ecd7d0ba10f1f0d91ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 5 Apr 2024 10:30:34 +0000 +Subject: geneve: fix header validation in geneve[6]_xmit_skb + +From: Eric Dumazet <edumazet@google.com> + +[ Upstream commit d8a6213d70accb403b82924a1c229e733433a5ef ] + +syzbot is able to trigger an uninit-value in geneve_xmit() [1] + +Problem : While most ip tunnel helpers (like ip_tunnel_get_dsfield()) +uses skb_protocol(skb, true), pskb_inet_may_pull() is only using +skb->protocol. + +If anything else than ETH_P_IPV6 or ETH_P_IP is found in skb->protocol, +pskb_inet_may_pull() does nothing at all. + +If a vlan tag was provided by the caller (af_packet in the syzbot case), +the network header might not point to the correct location, and skb +linear part could be smaller than expected. + +Add skb_vlan_inet_prepare() to perform a complete mac validation. + +Use this in geneve for the moment, I suspect we need to adopt this +more broadly. + +v4 - Jakub reported v3 broke l2_tos_ttl_inherit.sh selftest + - Only call __vlan_get_protocol() for vlan types. 
+Link: https://lore.kernel.org/netdev/20240404100035.3270a7d5@kernel.org/ + +v2,v3 - Addressed Sabrina comments on v1 and v2 +Link: https://lore.kernel.org/netdev/Zg1l9L2BNoZWZDZG@hog/ + +[1] + +BUG: KMSAN: uninit-value in geneve_xmit_skb drivers/net/geneve.c:910 [inline] + BUG: KMSAN: uninit-value in geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 + geneve_xmit_skb drivers/net/geneve.c:910 [inline] + geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 + __netdev_start_xmit include/linux/netdevice.h:4903 [inline] + netdev_start_xmit include/linux/netdevice.h:4917 [inline] + xmit_one net/core/dev.c:3531 [inline] + dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3547 + __dev_queue_xmit+0x348d/0x52c0 net/core/dev.c:4335 + dev_queue_xmit include/linux/netdevice.h:3091 [inline] + packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 + packet_snd net/packet/af_packet.c:3081 [inline] + packet_sendmsg+0x8bb0/0x9ef0 net/packet/af_packet.c:3113 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0x30f/0x380 net/socket.c:745 + __sys_sendto+0x685/0x830 net/socket.c:2191 + __do_sys_sendto net/socket.c:2203 [inline] + __se_sys_sendto net/socket.c:2199 [inline] + __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 + do_syscall_64+0xd5/0x1f0 + entry_SYSCALL_64_after_hwframe+0x6d/0x75 + +Uninit was created at: + slab_post_alloc_hook mm/slub.c:3804 [inline] + slab_alloc_node mm/slub.c:3845 [inline] + kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 + kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 + __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 + alloc_skb include/linux/skbuff.h:1318 [inline] + alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 + sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 + packet_alloc_skb net/packet/af_packet.c:2930 [inline] + packet_snd net/packet/af_packet.c:3024 [inline] + packet_sendmsg+0x722d/0x9ef0 net/packet/af_packet.c:3113 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0x30f/0x380 net/socket.c:745 + 
__sys_sendto+0x685/0x830 net/socket.c:2191 + __do_sys_sendto net/socket.c:2203 [inline] + __se_sys_sendto net/socket.c:2199 [inline] + __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 + do_syscall_64+0xd5/0x1f0 + entry_SYSCALL_64_after_hwframe+0x6d/0x75 + +CPU: 0 PID: 5033 Comm: syz-executor346 Not tainted 6.9.0-rc1-syzkaller-00005-g928a87efa423 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 + +Fixes: d13f048dd40e ("net: geneve: modify IP header check in geneve6_xmit_skb and geneve_xmit_skb") +Reported-by: syzbot+9ee20ec1de7b3168db09@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/000000000000d19c3a06152f9ee4@google.com/ +Signed-off-by: Eric Dumazet <edumazet@google.com> +Cc: Phillip Potter <phil@philpotter.co.uk> +Cc: Sabrina Dubroca <sd@queasysnail.net> +Reviewed-by: Sabrina Dubroca <sd@queasysnail.net> +Reviewed-by: Phillip Potter <phil@philpotter.co.uk> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/geneve.c | 4 ++-- + include/net/ip_tunnels.h | 33 +++++++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c +index f932e4120cb6c..961cbd2b377d1 100644 +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -904,7 +904,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!pskb_inet_may_pull(skb)) ++ if (!skb_vlan_inet_prepare(skb)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +@@ -970,7 +970,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!pskb_inet_may_pull(skb)) ++ if (!skb_vlan_inet_prepare(skb)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h +index 
36376f8b84dac..3a04e2ccfb393 100644 +--- a/include/net/ip_tunnels.h ++++ b/include/net/ip_tunnels.h +@@ -329,6 +329,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) + return pskb_network_may_pull(skb, nhlen); + } + ++/* Variant of pskb_inet_may_pull(). ++ */ ++static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) ++{ ++ int nhlen = 0, maclen = ETH_HLEN; ++ __be16 type = skb->protocol; ++ ++ /* Essentially this is skb_protocol(skb, true) ++ * And we get MAC len. ++ */ ++ if (eth_type_vlan(type)) ++ type = __vlan_get_protocol(skb, type, &maclen); ++ ++ switch (type) { ++#if IS_ENABLED(CONFIG_IPV6) ++ case htons(ETH_P_IPV6): ++ nhlen = sizeof(struct ipv6hdr); ++ break; ++#endif ++ case htons(ETH_P_IP): ++ nhlen = sizeof(struct iphdr); ++ break; ++ } ++ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull ++ * a base network header in skb->head. ++ */ ++ if (!pskb_may_pull(skb, maclen + nhlen)) ++ return false; ++ ++ skb_set_network_header(skb, maclen); ++ return true; ++} ++ + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) + { + const struct ip_tunnel_encap_ops *ops; +-- +2.43.0 + diff --git a/queue-5.4/ipv4-route-avoid-unused-but-set-variable-warning.patch b/queue-5.4/ipv4-route-avoid-unused-but-set-variable-warning.patch new file mode 100644 index 0000000000..32b7647a15 --- /dev/null +++ b/queue-5.4/ipv4-route-avoid-unused-but-set-variable-warning.patch @@ -0,0 +1,51 @@ +From 94fbe22eb7fb3bf222346dc9a9be5cc08bcae77b Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 8 Apr 2024 09:42:03 +0200 +Subject: ipv4/route: avoid unused-but-set-variable warning + +From: Arnd Bergmann <arnd@arndb.de> + +[ Upstream commit cf1b7201df59fb936f40f4a807433fe3f2ce310a ] + +The log_martians variable is only used in an #ifdef, causing a 'make W=1' +warning with gcc: + +net/ipv4/route.c: In function 'ip_rt_send_redirect': +net/ipv4/route.c:880:13: error: variable 'log_martians' set but not used [-Werror=unused-but-set-variable] + 
+Change the #ifdef to an equivalent IS_ENABLED() to let the compiler +see where the variable is used. + +Fixes: 30038fc61adf ("net: ip_rt_send_redirect() optimization") +Reviewed-by: David Ahern <dsahern@kernel.org> +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/20240408074219.3030256-2-arnd@kernel.org +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/ipv4/route.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 902296ef3e5aa..5b008d838e2b9 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -945,13 +945,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); + peer->rate_last = jiffies; + ++peer->n_redirects; +-#ifdef CONFIG_IP_ROUTE_VERBOSE +- if (log_martians && ++ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && + peer->n_redirects == ip_rt_redirect_number) + net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", + &ip_hdr(skb)->saddr, inet_iif(skb), + &ip_hdr(skb)->daddr, &gw); +-#endif + } + out_put_peer: + inet_putpeer(peer); +-- +2.43.0 + diff --git a/queue-5.4/ipv6-fib-hide-unused-pn-variable.patch b/queue-5.4/ipv6-fib-hide-unused-pn-variable.patch new file mode 100644 index 0000000000..10d4ad8472 --- /dev/null +++ b/queue-5.4/ipv6-fib-hide-unused-pn-variable.patch @@ -0,0 +1,60 @@ +From d28465ef0860ad7c0f0f3bcd529dc1acd8348645 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 8 Apr 2024 09:42:02 +0200 +Subject: ipv6: fib: hide unused 'pn' variable + +From: Arnd Bergmann <arnd@arndb.de> + +[ Upstream commit 74043489fcb5e5ca4074133582b5b8011b67f9e7 ] + +When CONFIG_IPV6_SUBTREES is disabled, the only user is hidden, causing +a 'make W=1' warning: + +net/ipv6/ip6_fib.c: In function 'fib6_add': +net/ipv6/ip6_fib.c:1388:32: error: variable 'pn' 
set but not used [-Werror=unused-but-set-variable] + +Add another #ifdef around the variable declaration, matching the other +uses in this file. + +Fixes: 66729e18df08 ("[IPV6] ROUTE: Make sure we have fn->leaf when adding a node on subtree.") +Link: https://lore.kernel.org/netdev/20240322131746.904943-1-arnd@kernel.org/ +Reviewed-by: David Ahern <dsahern@kernel.org> +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/20240408074219.3030256-1-arnd@kernel.org +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/ipv6/ip6_fib.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 7d593e50977cf..3afc32fe9b07b 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1307,7 +1307,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) + { + struct fib6_table *table = rt->fib6_table; +- struct fib6_node *fn, *pn = NULL; ++ struct fib6_node *fn; ++#ifdef CONFIG_IPV6_SUBTREES ++ struct fib6_node *pn = NULL; ++#endif + int err = -ENOMEM; + int allow_create = 1; + int replace_required = 0; +@@ -1331,9 +1334,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, + goto out; + } + ++#ifdef CONFIG_IPV6_SUBTREES + pn = fn; + +-#ifdef CONFIG_IPV6_SUBTREES + if (rt->fib6_src.plen) { + struct fib6_node *sn; + +-- +2.43.0 + diff --git a/queue-5.4/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch b/queue-5.4/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch new file mode 100644 index 0000000000..0ac4fd36c9 --- /dev/null +++ b/queue-5.4/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch @@ -0,0 +1,133 @@ +From b518c1d658f3c2089500eb848c6d6b186f4f9427 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 8 Apr 2024 16:18:21 +0200 +Subject: ipv6: fix race 
condition between ipv6_get_ifaddr and ipv6_del_addr + +From: Jiri Benc <jbenc@redhat.com> + +[ Upstream commit 7633c4da919ad51164acbf1aa322cc1a3ead6129 ] + +Although ipv6_get_ifaddr walks inet6_addr_lst under the RCU lock, it +still means hlist_for_each_entry_rcu can return an item that got removed +from the list. The memory itself of such item is not freed thanks to RCU +but nothing guarantees the actual content of the memory is sane. + +In particular, the reference count can be zero. This can happen if +ipv6_del_addr is called in parallel. ipv6_del_addr removes the entry +from inet6_addr_lst (hlist_del_init_rcu(&ifp->addr_lst)) and drops all +references (__in6_ifa_put(ifp) + in6_ifa_put(ifp)). With bad enough +timing, this can happen: + +1. In ipv6_get_ifaddr, hlist_for_each_entry_rcu returns an entry. + +2. Then, the whole ipv6_del_addr is executed for the given entry. The + reference count drops to zero and kfree_rcu is scheduled. + +3. ipv6_get_ifaddr continues and tries to increments the reference count + (in6_ifa_hold). + +4. The rcu is unlocked and the entry is freed. + +5. The freed entry is returned. + +Prevent increasing of the reference count in such case. The name +in6_ifa_hold_safe is chosen to mimic the existing fib6_info_hold_safe. + +[ 41.506330] refcount_t: addition on 0; use-after-free. 
+[ 41.506760] WARNING: CPU: 0 PID: 595 at lib/refcount.c:25 refcount_warn_saturate+0xa5/0x130 +[ 41.507413] Modules linked in: veth bridge stp llc +[ 41.507821] CPU: 0 PID: 595 Comm: python3 Not tainted 6.9.0-rc2.main-00208-g49563be82afa #14 +[ 41.508479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) +[ 41.509163] RIP: 0010:refcount_warn_saturate+0xa5/0x130 +[ 41.509586] Code: ad ff 90 0f 0b 90 90 c3 cc cc cc cc 80 3d c0 30 ad 01 00 75 a0 c6 05 b7 30 ad 01 01 90 48 c7 c7 38 cc 7a 8c e8 cc 18 ad ff 90 <0f> 0b 90 90 c3 cc cc cc cc 80 3d 98 30 ad 01 00 0f 85 75 ff ff ff +[ 41.510956] RSP: 0018:ffffbda3c026baf0 EFLAGS: 00010282 +[ 41.511368] RAX: 0000000000000000 RBX: ffff9e9c46914800 RCX: 0000000000000000 +[ 41.511910] RDX: ffff9e9c7ec29c00 RSI: ffff9e9c7ec1c900 RDI: ffff9e9c7ec1c900 +[ 41.512445] RBP: ffff9e9c43660c9c R08: 0000000000009ffb R09: 00000000ffffdfff +[ 41.512998] R10: 00000000ffffdfff R11: ffffffff8ca58a40 R12: ffff9e9c4339a000 +[ 41.513534] R13: 0000000000000001 R14: ffff9e9c438a0000 R15: ffffbda3c026bb48 +[ 41.514086] FS: 00007fbc4cda1740(0000) GS:ffff9e9c7ec00000(0000) knlGS:0000000000000000 +[ 41.514726] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 41.515176] CR2: 000056233b337d88 CR3: 000000000376e006 CR4: 0000000000370ef0 +[ 41.515713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 41.516252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 41.516799] Call Trace: +[ 41.517037] <TASK> +[ 41.517249] ? __warn+0x7b/0x120 +[ 41.517535] ? refcount_warn_saturate+0xa5/0x130 +[ 41.517923] ? report_bug+0x164/0x190 +[ 41.518240] ? handle_bug+0x3d/0x70 +[ 41.518541] ? exc_invalid_op+0x17/0x70 +[ 41.520972] ? asm_exc_invalid_op+0x1a/0x20 +[ 41.521325] ? refcount_warn_saturate+0xa5/0x130 +[ 41.521708] ipv6_get_ifaddr+0xda/0xe0 +[ 41.522035] inet6_rtm_getaddr+0x342/0x3f0 +[ 41.522376] ? __pfx_inet6_rtm_getaddr+0x10/0x10 +[ 41.522758] rtnetlink_rcv_msg+0x334/0x3d0 +[ 41.523102] ? 
netlink_unicast+0x30f/0x390 +[ 41.523445] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 +[ 41.523832] netlink_rcv_skb+0x53/0x100 +[ 41.524157] netlink_unicast+0x23b/0x390 +[ 41.524484] netlink_sendmsg+0x1f2/0x440 +[ 41.524826] __sys_sendto+0x1d8/0x1f0 +[ 41.525145] __x64_sys_sendto+0x1f/0x30 +[ 41.525467] do_syscall_64+0xa5/0x1b0 +[ 41.525794] entry_SYSCALL_64_after_hwframe+0x72/0x7a +[ 41.526213] RIP: 0033:0x7fbc4cfcea9a +[ 41.526528] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 +[ 41.527942] RSP: 002b:00007ffcf54012a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +[ 41.528593] RAX: ffffffffffffffda RBX: 00007ffcf5401368 RCX: 00007fbc4cfcea9a +[ 41.529173] RDX: 000000000000002c RSI: 00007fbc4b9d9bd0 RDI: 0000000000000005 +[ 41.529786] RBP: 00007fbc4bafb040 R08: 00007ffcf54013e0 R09: 000000000000000c +[ 41.530375] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +[ 41.530977] R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007fbc4ca85d1b +[ 41.531573] </TASK> + +Fixes: 5c578aedcb21d ("IPv6: convert addrconf hash list to RCU") +Reviewed-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: David Ahern <dsahern@kernel.org> +Signed-off-by: Jiri Benc <jbenc@redhat.com> +Link: https://lore.kernel.org/r/8ab821e36073a4a406c50ec83c9e8dc586c539e4.1712585809.git.jbenc@redhat.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/addrconf.h | 4 ++++ + net/ipv6/addrconf.c | 7 ++++--- + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/include/net/addrconf.h b/include/net/addrconf.h +index 32685eaba28fc..95990fa93d2f0 100644 +--- a/include/net/addrconf.h ++++ b/include/net/addrconf.h +@@ -431,6 +431,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) + refcount_inc(&ifp->refcnt); + } + ++static inline bool 
in6_ifa_hold_safe(struct inet6_ifaddr *ifp) ++{ ++ return refcount_inc_not_zero(&ifp->refcnt); ++} + + /* + * compute link-local solicited-node multicast address +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 974e650e749e6..2720e5d931e8a 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -2026,9 +2026,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add + if (ipv6_addr_equal(&ifp->addr, addr)) { + if (!dev || ifp->idev->dev == dev || + !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { +- result = ifp; +- in6_ifa_hold(ifp); +- break; ++ if (in6_ifa_hold_safe(ifp)) { ++ result = ifp; ++ break; ++ } + } + } + } +-- +2.43.0 + diff --git a/queue-5.4/net-ena-fix-potential-sign-extension-issue.patch b/queue-5.4/net-ena-fix-potential-sign-extension-issue.patch new file mode 100644 index 0000000000..7fcca68cd1 --- /dev/null +++ b/queue-5.4/net-ena-fix-potential-sign-extension-issue.patch @@ -0,0 +1,66 @@ +From 2dd8d79f9feedb6ea969c2398bc73ca307f5509c Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 09:13:55 +0000 +Subject: net: ena: Fix potential sign extension issue + +From: David Arinzon <darinzon@amazon.com> + +[ Upstream commit 713a85195aad25d8a26786a37b674e3e5ec09e3c ] + +Small unsigned types are promoted to larger signed types in +the case of multiplication, the result of which may overflow. +In case the result of such a multiplication has its MSB +turned on, it will be sign extended with '1's. +This changes the multiplication result. + +Code example of the phenomenon: +------------------------------- +u16 x, y; +size_t z1, z2; + +x = y = 0xffff; +printk("x=%x y=%x\n",x,y); + +z1 = x*y; +z2 = (size_t)x*y; + +printk("z1=%lx z2=%lx\n", z1, z2); + +Output: +------- +x=ffff y=ffff +z1=fffffffffffe0001 z2=fffe0001 + +The expected result of ffff*ffff is fffe0001, and without the +explicit casting to avoid the unwanted sign extension we got +fffffffffffe0001. 
+ +This commit adds an explicit casting to avoid the sign extension +issue. + +Fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com") +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David Arinzon <darinzon@amazon.com> +Reviewed-by: Shannon Nelson <shannon.nelson@amd.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index 29700fee42e9d..46a796576f6b6 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -374,7 +374,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; + io_sq->bounce_buf_ctrl.next_to_use = 0; + +- size = io_sq->bounce_buf_ctrl.buffer_size * ++ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * + io_sq->bounce_buf_ctrl.buffers_num; + + dev_node = dev_to_node(ena_dev->dmadev); +-- +2.43.0 + diff --git a/queue-5.4/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch b/queue-5.4/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch new file mode 100644 index 0000000000..4ccfb75dfa --- /dev/null +++ b/queue-5.4/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch @@ -0,0 +1,66 @@ +From 7e17942d78e06ea8de6c47fa5efd1e9717a74c21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 22:08:12 +0300 +Subject: net/mlx5: Properly link new fs rules into the tree + +From: Cosmin Ratiu <cratiu@nvidia.com> + +[ Upstream commit 7c6782ad4911cbee874e85630226ed389ff2e453 ] + +Previously, add_rule_fg would only add newly created rules from the +handle into the tree when they had a refcount of 1. 
On the other hand, +create_flow_handle tries hard to find and reference already existing +identical rules instead of creating new ones. + +These two behaviors can result in a situation where create_flow_handle +1) creates a new rule and references it, then +2) in a subsequent step during the same handle creation references it + again, +resulting in a rule with a refcount of 2 that is not linked into the +tree, will have a NULL parent and root and will result in a crash when +the flow group is deleted because del_sw_hw_rule, invoked on rule +deletion, assumes node->parent is != NULL. + +This happened in the wild, due to another bug related to incorrect +handling of duplicate pkt_reformat ids, which lead to the code in +create_flow_handle incorrectly referencing a just-added rule in the same +flow handle, resulting in the problem described above. Full details are +at [1]. + +This patch changes add_rule_fg to add new rules without parents into +the tree, properly initializing them and avoiding the crash. This makes +it more consistent with how rules are added to an FTE in +create_flow_handle. 
+ +Fixes: 74491de93712 ("net/mlx5: Add multi dest support") +Link: https://lore.kernel.org/netdev/ea5264d6-6b55-4449-a602-214c6f509c1e@163.com/T/#u [1] +Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com> +Reviewed-by: Tariq Toukan <tariqt@nvidia.com> +Reviewed-by: Mark Bloch <mbloch@nvidia.com> +Signed-off-by: Saeed Mahameed <saeedm@nvidia.com> +Signed-off-by: Tariq Toukan <tariqt@nvidia.com> +Link: https://lore.kernel.org/r/20240409190820.227554-5-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +index 41087c0618c11..93fcde150a42f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -1549,8 +1549,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + } + trace_mlx5_fs_set_fte(fte, false); + ++ /* Link newly added rules into the tree. 
*/ + for (i = 0; i < handle->num_rules; i++) { +- if (refcount_read(&handle->rule[i]->node.refcount) == 1) { ++ if (!handle->rule[i]->node.parent) { + tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } +-- +2.43.0 + diff --git a/queue-5.4/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch b/queue-5.4/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch new file mode 100644 index 0000000000..51a48b7748 --- /dev/null +++ b/queue-5.4/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch @@ -0,0 +1,60 @@ +From d5285424ce2119f2373c5b0cd4be468dace18210 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 3 Apr 2024 22:38:01 +0200 +Subject: net: openvswitch: fix unwanted error log on timeout policy probing + +From: Ilya Maximets <i.maximets@ovn.org> + +[ Upstream commit 4539f91f2a801c0c028c252bffae56030cfb2cae ] + +On startup, ovs-vswitchd probes different datapath features including +support for timeout policies. While probing, it tries to execute +certain operations with OVS_PACKET_ATTR_PROBE or OVS_FLOW_ATTR_PROBE +attributes set. These attributes tell the openvswitch module to not +log any errors when they occur as it is expected that some of the +probes will fail. + +For some reason, setting the timeout policy ignores the PROBE attribute +and logs a failure anyway. This is causing the following kernel log +on each re-start of ovs-vswitchd: + + kernel: Failed to associated timeout policy `ovs_test_tp' + +Fix that by using the same logging macro that all other messages are +using. The message will still be printed at info level when needed +and will be rate limited, but with a net rate limiter instead of +generic printk one. + +The nf_ct_set_timeout() itself will still print some info messages, +but at least this change makes logging in openvswitch module more +consistent. 
+ +Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") +Signed-off-by: Ilya Maximets <i.maximets@ovn.org> +Acked-by: Eelco Chaudron <echaudro@redhat.com> +Link: https://lore.kernel.org/r/20240403203803.2137962-1-i.maximets@ovn.org +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/openvswitch/conntrack.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c +index 78448b6888ddc..90c5f53007281 100644 +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -1687,8 +1687,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + if (ct_info.timeout[0]) { + if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, + ct_info.timeout)) +- pr_info_ratelimited("Failed to associated timeout " +- "policy `%s'\n", ct_info.timeout); ++ OVS_NLERR(log, ++ "Failed to associated timeout policy '%s'", ++ ct_info.timeout); + else + ct_info.nf_ct_timeout = rcu_dereference( + nf_ct_timeout_find(ct_info.ct)->timeout); +-- +2.43.0 + diff --git a/queue-5.4/nouveau-fix-function-cast-warning.patch b/queue-5.4/nouveau-fix-function-cast-warning.patch new file mode 100644 index 0000000000..9be34d87d9 --- /dev/null +++ b/queue-5.4/nouveau-fix-function-cast-warning.patch @@ -0,0 +1,51 @@ +From c353e81dd443657f12d18b9237d0c2c805b88130 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 4 Apr 2024 18:02:25 +0200 +Subject: nouveau: fix function cast warning + +From: Arnd Bergmann <arnd@arndb.de> + +[ Upstream commit 185fdb4697cc9684a02f2fab0530ecdd0c2f15d4 ] + +Calling a function through an incompatible pointer type breaks +kcfi, so clang warns about the assignment: + +drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c:73:10: error: cast from 'void (*)(const void *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] + 73 | .fini 
= (void(*)(void *))kfree, + +Avoid this with a trivial wrapper. + +Fixes: c39f472e9f14 ("drm/nouveau: remove symlinks, move core/ to nvkm/ (no code changes)") +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Danilo Krummrich <dakr@redhat.com> +Link: https://patchwork.freedesktop.org/patch/msgid/20240404160234.2923554-1-arnd@kernel.org +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +index 4bf486b571013..cb05f7f48a98b 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) + return ERR_PTR(-EINVAL); + } + ++static void of_fini(void *p) ++{ ++ kfree(p); ++} ++ + const struct nvbios_source + nvbios_of = { + .name = "OpenFirmware", + .init = of_init, +- .fini = (void(*)(void *))kfree, ++ .fini = of_fini, + .read = of_read, + .size = of_size, + .rw = false, +-- +2.43.0 + diff --git a/queue-5.4/series b/queue-5.4/series index e7e060a244..7cf7542952 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -1,2 +1,17 @@ batman-adv-avoid-infinite-loop-trying-to-resize-local-tt.patch bluetooth-fix-memory-leak-in-hci_req_sync_complete.patch +nouveau-fix-function-cast-warning.patch +net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch +u64_stats-provide-u64_stats_t-type.patch +u64_stats-document-writer-non-preemptibility-require.patch +u64_stats-disable-preemption-on-32bit-up-smp-preempt.patch +u64_stats-streamline-the-implementation.patch +u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch +geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch +ipv6-fib-hide-unused-pn-variable.patch +ipv4-route-avoid-unused-but-set-variable-warning.patch 
+ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch +net-mlx5-properly-link-new-fs-rules-into-the-tree.patch +af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch +af_unix-fix-garbage-collector-racing-against-connect.patch +net-ena-fix-potential-sign-extension-issue.patch diff --git a/queue-5.4/u64_stats-disable-preemption-on-32bit-up-smp-preempt.patch b/queue-5.4/u64_stats-disable-preemption-on-32bit-up-smp-preempt.patch new file mode 100644 index 0000000000..75cdb2494b --- /dev/null +++ b/queue-5.4/u64_stats-disable-preemption-on-32bit-up-smp-preempt.patch @@ -0,0 +1,164 @@ +From e52ce42c17d9dc844bdc8c461fd2319e3c0d7e07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 10 Dec 2021 21:29:59 +0100 +Subject: u64_stats: Disable preemption on 32bit UP+SMP PREEMPT_RT during + updates. + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +[ Upstream commit 3c118547f87e930d45a5787e386734015dd93b32 ] + +On PREEMPT_RT the seqcount_t for synchronisation is required on 32bit +architectures even on UP because the softirq (and the threaded IRQ handler) can +be preempted. + +With the seqcount_t for synchronisation, a reader with higher priority can +preempt the writer and then spin endlessly in read_seqcount_begin() while the +writer can't make progress. + +To avoid such a lock up on PREEMPT_RT the writer must disable preemption during +the update. There is no need to disable interrupts because no writer is using +this API in hard-IRQ context on PREEMPT_RT. + +Disable preemption on 32bit-RT within the u64_stats write section. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Stable-dep-of: 38a15d0a50e0 ("u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/u64_stats_sync.h | 42 ++++++++++++++++++++++------------ + 1 file changed, 28 insertions(+), 14 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index e81856c0ba134..6a0f2097d3709 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -66,7 +66,7 @@ + #include <linux/seqlock.h> + + struct u64_stats_sync { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + seqcount_t seq; + #endif + }; +@@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_stats_t *p) + } + #endif + +-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) + #else + static inline void u64_stats_init(struct u64_stats_sync *syncp) +@@ -125,15 +125,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) + + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + write_seqcount_begin(&syncp->seq); + #endif + } + + static inline void u64_stats_update_end(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + write_seqcount_end(&syncp->seq); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + #endif + } + +@@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) + { + unsigned long flags = 0; + +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +- 
local_irq_save(flags); ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ local_irq_save(flags); + write_seqcount_begin(&syncp->seq); + #endif + return flags; +@@ -153,15 +160,18 @@ static inline void + u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + write_seqcount_end(&syncp->seq); +- local_irq_restore(flags); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ local_irq_restore(flags); + #endif + } + + static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + return read_seqcount_begin(&syncp->seq); + #else + return 0; +@@ -170,7 +180,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * + + static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) + preempt_disable(); + #endif + return __u64_stats_fetch_begin(syncp); +@@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy + static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + return read_seqcount_retry(&syncp->seq, start); + #else + return false; +@@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if 
BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) + preempt_enable(); + #endif + return __u64_stats_fetch_retry(syncp, start); +@@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + */ + static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) ++ preempt_disable(); ++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) + local_irq_disable(); + #endif + return __u64_stats_fetch_begin(syncp); +@@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync + static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) ++ preempt_enable(); ++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) + local_irq_enable(); + #endif + return __u64_stats_fetch_retry(syncp, start); +-- +2.43.0 + diff --git a/queue-5.4/u64_stats-document-writer-non-preemptibility-require.patch b/queue-5.4/u64_stats-document-writer-non-preemptibility-require.patch new file mode 100644 index 0000000000..e7f5f89cc4 --- /dev/null +++ b/queue-5.4/u64_stats-document-writer-non-preemptibility-require.patch @@ -0,0 +1,109 @@ +From 4569bd2e0123de30826d9e83d88e7c7f6802f253 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 3 Jun 2020 16:49:46 +0200 +Subject: u64_stats: Document writer non-preemptibility requirement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ahmed S. Darwish <a.darwish@linutronix.de> + +[ Upstream commit 6501bf87602f799b7e502014f8bc0aa58b868277 ] + +The u64_stats mechanism uses sequence counters to protect against 64-bit +values tearing on 32-bit architectures. 
Updating such statistics is a +sequence counter write side critical section. + +Preemption must be disabled before entering this seqcount write critical +section. Failing to do so, the seqcount read side can preempt the write +side section and spin for the entire scheduler tick. If that reader +belongs to a real-time scheduling class, it can spin forever and the +kernel will livelock. + +Document this statistics update side non-preemptibility requirement. + +Reword the introductory paragraph to highlight u64_stats raison d'être: +64-bit values tearing protection on 32-bit architectures. Divide +documentation on a basis of internal design vs. usage constraints. + +Reword the u64_stats header file top comment to always mention "Reader" +or "Writer" at the start of each bullet point, making it easier to +follow which side each point is actually for. + +Clarify the statement "whole thing is a NOOP on 64bit arches or UP +kernels". For 32-bit UP kernels, preemption is always disabled for the +statistics read side section. + +Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> +Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: David S. Miller <davem@davemloft.net> +Stable-dep-of: 38a15d0a50e0 ("u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/u64_stats_sync.h | 43 ++++++++++++++++++---------------- + 1 file changed, 23 insertions(+), 20 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index 7c316a9fb3ae5..e81856c0ba134 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -3,33 +3,36 @@ + #define _LINUX_U64_STATS_SYNC_H + + /* +- * To properly implement 64bits network statistics on 32bit and 64bit hosts, +- * we provide a synchronization point, that is a noop on 64bit or UP kernels. ++ * Protect against 64-bit values tearing on 32-bit architectures. 
This is ++ * typically used for statistics read/update in different subsystems. + * + * Key points : +- * 1) Use a seqcount on SMP 32bits, with low overhead. +- * 2) Whole thing is a noop on 64bit arches or UP kernels. +- * 3) Write side must ensure mutual exclusion or one seqcount update could ++ * ++ * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP. ++ * - The whole thing is a no-op on 64-bit architectures. ++ * ++ * Usage constraints: ++ * ++ * 1) Write side must ensure mutual exclusion, or one seqcount update could + * be lost, thus blocking readers forever. +- * If this synchronization point is not a mutex, but a spinlock or +- * spinlock_bh() or disable_bh() : +- * 3.1) Write side should not sleep. +- * 3.2) Write side should not allow preemption. +- * 3.3) If applicable, interrupts should be disabled. + * +- * 4) If reader fetches several counters, there is no guarantee the whole values +- * are consistent (remember point 1) : this is a noop on 64bit arches anyway) ++ * 2) Write side must disable preemption, or a seqcount reader can preempt the ++ * writer and also spin forever. ++ * ++ * 3) Write side must use the _irqsave() variant if other writers, or a reader, ++ * can be invoked from an IRQ context. + * +- * 5) readers are allowed to sleep or be preempted/interrupted : They perform +- * pure reads. But if they have to fetch many values, it's better to not allow +- * preemptions/interruptions to avoid many retries. ++ * 4) If reader fetches several counters, there is no guarantee the whole values ++ * are consistent w.r.t. each other (remember point #2: seqcounts are not ++ * used for 64bit architectures). + * +- * 6) If counter might be written by an interrupt, readers should block interrupts. +- * (On UP, there is no seqcount_t protection, a reader allowing interrupts could +- * read partial values) ++ * 5) Readers are allowed to sleep or be preempted/interrupted: they perform ++ * pure reads. 
+ * +- * 7) For irq and softirq uses, readers can use u64_stats_fetch_begin_irq() and +- * u64_stats_fetch_retry_irq() helpers ++ * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats ++ * might be updated from a hardirq or softirq context (remember point #1: ++ * seqcounts are not used for UP kernels). 32-bit UP stat readers could read ++ * corrupted 64-bit values otherwise. + * + * Usage : + * +-- +2.43.0 + diff --git a/queue-5.4/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch b/queue-5.4/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch new file mode 100644 index 0000000000..a9203f847c --- /dev/null +++ b/queue-5.4/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch @@ -0,0 +1,56 @@ +From 57a453fd0d2c0878818807a76fda9f9fa4353f32 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 4 Apr 2024 09:57:40 +0200 +Subject: u64_stats: fix u64_stats_init() for lockdep when used repeatedly in + one file + +From: Petr Tesarik <petr@tesarici.cz> + +[ Upstream commit 38a15d0a50e0a43778561a5861403851f0b0194c ] + +Fix bogus lockdep warnings if multiple u64_stats_sync variables are +initialized in the same file. + +With CONFIG_LOCKDEP, seqcount_init() is a macro which declares: + + static struct lock_class_key __key; + +Since u64_stats_init() is a function (albeit an inline one), all calls +within the same file end up using the same instance, effectively treating +them all as a single lock-class. 
+ +Fixes: 9464ca650008 ("net: make u64_stats_init() a function") +Closes: https://lore.kernel.org/netdev/ea1567d9-ce66-45e6-8168-ac40a47d1821@roeck-us.net/ +Signed-off-by: Petr Tesarik <petr@tesarici.cz> +Reviewed-by: Simon Horman <horms@kernel.org> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/20240404075740.30682-1-petr@tesarici.cz +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/u64_stats_sync.h | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index 51f2e16b9540b..11c3162dade3b 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -125,10 +125,11 @@ static inline void u64_stats_inc(u64_stats_t *p) + p->v++; + } + +-static inline void u64_stats_init(struct u64_stats_sync *syncp) +-{ +- seqcount_init(&syncp->seq); +-} ++#define u64_stats_init(syncp) \ ++ do { \ ++ struct u64_stats_sync *__s = (syncp); \ ++ seqcount_init(&__s->seq); \ ++ } while (0) + + static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) + { +-- +2.43.0 + diff --git a/queue-5.4/u64_stats-provide-u64_stats_t-type.patch b/queue-5.4/u64_stats-provide-u64_stats_t-type.patch new file mode 100644 index 0000000000..eecc7bba20 --- /dev/null +++ b/queue-5.4/u64_stats-provide-u64_stats_t-type.patch @@ -0,0 +1,110 @@ +From ad77147ffaa2c9efa50a5c47d029e694a1d08c59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 7 Nov 2019 16:27:20 -0800 +Subject: u64_stats: provide u64_stats_t type + +From: Eric Dumazet <edumazet@google.com> + +[ Upstream commit 316580b69d0a7aeeee5063af47438b626bc47cbd ] + +On 64bit arches, struct u64_stats_sync is empty and provides +no help against load/store tearing. + +Using READ_ONCE()/WRITE_ONCE() would be needed. + +But the update side would be slightly more expensive. 
+ +local64_t was defined so that we could use regular adds +in a manner which is atomic wrt IRQs. + +However the u64_stats infra means we do not have to use +local64_t on 32bit arches since the syncp provides the needed +protection. + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Stable-dep-of: 38a15d0a50e0 ("u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/u64_stats_sync.h | 51 +++++++++++++++++++++++++++++++--- + 1 file changed, 47 insertions(+), 4 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index 11096b561dab6..7c316a9fb3ae5 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -40,8 +40,8 @@ + * spin_lock_bh(...) or other synchronization to get exclusive access + * ... + * u64_stats_update_begin(&stats->syncp); +- * stats->bytes64 += len; // non atomic operation +- * stats->packets64++; // non atomic operation ++ * u64_stats_add(&stats->bytes64, len); // non atomic operation ++ * u64_stats_inc(&stats->packets64); // non atomic operation + * u64_stats_update_end(&stats->syncp); + * + * While a consumer (reader) should use following template to get consistent +@@ -52,8 +52,8 @@ + * + * do { + * start = u64_stats_fetch_begin(&stats->syncp); +- * tbytes = stats->bytes64; // non atomic operation +- * tpackets = stats->packets64; // non atomic operation ++ * tbytes = u64_stats_read(&stats->bytes64); // non atomic operation ++ * tpackets = u64_stats_read(&stats->packets64); // non atomic operation + * } while (u64_stats_fetch_retry(&stats->syncp, start)); + * + * +@@ -68,6 +68,49 @@ struct u64_stats_sync { + #endif + }; + ++#if BITS_PER_LONG == 64 ++#include <asm/local64.h> ++ ++typedef struct { ++ local64_t v; ++} u64_stats_t ; ++ ++static inline u64 u64_stats_read(const u64_stats_t *p) ++{ ++ return local64_read(&p->v); ++} ++ 
++static inline void u64_stats_add(u64_stats_t *p, unsigned long val) ++{ ++ local64_add(val, &p->v); ++} ++ ++static inline void u64_stats_inc(u64_stats_t *p) ++{ ++ local64_inc(&p->v); ++} ++ ++#else ++ ++typedef struct { ++ u64 v; ++} u64_stats_t; ++ ++static inline u64 u64_stats_read(const u64_stats_t *p) ++{ ++ return p->v; ++} ++ ++static inline void u64_stats_add(u64_stats_t *p, unsigned long val) ++{ ++ p->v += val; ++} ++ ++static inline void u64_stats_inc(u64_stats_t *p) ++{ ++ p->v++; ++} ++#endif + + #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +-- +2.43.0 + diff --git a/queue-5.4/u64_stats-streamline-the-implementation.patch b/queue-5.4/u64_stats-streamline-the-implementation.patch new file mode 100644 index 0000000000..f25d91643c --- /dev/null +++ b/queue-5.4/u64_stats-streamline-the-implementation.patch @@ -0,0 +1,274 @@ +From ad3e1e2f65363db776c16c0367741b8a772f3e48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 25 Aug 2022 18:41:31 +0200 +Subject: u64_stats: Streamline the implementation + +From: Thomas Gleixner <tglx@linutronix.de> + +[ Upstream commit 44b0c2957adc62b86fcd51adeaf8e993171bc319 ] + +The u64 stats code handles 3 different cases: + + - 32bit UP + - 32bit SMP + - 64bit + +with an unreadable #ifdef maze, which was recently expanded with PREEMPT_RT +conditionals. + +Reduce it to two cases (32bit and 64bit) and drop the optimization for +32bit UP as suggested by Linus. + +Use the new preempt_disable/enable_nested() helpers to get rid of the +CONFIG_PREEMPT_RT conditionals. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Link: https://lore.kernel.org/r/20220825164131.402717-9-bigeasy@linutronix.de +Stable-dep-of: 38a15d0a50e0 ("u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/u64_stats_sync.h | 145 +++++++++++++++------------------ + 1 file changed, 64 insertions(+), 81 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index 6a0f2097d3709..51f2e16b9540b 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -8,7 +8,7 @@ + * + * Key points : + * +- * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP. ++ * - Use a seqcount on 32-bit + * - The whole thing is a no-op on 64-bit architectures. + * + * Usage constraints: +@@ -20,7 +20,8 @@ + * writer and also spin forever. + * + * 3) Write side must use the _irqsave() variant if other writers, or a reader, +- * can be invoked from an IRQ context. ++ * can be invoked from an IRQ context. On 64bit systems this variant does not ++ * disable interrupts. + * + * 4) If reader fetches several counters, there is no guarantee the whole values + * are consistent w.r.t. each other (remember point #2: seqcounts are not +@@ -29,11 +30,6 @@ + * 5) Readers are allowed to sleep or be preempted/interrupted: they perform + * pure reads. + * +- * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats +- * might be updated from a hardirq or softirq context (remember point #1: +- * seqcounts are not used for UP kernels). 32-bit UP stat readers could read +- * corrupted 64-bit values otherwise. 
+- * + * Usage : + * + * Stats producer (writer) should use following template granted it already got +@@ -66,7 +62,7 @@ + #include <linux/seqlock.h> + + struct u64_stats_sync { +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) ++#if BITS_PER_LONG == 32 + seqcount_t seq; + #endif + }; +@@ -93,7 +89,22 @@ static inline void u64_stats_inc(u64_stats_t *p) + local64_inc(&p->v); + } + +-#else ++static inline void u64_stats_init(struct u64_stats_sync *syncp) { } ++static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { } ++static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { } ++static inline unsigned long __u64_stats_irqsave(void) { return 0; } ++static inline void __u64_stats_irqrestore(unsigned long flags) { } ++static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) ++{ ++ return 0; ++} ++static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, ++ unsigned int start) ++{ ++ return false; ++} ++ ++#else /* 64 bit */ + + typedef struct { + u64 v; +@@ -113,123 +124,95 @@ static inline void u64_stats_inc(u64_stats_t *p) + { + p->v++; + } +-#endif + +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +-#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +-#else + static inline void u64_stats_init(struct u64_stats_sync *syncp) + { ++ seqcount_init(&syncp->seq); + } +-#endif + +-static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) ++static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- preempt_disable(); ++ preempt_disable_nested(); + write_seqcount_begin(&syncp->seq); +-#endif + } + +-static inline void u64_stats_update_end(struct u64_stats_sync *syncp) ++static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 
32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + write_seqcount_end(&syncp->seq); +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- preempt_enable(); +-#endif ++ preempt_enable_nested(); + } + +-static inline unsigned long +-u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) ++static inline unsigned long __u64_stats_irqsave(void) + { +- unsigned long flags = 0; ++ unsigned long flags; + +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- preempt_disable(); +- else +- local_irq_save(flags); +- write_seqcount_begin(&syncp->seq); +-#endif ++ local_irq_save(flags); + return flags; + } + +-static inline void +-u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, +- unsigned long flags) ++static inline void __u64_stats_irqrestore(unsigned long flags) + { +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +- write_seqcount_end(&syncp->seq); +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- preempt_enable(); +- else +- local_irq_restore(flags); +-#endif ++ local_irq_restore(flags); + } + + static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + return read_seqcount_begin(&syncp->seq); +-#else +- return 0; +-#endif + } + +-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) ++static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, ++ unsigned int start) + { +-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) +- preempt_disable(); +-#endif +- return __u64_stats_fetch_begin(syncp); ++ return read_seqcount_retry(&syncp->seq, start); + } ++#endif /* !64 bit */ + +-static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, +- unsigned int start) ++static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 32 
&& (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +- return read_seqcount_retry(&syncp->seq, start); +-#else +- return false; +-#endif ++ __u64_stats_update_begin(syncp); ++} ++ ++static inline void u64_stats_update_end(struct u64_stats_sync *syncp) ++{ ++ __u64_stats_update_end(syncp); ++} ++ ++static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) ++{ ++ unsigned long flags = __u64_stats_irqsave(); ++ ++ __u64_stats_update_begin(syncp); ++ return flags; ++} ++ ++static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, ++ unsigned long flags) ++{ ++ __u64_stats_update_end(syncp); ++ __u64_stats_irqrestore(flags); ++} ++ ++static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) ++{ ++ return __u64_stats_fetch_begin(syncp); + } + + static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) +- preempt_enable(); +-#endif + return __u64_stats_fetch_retry(syncp, start); + } + +-/* +- * In case irq handlers can update u64 counters, readers can use following helpers +- * - SMP 32bit arches use seqcount protection, irq safe. +- * - UP 32bit must disable irqs. +- * - 64bit have no problem atomically reading u64 values, irq safe. 
+- */ ++/* Obsolete interfaces */ + static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) +- preempt_disable(); +-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) +- local_irq_disable(); +-#endif +- return __u64_stats_fetch_begin(syncp); ++ return u64_stats_fetch_begin(syncp); + } + + static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) +- preempt_enable(); +-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) +- local_irq_enable(); +-#endif +- return __u64_stats_fetch_retry(syncp, start); ++ return u64_stats_fetch_retry(syncp, start); + } + + #endif /* _LINUX_U64_STATS_SYNC_H */ +-- +2.43.0 + |