diff options
author | Sasha Levin <sashal@kernel.org> | 2024-04-19 07:43:48 -0400 |
---|---|---|
committer | Sasha Levin <sashal@kernel.org> | 2024-04-19 07:43:48 -0400 |
commit | 518c4dcb5169d5be5c1567c26a6ced679ec52bc9 (patch) | |
tree | 2522bed58dabe74c532b2b4f9cba8ec5c640d56d | |
parent | d2083a3152716c3a280a8dad1b93fb9092294639 (diff) | |
download | stable-queue-518c4dcb5169d5be5c1567c26a6ced679ec52bc9.tar.gz |
Fixes for 6.8
Signed-off-by: Sasha Levin <sashal@kernel.org>
42 files changed, 4245 insertions, 0 deletions
diff --git a/queue-6.8/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch b/queue-6.8/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch new file mode 100644 index 0000000000..3d79c48634 --- /dev/null +++ b/queue-6.8/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch @@ -0,0 +1,73 @@ +From 9110dde20a06afa3c316efb6df1f86844d595b6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 10:10:15 -0700 +Subject: af_unix: Call manage_oob() for every skb in + unix_stream_read_generic(). + +From: Kuniyuki Iwashima <kuniyu@amazon.com> + +[ Upstream commit 283454c8a123072e5c386a5a2b5fc576aa455b6f ] + +When we call recv() for AF_UNIX socket, we first peek one skb and +call manage_oob() to check if the skb is sent with MSG_OOB. + +However, when we fetch the next (and the following) skb, manage_oob() +is not called now, leading to wrong behaviour. + +Let's say a socket send()s "hello" with MSG_OOB and the peer tries +to recv() 5 bytes with MSG_PEEK. Here, we should get only "hell" +without 'o', but actually not: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'hello', MSG_OOB) + 5 + >>> c2.recv(5, MSG_PEEK) + b'hello' + +The first skb fills 4 bytes, and the next skb is peeked but not +properly checked by manage_oob(). + +Let's move up the again label to call manage_oob() for every skb. 
+ +With this patch: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'hello', MSG_OOB) + 5 + >>> c2.recv(5, MSG_PEEK) + b'hell' + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Link: https://lore.kernel.org/r/20240410171016.7621-2-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/unix/af_unix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index e37cf913818a1..fd931f3005cd8 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2680,6 +2680,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + last = skb = skb_peek(&sk->sk_receive_queue); + last_len = last ? last->len : 0; + ++again: + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); +@@ -2691,7 +2692,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + } + } + #endif +-again: + if (skb == NULL) { + if (copied >= target) + goto unlock; +-- +2.43.0 + diff --git a/queue-6.8/af_unix-don-t-peek-oob-data-without-msg_oob.patch b/queue-6.8/af_unix-don-t-peek-oob-data-without-msg_oob.patch new file mode 100644 index 0000000000..213cd7cbef --- /dev/null +++ b/queue-6.8/af_unix-don-t-peek-oob-data-without-msg_oob.patch @@ -0,0 +1,86 @@ +From 3a21de41a3147560ffba6c22aacee1aaa160b055 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 10:10:16 -0700 +Subject: af_unix: Don't peek OOB data without MSG_OOB. + +From: Kuniyuki Iwashima <kuniyu@amazon.com> + +[ Upstream commit 22dd70eb2c3d754862964377a75abafd3167346b ] + +Currently, we can read OOB data without MSG_OOB by using MSG_PEEK +when OOB data is sitting on the front row, which is apparently +wrong. 
+ + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'a', MSG_OOB) + 1 + >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) + b'a' + +If manage_oob() is called when no data has been copied, we only +check if the socket enables SO_OOBINLINE or MSG_PEEK is not used. +Otherwise, the skb is returned as is. + +However, here we should return NULL if MSG_PEEK is set and no data +has been copied. + +Also, in such a case, we should not jump to the redo label because +we will be caught in the loop and hog the CPU until normal data +comes in. + +Then, we need to handle skb == NULL case with the if-clause below +the manage_oob() block. + +With this patch: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'a', MSG_OOB) + 1 + >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + BlockingIOError: [Errno 11] Resource temporarily unavailable + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Link: https://lore.kernel.org/r/20240410171016.7621-3-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/unix/af_unix.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index fd931f3005cd8..9df15a7bc2569 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2602,7 +2602,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + WRITE_ONCE(u->oob_skb, NULL); + consume_skb(skb); + } +- } else if (!(flags & MSG_PEEK)) { ++ } else if (flags & MSG_PEEK) { ++ skb = NULL; ++ } else { + skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) +@@ -2684,11 +2686,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + 
skb = manage_oob(skb, sk, flags, copied); +- if (!skb) { ++ if (!skb && copied) { + unix_state_unlock(sk); +- if (copied) +- break; +- goto redo; ++ break; + } + } + #endif +-- +2.43.0 + diff --git a/queue-6.8/gpiolib-swnode-remove-wrong-header-inclusion.patch b/queue-6.8/gpiolib-swnode-remove-wrong-header-inclusion.patch new file mode 100644 index 0000000000..2e2b8ec818 --- /dev/null +++ b/queue-6.8/gpiolib-swnode-remove-wrong-header-inclusion.patch @@ -0,0 +1,37 @@ +From 274050bc458734e370408f4f74eb425ba3ed9ec6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 17:19:13 +0300 +Subject: gpiolib: swnode: Remove wrong header inclusion + +From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + +[ Upstream commit 69ffed4b62523bbc85511f150500329d28aba356 ] + +The flags in the software node properties are supposed to be +the GPIO lookup flags, which are provided by gpio/machine.h, +as the software nodes are the kernel internal thing and doesn't +need to rely to any of ABIs. 
+ +Fixes: e7f9ff5dc90c ("gpiolib: add support for software nodes") +Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> +Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/gpio/property.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h +index 6c75c8bd44a0b..1a14e239221f7 100644 +--- a/include/linux/gpio/property.h ++++ b/include/linux/gpio/property.h +@@ -2,7 +2,6 @@ + #ifndef __LINUX_GPIO_PROPERTY_H + #define __LINUX_GPIO_PROPERTY_H + +-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ + #include <linux/property.h> + + #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ +-- +2.43.0 + diff --git a/queue-6.8/ice-fix-checking-for-unsupported-keys-on-non-tunnel-.patch b/queue-6.8/ice-fix-checking-for-unsupported-keys-on-non-tunnel-.patch new file mode 100644 index 0000000000..360d1a0ca7 --- /dev/null +++ b/queue-6.8/ice-fix-checking-for-unsupported-keys-on-non-tunnel-.patch @@ -0,0 +1,51 @@ +From c12a75fbfb7d0fd445a9f184756ad0d0eeebbaf4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 17:45:44 +0200 +Subject: ice: Fix checking for unsupported keys on non-tunnel device + +From: Marcin Szycik <marcin.szycik@linux.intel.com> + +[ Upstream commit 2cca35f5dd78b9f8297c879c5db5ab137c5d86c3 ] + +Add missing FLOW_DISSECTOR_KEY_ENC_* checks to TC flower filter parsing. +Without these checks, it would be possible to add filters with tunnel +options on non-tunnel devices. enc_* options are only valid for tunnel +devices. 
+ +Example: + devlink dev eswitch set $PF1_PCI mode switchdev + echo 1 > /sys/class/net/$PF1/device/sriov_numvfs + tc qdisc add dev $VF1_PR ingress + ethtool -K $PF1 hw-tc-offload on + tc filter add dev $VF1_PR ingress flower enc_ttl 12 skip_sw action drop + +Fixes: 9e300987d4a8 ("ice: VXLAN and Geneve TC support") +Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> +Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com> +Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> +Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com> +Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +index bcbcfc67e5606..688ccb0615ab9 100644 +--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +@@ -1489,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, + (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | +- BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) { ++ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | ++ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | ++ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | ++ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel"); + return -EOPNOTSUPP; + } else { +-- +2.43.0 + diff --git a/queue-6.8/ice-tc-allow-zero-flags-in-parsing-tc-flower.patch b/queue-6.8/ice-tc-allow-zero-flags-in-parsing-tc-flower.patch new file mode 100644 index 0000000000..62a1b08d04 --- /dev/null +++ b/queue-6.8/ice-tc-allow-zero-flags-in-parsing-tc-flower.patch @@ -0,0 +1,49 @@ +From 871bb4ba0e1badda42a67799b8a9c1b2c0d2c02c Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
<sashal@kernel.org> +Date: Fri, 15 Mar 2024 12:08:21 +0100 +Subject: ice: tc: allow zero flags in parsing tc flower + +From: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> + +[ Upstream commit 73278715725a8347032acf233082ca4eb31e6a56 ] + +The check for flags is done to not pass empty lookups to adding switch +rule functions. Since metadata is always added to lookups there is no +need to check against the flag. + +It is also fixing the problem with such rule: +$ tc filter add dev gtp_dev ingress protocol ip prio 0 flower \ + enc_dst_port 2123 action drop +Switch block in case of GTP can't parse the destination port, because it +should always be set to GTP specific value. The same with ethertype. The +result is that there is no other matching criteria than GTP tunnel. In +this case flags is 0, rule can't be added only because of defensive +check against flags. + +Fixes: 9a225f81f540 ("ice: Support GTP-U and GTP-C offload in switchdev") +Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com> +Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> +Reviewed-by: Simon Horman <horms@kernel.org> +Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com> +Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +index 49ed5fd7db107..bcbcfc67e5606 100644 +--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +@@ -779,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) + int ret; + int i; + +- if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { ++ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } +-- +2.43.0 + 
diff --git a/queue-6.8/ice-tc-check-src_vsi-in-case-of-traffic-from-vf.patch b/queue-6.8/ice-tc-check-src_vsi-in-case-of-traffic-from-vf.patch new file mode 100644 index 0000000000..f3c8c79a8a --- /dev/null +++ b/queue-6.8/ice-tc-check-src_vsi-in-case-of-traffic-from-vf.patch @@ -0,0 +1,71 @@ +From acd41c5805cb79f343891e82f2347a8d92b3cf57 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 15 Mar 2024 12:08:20 +0100 +Subject: ice: tc: check src_vsi in case of traffic from VF + +From: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> + +[ Upstream commit 428051600cb4e5a61d81aba3f8009b6c4f5e7582 ] + +In case of traffic going from the VF (so ingress for port representor) +source VSI should be considered during packet classification. It is +needed for hardware to not match packets from different ports with +filters added on other port. + +It is only for "from VF" traffic, because other traffic direction +doesn't have source VSI. + +Set correct ::src_vsi in rule_info to pass it to the hardware filter. + +For example this rule should drop only ipv4 packets from eth10, not from +the other VF PRs. It is needed to check source VSI in this case. 
+$tc filter add dev eth10 ingress protocol ip flower skip_sw action drop + +Fixes: 0d08a441fb1a ("ice: ndo_setup_tc implementation for PF") +Reviewed-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com> +Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com> +Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> +Reviewed-by: Simon Horman <horms@kernel.org> +Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com> +Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +index b890410a2bc0b..49ed5fd7db107 100644 +--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +@@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, + * - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified) + * - Tunnel flag (present if tunnel) + */ ++ if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) ++ lkups_cnt++; + + if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) + lkups_cnt++; +@@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, + /* Always add direction metadata */ + ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]); + ++ if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { ++ ice_rule_add_src_vsi_metadata(&list[i]); ++ i++; ++ } ++ + rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type); + if (tc_fltr->tunnel_type != TNL_LAST) { + i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i); +@@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = fltr->cookie; ++ rule_info.src_vsi = vsi->idx; + + ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (ret == -EEXIST) { 
+-- +2.43.0 + diff --git a/queue-6.8/net-change-maximum-number-of-udp-segments-to-128.patch b/queue-6.8/net-change-maximum-number-of-udp-segments-to-128.patch new file mode 100644 index 0000000000..538fae67f6 --- /dev/null +++ b/queue-6.8/net-change-maximum-number-of-udp-segments-to-128.patch @@ -0,0 +1,71 @@ +From c540ddaf074a16d6e4e55416a3c5212dcdae5c93 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 08:11:24 +0300 +Subject: net: change maximum number of UDP segments to 128 + +From: Yuri Benditovich <yuri.benditovich@daynix.com> + +[ Upstream commit 1382e3b6a3500c245e5278c66d210c02926f804f ] + +The commit fc8b2a619469 +("net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation") +adds check of potential number of UDP segments vs +UDP_MAX_SEGMENTS in linux/virtio_net.h. +After this change certification test of USO guest-to-guest +transmit on Windows driver for virtio-net device fails, +for example with packet size of ~64K and mss of 536 bytes. +In general the USO should not be more restrictive than TSO. +Indeed, in case of unreasonably small mss a lot of segments +can cause queue overflow and packet loss on the destination. +Limit of 128 segments is good for any practical purpose, +with minimal meaningful mss of 536 the maximal UDP packet will +be divided to ~120 segments. +The number of segments for UDP packets is validated vs +UDP_MAX_SEGMENTS also in udp.c (v4,v6), this does not affect +guest-to-guest path but does affect packets sent to host, for +example. +It is important to mention that UDP_MAX_SEGMENTS is kernel-only +define and not available to user mode socket applications. +In order to request MSS smaller than MTU the applications +just use setsockopt with SOL_UDP and UDP_SEGMENT and there are +no limitations on socket API level. 
+ +Fixes: fc8b2a619469 ("net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation") +Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/linux/udp.h | 2 +- + tools/testing/selftests/net/udpgso.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/udp.h b/include/linux/udp.h +index 94e63b2695406..00790bb5cbde6 100644 +--- a/include/linux/udp.h ++++ b/include/linux/udp.h +@@ -105,7 +105,7 @@ struct udp_sock { + #define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + +-#define UDP_MAX_SEGMENTS (1 << 6UL) ++#define UDP_MAX_SEGMENTS (1 << 7UL) + + #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) + +diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c +index 7badaf215de28..b02080d09fbc0 100644 +--- a/tools/testing/selftests/net/udpgso.c ++++ b/tools/testing/selftests/net/udpgso.c +@@ -34,7 +34,7 @@ + #endif + + #ifndef UDP_MAX_SEGMENTS +-#define UDP_MAX_SEGMENTS (1 << 6UL) ++#define UDP_MAX_SEGMENTS (1 << 7UL) + #endif + + #define CONST_MTU_TEST 1500 +-- +2.43.0 + diff --git a/queue-6.8/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch b/queue-6.8/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch new file mode 100644 index 0000000000..8c927aa556 --- /dev/null +++ b/queue-6.8/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch @@ -0,0 +1,81 @@ +From 06aba6c7152363d11f0a7c5d410879e4e432ebc8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 16:01:39 +0300 +Subject: net: dsa: mt7530: fix mirroring frames received on local port +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arınç ÜNAL <arinc.unal@arinc9.com> + +[ Upstream commit 
d59cf049c8378677053703e724808836f180888e ] + +This switch intellectual property provides a bit on the ARL global control +register which controls allowing mirroring frames which are received on the +local port (monitor port). This bit is unset after reset. + +This ability must be enabled to fully support the port mirroring feature on +this switch intellectual property. + +Therefore, this patch fixes the traffic not being reflected on a port, +which would be configured like below: + + tc qdisc add dev swp0 clsact + + tc filter add dev swp0 ingress matchall skip_sw \ + action mirred egress mirror dev swp0 + +As a side note, this configuration provides the hairpinning feature for a +single port. + +Fixes: 37feab6076aa ("net: dsa: mt7530: add support for port mirroring") +Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/dsa/mt7530.c | 6 ++++++ + drivers/net/dsa/mt7530.h | 4 ++++ + 2 files changed, 10 insertions(+) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index 22b97505fa536..14f1b8d08153f 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -2517,6 +2517,9 @@ mt7530_setup(struct dsa_switch *ds) + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + } + ++ /* Allow mirroring frames received on the local port (monitor port). */ ++ mt7530_set(priv, MT753X_AGC, LOCAL_EN); ++ + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + ret = mt7530_setup_vlan0(priv); + if (ret) +@@ -2625,6 +2628,9 @@ mt7531_setup_common(struct dsa_switch *ds) + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + } + ++ /* Allow mirroring frames received on the local port (monitor port). 
*/ ++ mt7530_set(priv, MT753X_AGC, LOCAL_EN); ++ + /* Flush the FDB table */ + ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); + if (ret < 0) +diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h +index ddefeb69afda1..9388c1205bea2 100644 +--- a/drivers/net/dsa/mt7530.h ++++ b/drivers/net/dsa/mt7530.h +@@ -32,6 +32,10 @@ enum mt753x_id { + #define SYSC_REG_RSTCTRL 0x34 + #define RESET_MCM BIT(2) + ++/* Register for ARL global control */ ++#define MT753X_AGC 0xc ++#define LOCAL_EN BIT(7) ++ + /* Registers to mac forward control for unknown frames */ + #define MT7530_MFC 0x10 + #define BC_FFP(x) (((x) & 0xff) << 24) +-- +2.43.0 + diff --git a/queue-6.8/net-dsa-mt7530-fix-port-mirroring-for-mt7988-soc-swi.patch b/queue-6.8/net-dsa-mt7530-fix-port-mirroring-for-mt7988-soc-swi.patch new file mode 100644 index 0000000000..da07852281 --- /dev/null +++ b/queue-6.8/net-dsa-mt7530-fix-port-mirroring-for-mt7988-soc-swi.patch @@ -0,0 +1,58 @@ +From bca2507582817ce5aea896dc19b6a9d6600ac05b Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 16:01:40 +0300 +Subject: net: dsa: mt7530: fix port mirroring for MT7988 SoC switch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arınç ÜNAL <arinc.unal@arinc9.com> + +[ Upstream commit 2c606d138518cc69f09c35929abc414a99e3a28f ] + +The "MT7988A Wi-Fi 7 Generation Router Platform: Datasheet (Open Version) +v0.1" document shows bits 16 to 18 as the MIRROR_PORT field of the CPU +forward control register. Currently, the MT7530 DSA subdriver configures +bits 0 to 2 of the CPU forward control register which breaks the port +mirroring feature for the MT7988 SoC switch. + +Fix this by using the MT7531_MIRROR_PORT_GET() and MT7531_MIRROR_PORT_SET() +macros which utilise the correct bits. 
+ +Fixes: 110c18bfed41 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") +Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com> +Acked-by: Daniel Golle <daniel@makrotopia.org> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/dsa/mt7530.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index 14f1b8d08153f..f37dc22356f15 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -1947,14 +1947,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, + + static int mt753x_mirror_port_get(unsigned int id, u32 val) + { +- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) : +- MIRROR_PORT(val); ++ return (id == ID_MT7531 || id == ID_MT7988) ? ++ MT7531_MIRROR_PORT_GET(val) : ++ MIRROR_PORT(val); + } + + static int mt753x_mirror_port_set(unsigned int id, u32 val) + { +- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) : +- MIRROR_PORT(val); ++ return (id == ID_MT7531 || id == ID_MT7988) ? ++ MT7531_MIRROR_PORT_SET(val) : ++ MIRROR_PORT(val); + } + + static int mt753x_port_mirror_add(struct dsa_switch *ds, int port, +-- +2.43.0 + diff --git a/queue-6.8/net-ethernet-mtk_eth_soc-fix-wed-wifi-reset.patch b/queue-6.8/net-ethernet-mtk_eth_soc-fix-wed-wifi-reset.patch new file mode 100644 index 0000000000..fa376a0df3 --- /dev/null +++ b/queue-6.8/net-ethernet-mtk_eth_soc-fix-wed-wifi-reset.patch @@ -0,0 +1,64 @@ +From 4622690aa546304aa3ef21e5509175e02341dc38 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 16 Apr 2024 10:23:29 +0200 +Subject: net: ethernet: mtk_eth_soc: fix WED + wifi reset + +From: Felix Fietkau <nbd@nbd.name> + +[ Upstream commit 94667949ec3bbb2218c46ad0a0e7274c8832e494 ] + +The WLAN + WED reset sequence relies on being able to receive interrupts from +the card, in order to synchronize individual steps with the firmware. 
+When WED is stopped, leave interrupts running and rely on the driver turning +off unwanted ones. +WED DMA also needs to be disabled before resetting. + +Fixes: f78cd9c783e0 ("net: ethernet: mtk_wed: update mtk_wed_stop") +Signed-off-by: Felix Fietkau <nbd@nbd.name> +Link: https://lore.kernel.org/r/20240416082330.82564-1-nbd@nbd.name +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/mediatek/mtk_wed.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c +index c895e265ae0eb..61334a71058c7 100644 +--- a/drivers/net/ethernet/mediatek/mtk_wed.c ++++ b/drivers/net/ethernet/mediatek/mtk_wed.c +@@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) + static void + mtk_wed_stop(struct mtk_wed_device *dev) + { ++ mtk_wed_dma_disable(dev); + mtk_wed_set_ext_int(dev, false); + + wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0); + wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0); + wdma_w32(dev, MTK_WDMA_INT_MASK, 0); + wdma_w32(dev, MTK_WDMA_INT_GRP2, 0); +- wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0); + + if (!mtk_wed_get_rx_capa(dev)) + return; +@@ -1093,7 +1093,6 @@ static void + mtk_wed_deinit(struct mtk_wed_device *dev) + { + mtk_wed_stop(dev); +- mtk_wed_dma_disable(dev); + + wed_clr(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | +@@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) + static void + mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask) + { +- if (!dev->running) +- return; +- + mtk_wed_set_ext_int(dev, !!mask); + wed_w32(dev, MTK_WED_INT_MASK, mask); + } +-- +2.43.0 + diff --git a/queue-6.8/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch b/queue-6.8/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch new file mode 100644 index 0000000000..fdfa346b23 --- /dev/null +++ 
b/queue-6.8/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch @@ -0,0 +1,66 @@ +From fb1f720797d86801d226024bdab9a3ab53f49a81 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 15:24:25 +0530 +Subject: net: ethernet: ti: am65-cpsw-nuss: cleanup DMA Channels before using + them + +From: Siddharth Vadapalli <s-vadapalli@ti.com> + +[ Upstream commit c24cd679b075b0e953ea167b0aa2b2d59e4eba7f ] + +The TX and RX DMA Channels used by the driver to exchange data with CPSW +are not guaranteed to be in a clean state during driver initialization. +The Bootloader could have used the same DMA Channels without cleaning them +up in the event of failure. Thus, reset and disable the DMA Channels to +ensure that they are in a clean state before using them. + +Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") +Reported-by: Schuyler Patton <spatton@ti.com> +Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com> +Reviewed-by: Roger Quadros <rogerq@kernel.org> +Link: https://lore.kernel.org/r/20240417095425.2253876-1-s-vadapalli@ti.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +index 2939a21ca74f3..1d00e21808c1c 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c ++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +@@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) + + static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) + { ++ struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; ++ struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; + struct device *dev = common->dev; + struct am65_cpsw_port *port; + int ret = 0, i; +@@ -2805,6 +2807,22 @@ static int 
am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) + if (ret) + return ret; + ++ /* The DMA Channels are not guaranteed to be in a clean state. ++ * Reset and disable them to ensure that they are back to the ++ * clean state and ready to be used. ++ */ ++ for (i = 0; i < common->tx_ch_num; i++) { ++ k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], ++ am65_cpsw_nuss_tx_cleanup); ++ k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); ++ } ++ ++ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) ++ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, ++ am65_cpsw_nuss_rx_cleanup, !!i); ++ ++ k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); ++ + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5-lag-restore-buckets-number-to-default-after.patch b/queue-6.8/net-mlx5-lag-restore-buckets-number-to-default-after.patch new file mode 100644 index 0000000000..0c11faca34 --- /dev/null +++ b/queue-6.8/net-mlx5-lag-restore-buckets-number-to-default-after.patch @@ -0,0 +1,49 @@ +From 190cccac507d4919a1281f7e1dce1bb884f87c7b Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 14:54:39 +0300 +Subject: net/mlx5: Lag, restore buckets number to default after hash LAG + deactivation + +From: Shay Drory <shayd@nvidia.com> + +[ Upstream commit 37cc10da3a50e6d0cb9808a90b7da9b4868794dd ] + +The cited patch introduces the concept of buckets in LAG in hash mode. +However, the patch doesn't clear the number of buckets in the LAG +deactivation. This results in using the wrong number of buckets in +case user create a hash mode LAG and afterwards create a non-hash +mode LAG. + +Hence, restore buckets number to default after hash mode LAG +deactivation. 
+ +Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode") +Signed-off-by: Shay Drory <shayd@nvidia.com> +Reviewed-by: Maor Gottlieb <maorg@nvidia.com> +Signed-off-by: Tariq Toukan <tariqt@nvidia.com> +Link: https://lore.kernel.org/r/20240411115444.374475-2-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +index d14459e5c04fc..69d482f7c5a29 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +@@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) + return err; + } + +- if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) ++ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + mlx5_lag_port_sel_destroy(ldev); ++ ldev->buckets = 1; ++ } + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5-restore-mistakenly-dropped-parts-in-registe.patch b/queue-6.8/net-mlx5-restore-mistakenly-dropped-parts-in-registe.patch new file mode 100644 index 0000000000..4a3ad5ce06 --- /dev/null +++ b/queue-6.8/net-mlx5-restore-mistakenly-dropped-parts-in-registe.patch @@ -0,0 +1,59 @@ +From 1f9ef64ddb81faa78e86b2779fac1dabb607074e Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 14:54:41 +0300 +Subject: net/mlx5: Restore mistakenly dropped parts in register devlink flow + +From: Shay Drory <shayd@nvidia.com> + +[ Upstream commit bf729988303a27833a86acb561f42b9a3cc12728 ] + +Code parts from cited commit were mistakenly dropped while rebasing +before submission. Add them here. 
+ +Fixes: c6e77aa9dd82 ("net/mlx5: Register devlink first under devlink lock") +Signed-off-by: Shay Drory <shayd@nvidia.com> +Signed-off-by: Tariq Toukan <tariqt@nvidia.com> +Link: https://lore.kernel.org/r/20240411115444.374475-4-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 ++++- + drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 1 - + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 131a836c127e3..e285823bd08f0 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1699,12 +1699,15 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); +- goto query_hca_caps_err; ++ goto params_reg_err; + } + + devl_unlock(devlink); + return 0; + ++params_reg_err: ++ devl_unregister(devlink); ++ devl_unlock(devlink); + query_hca_caps_err: + devl_unregister(devlink); + devl_unlock(devlink); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +index e3bf8c7e4baa6..7ebe712808275 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +@@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia + goto peer_devlink_set_err; + } + +- devlink_register(devlink); + return 0; + + peer_devlink_set_err: +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5e-prevent-deadlock-while-disabling-arfs.patch b/queue-6.8/net-mlx5e-prevent-deadlock-while-disabling-arfs.patch new file mode 100644 index 0000000000..53a22e05d6 --- /dev/null +++ 
b/queue-6.8/net-mlx5e-prevent-deadlock-while-disabling-arfs.patch @@ -0,0 +1,213 @@ +From 7f11948ba6645b467211e4c9af84019529c538a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 14:54:44 +0300 +Subject: net/mlx5e: Prevent deadlock while disabling aRFS + +From: Carolina Jubran <cjubran@nvidia.com> + +[ Upstream commit fef965764cf562f28afb997b626fc7c3cec99693 ] + +When disabling aRFS under the `priv->state_lock`, any scheduled +aRFS works are canceled using the `cancel_work_sync` function, +which waits for the work to end if it has already started. +However, while waiting for the work handler, the handler will +try to acquire the `state_lock` which is already acquired. + +The worker acquires the lock to delete the rules if the state +is down, which is not the worker's responsibility since +disabling aRFS deletes the rules. + +Add an aRFS state variable, which indicates whether the aRFS is +enabled and prevent adding rules when the aRFS is disabled. + +Kernel log: + +====================================================== +WARNING: possible circular locking dependency detected +6.7.0-rc4_net_next_mlx5_5483eb2 #1 Tainted: G I +------------------------------------------------------ +ethtool/386089 is trying to acquire lock: +ffff88810f21ce68 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}, at: __flush_work+0x74/0x4e0 + +but task is already holding lock: +ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] + +which lock already depends on the new lock. 
+ +the existing dependency chain (in reverse order) is: + +-> #1 (&priv->state_lock){+.+.}-{3:3}: + __mutex_lock+0x80/0xc90 + arfs_handle_work+0x4b/0x3b0 [mlx5_core] + process_one_work+0x1dc/0x4a0 + worker_thread+0x1bf/0x3c0 + kthread+0xd7/0x100 + ret_from_fork+0x2d/0x50 + ret_from_fork_asm+0x11/0x20 + +-> #0 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}: + __lock_acquire+0x17b4/0x2c80 + lock_acquire+0xd0/0x2b0 + __flush_work+0x7a/0x4e0 + __cancel_work_timer+0x131/0x1c0 + arfs_del_rules+0x143/0x1e0 [mlx5_core] + mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] + mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] + ethnl_set_channels+0x28f/0x3b0 + ethnl_default_set_doit+0xec/0x240 + genl_family_rcv_msg_doit+0xd0/0x120 + genl_rcv_msg+0x188/0x2c0 + netlink_rcv_skb+0x54/0x100 + genl_rcv+0x24/0x40 + netlink_unicast+0x1a1/0x270 + netlink_sendmsg+0x214/0x460 + __sock_sendmsg+0x38/0x60 + __sys_sendto+0x113/0x170 + __x64_sys_sendto+0x20/0x30 + do_syscall_64+0x40/0xe0 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&priv->state_lock); + lock((work_completion)(&rule->arfs_work)); + lock(&priv->state_lock); + lock((work_completion)(&rule->arfs_work)); + + *** DEADLOCK *** + +3 locks held by ethtool/386089: + #0: ffffffff82ea7210 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 + #1: ffffffff82e94c88 (rtnl_mutex){+.+.}-{3:3}, at: ethnl_default_set_doit+0xd3/0x240 + #2: ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] + +stack backtrace: +CPU: 15 PID: 386089 Comm: ethtool Tainted: G I 6.7.0-rc4_net_next_mlx5_5483eb2 #1 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +Call Trace: + <TASK> + dump_stack_lvl+0x60/0xa0 + check_noncircular+0x144/0x160 + __lock_acquire+0x17b4/0x2c80 + lock_acquire+0xd0/0x2b0 + ? __flush_work+0x74/0x4e0 + ? save_trace+0x3e/0x360 + ? 
__flush_work+0x74/0x4e0 + __flush_work+0x7a/0x4e0 + ? __flush_work+0x74/0x4e0 + ? __lock_acquire+0xa78/0x2c80 + ? lock_acquire+0xd0/0x2b0 + ? mark_held_locks+0x49/0x70 + __cancel_work_timer+0x131/0x1c0 + ? mark_held_locks+0x49/0x70 + arfs_del_rules+0x143/0x1e0 [mlx5_core] + mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] + mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] + ethnl_set_channels+0x28f/0x3b0 + ethnl_default_set_doit+0xec/0x240 + genl_family_rcv_msg_doit+0xd0/0x120 + genl_rcv_msg+0x188/0x2c0 + ? ethnl_ops_begin+0xb0/0xb0 + ? genl_family_rcv_msg_dumpit+0xf0/0xf0 + netlink_rcv_skb+0x54/0x100 + genl_rcv+0x24/0x40 + netlink_unicast+0x1a1/0x270 + netlink_sendmsg+0x214/0x460 + __sock_sendmsg+0x38/0x60 + __sys_sendto+0x113/0x170 + ? do_user_addr_fault+0x53f/0x8f0 + __x64_sys_sendto+0x20/0x30 + do_syscall_64+0x40/0xe0 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + </TASK> + +Fixes: 45bf454ae884 ("net/mlx5e: Enabling aRFS mechanism") +Signed-off-by: Carolina Jubran <cjubran@nvidia.com> +Signed-off-by: Tariq Toukan <tariqt@nvidia.com> +Link: https://lore.kernel.org/r/20240411115444.374475-7-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 27 +++++++++++-------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +index e66f486faafe1..415fec7763bd2 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +@@ -45,6 +45,10 @@ struct arfs_table { + struct hlist_head rules_hash[ARFS_HASH_SIZE]; + }; + ++enum { ++ MLX5E_ARFS_STATE_ENABLED, ++}; ++ + enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, +@@ -59,6 +63,7 @@ struct mlx5e_arfs_tables { + spinlock_t arfs_lock; + int last_filter_id; + struct workqueue_struct *wq; ++ unsigned long state; + }; + + struct arfs_tuple { +@@ 
-169,6 +174,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) + return err; + } + } ++ set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); ++ + return 0; + } + +@@ -454,6 +461,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs) + int i; + int j; + ++ clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); ++ + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { + hlist_del_init(&rule->hlist); +@@ -626,17 +635,8 @@ static void arfs_handle_work(struct work_struct *work) + struct mlx5_flow_handle *rule; + + arfs = mlx5e_fs_get_arfs(priv->fs); +- mutex_lock(&priv->state_lock); +- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { +- spin_lock_bh(&arfs->arfs_lock); +- hlist_del(&arfs_rule->hlist); +- spin_unlock_bh(&arfs->arfs_lock); +- +- mutex_unlock(&priv->state_lock); +- kfree(arfs_rule); +- goto out; +- } +- mutex_unlock(&priv->state_lock); ++ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) ++ return; + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); +@@ -752,6 +752,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); ++ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) { ++ spin_unlock_bh(&arfs->arfs_lock); ++ return -EPERM; ++ } ++ + arfs_rule = arfs_find_rule(arfs_t, &fk); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) { +-- +2.43.0 + diff --git a/queue-6.8/net-ravb-allow-rx-loop-to-move-past-dma-mapping-erro.patch b/queue-6.8/net-ravb-allow-rx-loop-to-move-past-dma-mapping-erro.patch new file mode 100644 index 0000000000..2ce88a9476 --- /dev/null +++ b/queue-6.8/net-ravb-allow-rx-loop-to-move-past-dma-mapping-erro.patch @@ -0,0 +1,106 @@ +From a7dfc8e334cabac0fbd1a464ac51e25ea5c3fa63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 16 Apr 2024 13:02:52 +0100 +Subject: net: ravb: Allow RX loop to move past DMA 
mapping errors + +From: Paul Barker <paul.barker.ct@bp.renesas.com> + +[ Upstream commit a892493a343494bd6bab9d098593932077ff3c43 ] + +The RX loops in ravb_rx_gbeth() and ravb_rx_rcar() skip to the next loop +iteration if a zero-length descriptor is seen (indicating a DMA mapping +error). However, the current RX descriptor index `priv->cur_rx[q]` was +incremented at the end of the loop and so would not be incremented when +we skip to the next loop iteration. This would cause the loop to keep +seeing the same zero-length descriptor instead of moving on to the next +descriptor. + +As the loop counter `i` still increments, the loop would eventually +terminate so there is no risk of being stuck here forever - but we +should still fix this to avoid wasting cycles. + +To fix this, the RX descriptor index is incremented at the top of the +loop, in the for statement itself. The assignments of `entry` and `desc` +are brought into the loop to avoid the need for duplication. + +Fixes: d8b48911fd24 ("ravb: fix ring memory allocation") +Signed-off-by: Paul Barker <paul.barker.ct@bp.renesas.com> +Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/renesas/ravb_main.c | 25 ++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 60c1cfc501304..853c2a0d4e259 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -781,12 +781,15 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) + int limit; + int i; + +- entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + stats = &priv->stats[q]; + +- desc = &priv->rx_ring[q].desc[entry]; +- for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; 
i++) { ++ for (i = 0; i < limit; i++, priv->cur_rx[q]++) { ++ entry = priv->cur_rx[q] % priv->num_rx_ring[q]; ++ desc = &priv->rx_ring[q].desc[entry]; ++ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY) ++ break; ++ + /* Descriptor type must be checked before all other reads */ + dma_rmb(); + desc_status = desc->msc; +@@ -850,9 +853,6 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) + break; + } + } +- +- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; +- desc = &priv->rx_ring[q].desc[entry]; + } + + /* Refill the RX ring buffers. */ +@@ -894,7 +894,6 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + { + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; +- int entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + struct net_device_stats *stats = &priv->stats[q]; + struct ravb_ex_rx_desc *desc; + unsigned int limit, i; +@@ -904,10 +903,15 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + int rx_packets = 0; + u8 desc_status; + u16 pkt_len; ++ int entry; + + limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; +- desc = &priv->rx_ring[q].ex_desc[entry]; +- for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { ++ for (i = 0; i < limit; i++, priv->cur_rx[q]++) { ++ entry = priv->cur_rx[q] % priv->num_rx_ring[q]; ++ desc = &priv->rx_ring[q].ex_desc[entry]; ++ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY) ++ break; ++ + /* Descriptor type must be checked before all other reads */ + dma_rmb(); + desc_status = desc->msc; +@@ -961,9 +965,6 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + rx_packets++; + stats->rx_bytes += pkt_len; + } +- +- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; +- desc = &priv->rx_ring[q].ex_desc[entry]; + } + + /* Refill the RX ring buffers. 
*/ +-- +2.43.0 + diff --git a/queue-6.8/net-ravb-count-packets-instead-of-descriptors-in-r-c.patch b/queue-6.8/net-ravb-count-packets-instead-of-descriptors-in-r-c.patch new file mode 100644 index 0000000000..3f098d91fa --- /dev/null +++ b/queue-6.8/net-ravb-count-packets-instead-of-descriptors-in-r-c.patch @@ -0,0 +1,94 @@ +From 600ad67e08ffc19b51b9c6ecffe03e9e12b6db35 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 16 Apr 2024 13:02:51 +0100 +Subject: net: ravb: Count packets instead of descriptors in R-Car RX path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paul Barker <paul.barker.ct@bp.renesas.com> + +[ Upstream commit def52db470df28d6f43cacbd21137f03b9502073 ] + +The units of "work done" in the RX path should be packets instead of +descriptors. + +Descriptors which are used by the hardware to record error conditions or +are empty in the case of a DMA mapping error should not count towards +our RX work budget. + +Also make the limit variable unsigned as it can never be negative. 
+ +Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") +Signed-off-by: Paul Barker <paul.barker.ct@bp.renesas.com> +Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru> +Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/renesas/ravb_main.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index e97c98d5eb19c..60c1cfc501304 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -895,29 +895,24 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; + int entry = priv->cur_rx[q] % priv->num_rx_ring[q]; +- int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) - +- priv->cur_rx[q]; + struct net_device_stats *stats = &priv->stats[q]; + struct ravb_ex_rx_desc *desc; ++ unsigned int limit, i; + struct sk_buff *skb; + dma_addr_t dma_addr; + struct timespec64 ts; ++ int rx_packets = 0; + u8 desc_status; + u16 pkt_len; +- int limit; + +- boguscnt = min(boguscnt, *quota); +- limit = boguscnt; ++ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + desc = &priv->rx_ring[q].ex_desc[entry]; +- while (desc->die_dt != DT_FEMPTY) { ++ for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { + /* Descriptor type must be checked before all other reads */ + dma_rmb(); + desc_status = desc->msc; + pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS; + +- if (--boguscnt < 0) +- break; +- + /* We use 0-byte descriptors to mark the DMA mapping errors */ + if (!pkt_len) + continue; +@@ -963,7 +958,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + if (ndev->features & NETIF_F_RXCSUM) + 
ravb_rx_csum(skb); + napi_gro_receive(&priv->napi[q], skb); +- stats->rx_packets++; ++ rx_packets++; + stats->rx_bytes += pkt_len; + } + +@@ -999,9 +994,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + desc->die_dt = DT_FEMPTY; + } + +- *quota -= limit - (++boguscnt); +- +- return boguscnt <= 0; ++ stats->rx_packets += rx_packets; ++ *quota -= rx_packets; ++ return *quota == 0; + } + + /* Packet receive function for Ethernet AVB */ +-- +2.43.0 + diff --git a/queue-6.8/net-sched-fix-mirred-deadlock-on-device-recursion.patch b/queue-6.8/net-sched-fix-mirred-deadlock-on-device-recursion.patch new file mode 100644 index 0000000000..698758d801 --- /dev/null +++ b/queue-6.8/net-sched-fix-mirred-deadlock-on-device-recursion.patch @@ -0,0 +1,127 @@ +From 46de49778bf2b0149b4f742f92c597ff726c2a84 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 15 Apr 2024 18:07:28 -0300 +Subject: net/sched: Fix mirred deadlock on device recursion + +From: Eric Dumazet <edumazet@google.com> + +[ Upstream commit 0f022d32c3eca477fbf79a205243a6123ed0fe11 ] + +When the mirred action is used on a classful egress qdisc and a packet is +mirrored or redirected to self we hit a qdisc lock deadlock. +See trace below. + +[..... other info removed for brevity....] 
+[ 82.890906] +[ 82.890906] ============================================ +[ 82.890906] WARNING: possible recursive locking detected +[ 82.890906] 6.8.0-05205-g77fadd89fe2d-dirty #213 Tainted: G W +[ 82.890906] -------------------------------------------- +[ 82.890906] ping/418 is trying to acquire lock: +[ 82.890906] ffff888006994110 (&sch->q.lock){+.-.}-{3:3}, at: +__dev_queue_xmit+0x1778/0x3550 +[ 82.890906] +[ 82.890906] but task is already holding lock: +[ 82.890906] ffff888006994110 (&sch->q.lock){+.-.}-{3:3}, at: +__dev_queue_xmit+0x1778/0x3550 +[ 82.890906] +[ 82.890906] other info that might help us debug this: +[ 82.890906] Possible unsafe locking scenario: +[ 82.890906] +[ 82.890906] CPU0 +[ 82.890906] ---- +[ 82.890906] lock(&sch->q.lock); +[ 82.890906] lock(&sch->q.lock); +[ 82.890906] +[ 82.890906] *** DEADLOCK *** +[ 82.890906] +[..... other info removed for brevity....] + +Example setup (eth0->eth0) to recreate +tc qdisc add dev eth0 root handle 1: htb default 30 +tc filter add dev eth0 handle 1: protocol ip prio 2 matchall \ + action mirred egress redirect dev eth0 + +Another example(eth0->eth1->eth0) to recreate +tc qdisc add dev eth0 root handle 1: htb default 30 +tc filter add dev eth0 handle 1: protocol ip prio 2 matchall \ + action mirred egress redirect dev eth1 + +tc qdisc add dev eth1 root handle 1: htb default 30 +tc filter add dev eth1 handle 1: protocol ip prio 2 matchall \ + action mirred egress redirect dev eth0 + +We fix this by adding an owner field (CPU id) to struct Qdisc set after +root qdisc is entered. When the softirq enters it a second time, if the +qdisc owner is the same CPU, the packet is dropped to break the loop. 
+ +Reported-by: Mingshuai Ren <renmingshuai@huawei.com> +Closes: https://lore.kernel.org/netdev/20240314111713.5979-1-renmingshuai@huawei.com/ +Fixes: 3bcb846ca4cf ("net: get rid of spin_trylock() in net_tx_action()") +Fixes: e578d9c02587 ("net: sched: use counter to break reclassify loops") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Victor Nogueira <victor@mojatatu.com> +Reviewed-by: Pedro Tammela <pctammela@mojatatu.com> +Tested-by: Jamal Hadi Salim <jhs@mojatatu.com> +Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> +Link: https://lore.kernel.org/r/20240415210728.36949-1-victor@mojatatu.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/sch_generic.h | 1 + + net/core/dev.c | 6 ++++++ + net/sched/sch_generic.c | 1 + + 3 files changed, 8 insertions(+) + +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index cefe0c4bdae34..41ca14e81d55f 100644 +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -117,6 +117,7 @@ struct Qdisc { + struct qdisc_skb_head q; + struct gnet_stats_basic_sync bstats; + struct gnet_stats_queue qstats; ++ int owner; + unsigned long state; + unsigned long state2; /* must be written under qdisc spinlock */ + struct Qdisc *next_sched; +diff --git a/net/core/dev.c b/net/core/dev.c +index c9b8412f1c9d3..c365aa06f886f 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3791,6 +3791,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + return rc; + } + ++ if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) { ++ kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); ++ return NET_XMIT_DROP; ++ } + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. 
+@@ -3830,7 +3834,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + qdisc_run_end(q); + rc = NET_XMIT_SUCCESS; + } else { ++ WRITE_ONCE(q->owner, smp_processor_id()); + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); ++ WRITE_ONCE(q->owner, -1); + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index 9b3e9262040b6..a498b5d7c5d60 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -973,6 +973,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, + sch->enqueue = ops->enqueue; + sch->dequeue = ops->dequeue; + sch->dev_queue = dev_queue; ++ sch->owner = -1; + netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); + refcount_set(&sch->refcnt, 1); + +-- +2.43.0 + diff --git a/queue-6.8/net-sparx5-flower-fix-fragment-flags-handling.patch b/queue-6.8/net-sparx5-flower-fix-fragment-flags-handling.patch new file mode 100644 index 0000000000..5791afd12c --- /dev/null +++ b/queue-6.8/net-sparx5-flower-fix-fragment-flags-handling.patch @@ -0,0 +1,168 @@ +From 66ebdffa921e6c244e213769cd7c7539cda102c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 11:13:18 +0000 +Subject: net: sparx5: flower: fix fragment flags handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Asbjørn Sloth Tønnesen <ast@fiberby.net> + +[ Upstream commit 68aba00483c7c4102429bcdfdece7289a8ab5c8e ] + +I noticed that only 3 out of the 4 input bits were used, +mt.key->flags & FLOW_DIS_IS_FRAGMENT was never checked. + +In order to avoid a complicated maze, I converted it to +use a 16 byte mapping table. + +As shown in the table below the old heuristics doesn't +always do the right thing, ie. when FLOW_DIS_IS_FRAGMENT=1/1 +then it used to only match follow-up fragment packets. 
+ +Here are all the combinations, and their resulting new/old +VCAP key/mask filter: + + /- FLOW_DIS_IS_FRAGMENT (key/mask) + | /- FLOW_DIS_FIRST_FRAG (key/mask) + | | /-- new VCAP fragment (key/mask) + v v v v- old VCAP fragment (key/mask) + + 0/0 0/0 -/- -/- impossible (due to entry cond. on mask) + 0/0 0/1 -/- 0/3 !! invalid (can't match non-fragment + follow-up frag) + 0/0 1/0 -/- -/- impossible (key > mask) + 0/0 1/1 1/3 1/3 first fragment + + 0/1 0/0 0/3 3/3 !! not fragmented + 0/1 0/1 0/3 3/3 !! not fragmented (+ not first fragment) + 0/1 1/0 -/- -/- impossible (key > mask) + 0/1 1/1 -/- 1/3 !! invalid (non-fragment and first frag) + + 1/0 0/0 -/- -/- impossible (key > mask) + 1/0 0/1 -/- -/- impossible (key > mask) + 1/0 1/0 -/- -/- impossible (key > mask) + 1/0 1/1 -/- -/- impossible (key > mask) + + 1/1 0/0 1/1 3/3 !! some fragment + 1/1 0/1 3/3 3/3 follow-up fragment + 1/1 1/0 -/- -/- impossible (key > mask) + 1/1 1/1 1/3 1/3 first fragment + +In the datasheet the VCAP fragment values are documented as: + 0 = no fragment + 1 = initial fragment + 2 = suspicious fragment + 3 = valid follow-up fragment + +Result: 3 combinations match the old behavior, + 3 combinations have been corrected, + 2 combinations are now invalid, and fail, + 8 combinations are impossible. + +It should now be aligned with how FLOW_DIS_IS_FRAGMENT +and FLOW_DIS_FIRST_FRAG is set in __skb_flow_dissect() in +net/core/flow_dissector.c + +Since the VCAP fragment values are not a bitfield, we have +to ignore the suspicious fragment value, eg. when matching +on any kind of fragment with FLOW_DIS_IS_FRAGMENT=1/1. + +Only compile tested, and logic tested in userspace, as I +unfortunately don't have access to this switch chip (yet). 
+ +Fixes: d6c2964db3fe ("net: microchip: sparx5: Adding more tc flower keys for the IS2 VCAP") +Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net> +Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com> +Tested-by: Daniel Machon <daniel.machon@microchip.com> +Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> +Link: https://lore.kernel.org/r/20240411111321.114095-1-ast@fiberby.net +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + .../microchip/sparx5/sparx5_tc_flower.c | 61 ++++++++++++------- + 1 file changed, 40 insertions(+), 21 deletions(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +index 523e0c470894f..55f255a3c9db6 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +@@ -36,6 +36,27 @@ struct sparx5_tc_flower_template { + u16 l3_proto; /* protocol specified in the template */ + }; + ++/* SparX-5 VCAP fragment types: ++ * 0 = no fragment, 1 = initial fragment, ++ * 2 = suspicious fragment, 3 = valid follow-up fragment ++ */ ++enum { /* key / mask */ ++ FRAG_NOT = 0x03, /* 0 / 3 */ ++ FRAG_SOME = 0x11, /* 1 / 1 */ ++ FRAG_FIRST = 0x13, /* 1 / 3 */ ++ FRAG_LATER = 0x33, /* 3 / 3 */ ++ FRAG_INVAL = 0xff, /* invalid */ ++}; ++ ++/* Flower fragment flag to VCAP fragment type mapping */ ++static const u8 sparx5_vcap_frag_map[4][4] = { /* is_frag */ ++ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */ ++ { FRAG_NOT, FRAG_NOT, FRAG_INVAL, FRAG_INVAL }, /* 0/1 */ ++ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */ ++ { FRAG_SOME, FRAG_LATER, FRAG_INVAL, FRAG_FIRST } /* 1/1 */ ++ /* 0/0 0/1 1/0 1/1 <-- first_frag */ ++}; ++ + static int + sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st) + { +@@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) + 
flow_rule_match_control(st->frule, &mt); + + if (mt.mask->flags) { +- if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) { +- if (mt.key->flags & FLOW_DIS_FIRST_FRAG) { +- value = 1; /* initial fragment */ +- mask = 0x3; +- } else { +- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) { +- value = 3; /* follow up fragment */ +- mask = 0x3; +- } else { +- value = 0; /* no fragment */ +- mask = 0x3; +- } +- } +- } else { +- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) { +- value = 3; /* follow up fragment */ +- mask = 0x3; +- } else { +- value = 0; /* no fragment */ +- mask = 0x3; +- } ++ u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT); ++ u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT); ++ u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask; ++ ++ u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG); ++ u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG); ++ u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask; ++ ++ /* Lookup verdict based on the 2 + 2 input bits */ ++ u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx]; ++ ++ if (vdt == FRAG_INVAL) { ++ NL_SET_ERR_MSG_MOD(st->fco->common.extack, ++ "Match on invalid fragment flag combination"); ++ return -EINVAL; + } + ++ /* Extract VCAP fragment key and mask from verdict */ ++ value = (vdt >> 4) & 0x3; ++ mask = vdt & 0x3; ++ + err = vcap_rule_add_key_u32(st->vrule, + VCAP_KF_L3_FRAGMENT_TYPE, + value, mask); +-- +2.43.0 + diff --git a/queue-6.8/net-stmmac-apply-half-duplex-less-constraint-for-dw-.patch b/queue-6.8/net-stmmac-apply-half-duplex-less-constraint-for-dw-.patch new file mode 100644 index 0000000000..0e4610f588 --- /dev/null +++ b/queue-6.8/net-stmmac-apply-half-duplex-less-constraint-for-dw-.patch @@ -0,0 +1,101 @@ +From 4c8d15ab97baf49cc271bd5ba9ddf6eff4daf5d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 12 Apr 2024 21:03:14 +0300 +Subject: net: stmmac: Apply half-duplex-less constraint for DW QoS Eth only + +From: Serge Semin 
<fancer.lancer@gmail.com> + +[ Upstream commit 0ebd96f5da4410c0cb8fc75e44f1009530b2f90b ] + +There are three DW MAC IP-cores which can have multiple Tx/Rx queues +enabled: +DW GMAC v3.7+ with AV feature, +DW QoS Eth v4.x/v5.x, +DW XGMAC/XLGMAC +Based on the respective HW databooks, only the DW QoS Eth IP-core doesn't +support the half-duplex link mode if more than one queue is enabled: + +"In multiple queue/channel configurations, for half-duplex operation, +enable only the Q0/CH0 on Tx and Rx. For single queue/channel in +full-duplex operation, any queue/channel can be enabled." + +The rest of the IP-cores don't have such a constraint. Thus in order to have +the constraint applied for the DW QoS Eth MACs only, let's move its +implementation to the respective MAC-capabilities getter and make sure the +getter is called in the queues re-init procedure. + +Fixes: b6cfffa7ad92 ("stmmac: fix DMA channel hang in half-duplex mode") +Signed-off-by: Serge Semin <fancer.lancer@gmail.com> +Reviewed-by: Romain Gantois <romain.gantois@bootlin.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 7 +++++++ + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 +++---------------- + 2 files changed, 10 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index cef25efbdff99..ec6a13e644b36 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -71,6 +71,13 @@ static void dwmac4_core_init(struct mac_device_info *hw, + static void dwmac4_phylink_get_caps(struct stmmac_priv *priv) + { + priv->phylink_config.mac_capabilities |= MAC_2500FD; ++ ++ if (priv->plat->tx_queues_to_use > 1) ++ priv->phylink_config.mac_capabilities &= ++ ~(MAC_10HD | MAC_100HD | MAC_1000HD); ++ else ++
priv->phylink_config.mac_capabilities |= ++ (MAC_10HD | MAC_100HD | MAC_1000HD); + } + + static void dwmac4_rx_queue_enable(struct mac_device_info *hw, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 7c6aef033a456..cbb00ca23a7c3 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev) + return ret; + } + +-static void stmmac_set_half_duplex(struct stmmac_priv *priv) +-{ +- /* Half-Duplex can only work with single tx queue */ +- if (priv->plat->tx_queues_to_use > 1) +- priv->phylink_config.mac_capabilities &= +- ~(MAC_10HD | MAC_100HD | MAC_1000HD); +- else +- priv->phylink_config.mac_capabilities |= +- (MAC_10HD | MAC_100HD | MAC_1000HD); +-} +- + static int stmmac_phy_setup(struct stmmac_priv *priv) + { + struct stmmac_mdio_bus_data *mdio_bus_data; +@@ -1237,10 +1226,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) + priv->phylink_config.supported_interfaces); + + priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | +- MAC_10FD | MAC_100FD | +- MAC_1000FD; +- +- stmmac_set_half_duplex(priv); ++ MAC_10 | MAC_100 | MAC_1000; + + /* Get the MAC specific capabilities */ + stmmac_mac_phylink_get_caps(priv); +@@ -7299,7 +7285,8 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) + priv->rss.table[i] = ethtool_rxfh_indir_default(i, + rx_cnt); + +- stmmac_set_half_duplex(priv); ++ stmmac_mac_phylink_get_caps(priv); ++ + stmmac_napi_add(dev); + + if (netif_running(dev)) +-- +2.43.0 + diff --git a/queue-6.8/net-stmmac-fix-ip-cores-specific-mac-capabilities.patch b/queue-6.8/net-stmmac-fix-ip-cores-specific-mac-capabilities.patch new file mode 100644 index 0000000000..4b1121c7a2 --- /dev/null +++ b/queue-6.8/net-stmmac-fix-ip-cores-specific-mac-capabilities.patch @@ -0,0 +1,245 @@ +From 
941126169b5a56bead03f1760dbcc5a5d67861ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 12 Apr 2024 21:03:16 +0300 +Subject: net: stmmac: Fix IP-cores specific MAC capabilities + +From: Serge Semin <fancer.lancer@gmail.com> + +[ Upstream commit 9cb54af214a7cdc91577ec083e5569f2ce2c86d8 ] + +Here is the list of the MAC capabilities specific to the particular DW MAC +IP-cores currently supported by the driver: + +DW MAC100: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 + +DW GMAC: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000 + +Allwinner sun8i MAC: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000 + +DW QoS Eth: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD +if there is more than 1 active Tx/Rx queues: + MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10FD | MAC_100FD | MAC_1000FD | MAC_2500FD + +DW XGMAC: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD + +DW XLGMAC: MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD | + MAC_25000FD | MAC_40000FD | MAC_50000FD | MAC_100000FD + +As you can see there are only two common capabilities: +MAC_ASYM_PAUSE | MAC_SYM_PAUSE. +Meanwhile what is currently implemented defines 10/100/1000 link speeds +for all IP-cores, which is definitely incorrect for DW MAC100, DW XGMAC +and DW XLGMAC devices. + +Seeing the flow-control is implemented as a callback for each MAC IP-core +(see dwmac100_flow_ctrl(), dwmac1000_flow_ctrl(), sun8i_dwmac_flow_ctrl(), +etc) and since the MAC-specific setup() method is supposed to be called +for each available DW MAC-based device, the capabilities initialization +can be freely moved to these setup() functions, thus correctly setting up +the MAC-capabilities for each IP-core (including the Allwinner Sun8i). A +new stmmac_link::caps field was specifically introduced for that so to +have all link-specific info preserved in a single structure. 
+ +Note the suggested change fixes three earlier commits at a time. The +commit 5b0d7d7da64b ("net: stmmac: Add the missing speeds that XGMAC +supports") permitted the 10-100 link speeds and 1G half-duplex mode for DW +XGMAC IP-core even though it doesn't support them. The commit df7699c70c1b +("net: stmmac: Do not cut down 1G modes") incorrectly added the MAC1000 +capability to the DW MAC100 IP-core. Similarly to the DW XGMAC the commit +8a880936e902 ("net: stmmac: Add XLGMII support") incorrectly permitted the +10-100 link speeds and 1G half-duplex mode for DW XLGMAC IP-core. + +Fixes: 5b0d7d7da64b ("net: stmmac: Add the missing speeds that XGMAC supports") +Fixes: df7699c70c1b ("net: stmmac: Do not cut down 1G modes") +Fixes: 8a880936e902 ("net: stmmac: Add XLGMII support") +Suggested-by: Russell King (Oracle) <linux@armlinux.org.uk> +Signed-off-by: Serge Semin <fancer.lancer@gmail.com> +Reviewed-by: Romain Gantois <romain.gantois@bootlin.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/stmicro/stmmac/common.h | 1 + + .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 ++ + .../ethernet/stmicro/stmmac/dwmac1000_core.c | 2 ++ + .../ethernet/stmicro/stmmac/dwmac100_core.c | 2 ++ + .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 10 ++++------ + .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 18 ++++++++---------- + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++--- + 7 files changed, 23 insertions(+), 19 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h +index 5ba606a596e77..5a1d46dcd5de0 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/common.h ++++ b/drivers/net/ethernet/stmicro/stmmac/common.h +@@ -550,6 +550,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp; + extern const struct stmmac_mode_ops dwmac4_ring_mode_ops; + + struct mac_link { ++ u32 caps; + u32 speed_mask; + u32 speed10; + u32 speed100; 
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +index b21d99faa2d04..e1b761dcfa1dd 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +@@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) + + priv->dev->priv_flags |= IFF_UNICAST_FLT; + ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_10 | MAC_100 | MAC_1000; + /* The loopback bit seems to be re-set when link change + * Simply mask it each time + * Speed 10/100/1000 are set in BIT(2)/BIT(3) +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +index 3927609abc441..8555299443f4e 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +@@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv) + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_10 | MAC_100 | MAC_1000; + mac->link.duplex = GMAC_CONTROL_DM; + mac->link.speed10 = GMAC_CONTROL_PS; + mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES; +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +index a6e8d7bd95886..7667d103cd0eb 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +@@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv) + dev_info(priv->device, "\tDWMAC100\n"); + + mac->pcsr = priv->ioaddr; ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_10 | MAC_100; + mac->link.duplex = MAC_CONTROL_F; + mac->link.speed10 = 0; + mac->link.speed100 = 0; +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index 
ec6a13e644b36..a38226d7cc6a9 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -70,14 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw, + + static void dwmac4_phylink_get_caps(struct stmmac_priv *priv) + { +- priv->phylink_config.mac_capabilities |= MAC_2500FD; +- + if (priv->plat->tx_queues_to_use > 1) +- priv->phylink_config.mac_capabilities &= +- ~(MAC_10HD | MAC_100HD | MAC_1000HD); ++ priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD); + else +- priv->phylink_config.mac_capabilities |= +- (MAC_10HD | MAC_100HD | MAC_1000HD); ++ priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD); + } + + static void dwmac4_rx_queue_enable(struct mac_device_info *hw, +@@ -1385,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv) + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD; + mac->link.duplex = GMAC_CONFIG_DM; + mac->link.speed10 = GMAC_CONFIG_PS; + mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS; +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +index e841e312077ef..f8e7775bb6336 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +@@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, + writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); + } + +-static void xgmac_phylink_get_caps(struct stmmac_priv *priv) +-{ +- priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD | +- MAC_10000FD | MAC_25000FD | +- MAC_40000FD | MAC_50000FD | +- MAC_100000FD; +-} +- + static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable) + { + u32 tx = readl(ioaddr + XGMAC_TX_CONFIG); +@@ -1540,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem 
*ioaddr, struct stmmac_fpe_cfg * + + const struct stmmac_ops dwxgmac210_ops = { + .core_init = dwxgmac2_core_init, +- .phylink_get_caps = xgmac_phylink_get_caps, + .set_mac = dwxgmac2_set_mac, + .rx_ipc = dwxgmac2_rx_ipc, + .rx_queue_enable = dwxgmac2_rx_queue_enable, +@@ -1601,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode, + + const struct stmmac_ops dwxlgmac2_ops = { + .core_init = dwxgmac2_core_init, +- .phylink_get_caps = xgmac_phylink_get_caps, + .set_mac = dwxgmac2_set_mac, + .rx_ipc = dwxgmac2_rx_ipc, + .rx_queue_enable = dwxlgmac2_rx_queue_enable, +@@ -1661,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv) + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_1000FD | MAC_2500FD | MAC_5000FD | ++ MAC_10000FD; + mac->link.duplex = 0; + mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; + mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; +@@ -1698,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv) + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + ++ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | ++ MAC_1000FD | MAC_2500FD | MAC_5000FD | ++ MAC_10000FD | MAC_25000FD | ++ MAC_40000FD | MAC_50000FD | ++ MAC_100000FD; + mac->link.duplex = 0; + mac->link.speed1000 = XLGMAC_CONFIG_SS_1000; + mac->link.speed2500 = XLGMAC_CONFIG_SS_2500; +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 2a48277ed614f..83b732c30c1bb 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1225,12 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) + xpcs_get_interfaces(priv->hw->xpcs, + priv->phylink_config.supported_interfaces); + +- priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | +- MAC_10 | MAC_100 | MAC_1000; 
+- + /* Get the MAC specific capabilities */ + stmmac_mac_phylink_get_caps(priv); + ++ priv->phylink_config.mac_capabilities = priv->hw->link.caps; ++ + max_speed = priv->plat->max_speed; + if (max_speed) + phylink_limit_mac_speed(&priv->phylink_config, max_speed); +@@ -7288,6 +7287,8 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) + + stmmac_mac_phylink_get_caps(priv); + ++ priv->phylink_config.mac_capabilities = priv->hw->link.caps; ++ + max_speed = priv->plat->max_speed; + if (max_speed) + phylink_limit_mac_speed(&priv->phylink_config, max_speed); +-- +2.43.0 + diff --git a/queue-6.8/net-stmmac-fix-max-speed-being-ignored-on-queue-re-i.patch b/queue-6.8/net-stmmac-fix-max-speed-being-ignored-on-queue-re-i.patch new file mode 100644 index 0000000000..1e47e13bb4 --- /dev/null +++ b/queue-6.8/net-stmmac-fix-max-speed-being-ignored-on-queue-re-i.patch @@ -0,0 +1,57 @@ +From 2d9f7c1e0acc128fdd6b08ae054d7bf70102789c Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 12 Apr 2024 21:03:15 +0300 +Subject: net: stmmac: Fix max-speed being ignored on queue re-init + +From: Serge Semin <fancer.lancer@gmail.com> + +[ Upstream commit 59c3d6ca6cbded6c6599e975b42a9d6a27fcbaf2 ] + +It's possible to have the maximum link speed being artificially limited on +the platform-specific basis. It's done either by setting up the +plat_stmmacenet_data::max_speed field or by specifying the "max-speed" +DT-property. In such cases it's required that any specific +MAC-capabilities re-initializations would take the limit into account. In +particular the link speed capabilities may change during the number of +active Tx/Rx queues re-initialization. But the currently implemented +procedure doesn't take the speed limit into account. + +Fix that by calling phylink_limit_mac_speed() in the +stmmac_reinit_queues() method if the speed limitation was required in the +same way as it's done in the stmmac_phy_setup() function. 
+ +Fixes: 95201f36f395 ("net: stmmac: update MAC capabilities when tx queues are updated") +Signed-off-by: Serge Semin <fancer.lancer@gmail.com> +Reviewed-by: Romain Gantois <romain.gantois@bootlin.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index cbb00ca23a7c3..2a48277ed614f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -7272,6 +7272,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) + { + struct stmmac_priv *priv = netdev_priv(dev); + int ret = 0, i; ++ int max_speed; + + if (netif_running(dev)) + stmmac_release(dev); +@@ -7287,6 +7288,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) + + stmmac_mac_phylink_get_caps(priv); + ++ max_speed = priv->plat->max_speed; ++ if (max_speed) ++ phylink_limit_mac_speed(&priv->phylink_config, max_speed); ++ + stmmac_napi_add(dev); + + if (netif_running(dev)) +-- +2.43.0 + diff --git a/queue-6.8/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch b/queue-6.8/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch new file mode 100644 index 0000000000..5126c60341 --- /dev/null +++ b/queue-6.8/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch @@ -0,0 +1,220 @@ +From a60bb6e1ec53837b71e5d04f70cf0e40b396de5c Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 11:24:59 +0200 +Subject: netfilter: br_netfilter: skip conntrack input hook for promisc + packets + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 751de2012eafa4d46d8081056761fa0e9cc8a178 ] + +For historical reasons, when bridge device is in promisc mode, packets +that are directed to the taps 
follow bridge input hook path. This patch +adds a workaround to reset conntrack for these packets. + +Jianbo Liu reports warning splats in their test infrastructure where +cloned packets reach the br_netfilter input hook to confirm the +conntrack object. + +Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has +reached the input hook because it is passed up to the bridge device to +reach the taps. + +[ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core +[ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19 +[ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1 +[ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202 +[ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000 +[ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000 +[ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003 +[ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000 +[ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800 +[ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000 +[ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 57.583564] CR2: 000000c4206aa000 CR3: 
0000000103847001 CR4: 0000000000370eb0 +[ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: +0000000000000000 +[ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: +0000000000000400 +[ 57.585440] Call Trace: +[ 57.585721] <IRQ> +[ 57.585976] ? __warn+0x7d/0x130 +[ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.586811] ? report_bug+0xf1/0x1c0 +[ 57.587177] ? handle_bug+0x3f/0x70 +[ 57.587539] ? exc_invalid_op+0x13/0x60 +[ 57.587929] ? asm_exc_invalid_op+0x16/0x20 +[ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.588825] nf_hook_slow+0x3d/0xd0 +[ 57.589188] ? br_handle_vlan+0x4b/0x110 +[ 57.589579] br_pass_frame_up+0xfc/0x150 +[ 57.589970] ? br_port_flags_change+0x40/0x40 +[ 57.590396] br_handle_frame_finish+0x346/0x5e0 +[ 57.590837] ? ipt_do_table+0x32e/0x430 +[ 57.591221] ? br_handle_local_finish+0x20/0x20 +[ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter] +[ 57.592286] ? br_handle_local_finish+0x20/0x20 +[ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter] +[ 57.593348] ? br_handle_local_finish+0x20/0x20 +[ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat] +[ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter] +[ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter] +[ 57.595280] br_handle_frame+0x1f3/0x3d0 +[ 57.595676] ? br_handle_local_finish+0x20/0x20 +[ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0 +[ 57.596566] __netif_receive_skb_core+0x25b/0xfc0 +[ 57.597017] ? 
__napi_build_skb+0x37/0x40 +[ 57.597418] __netif_receive_skb_list_core+0xfb/0x220 + +Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Reported-by: Jianbo Liu <jianbol@nvidia.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/bridge/br_input.c | 15 +++++++++++---- + net/bridge/br_netfilter_hooks.c | 6 ++++++ + net/bridge/br_private.h | 1 + + net/bridge/netfilter/nf_conntrack_bridge.c | 14 ++++++++++---- + 4 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c +index f21097e734827..ceaa5a89b947f 100644 +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) + return netif_receive_skb(skb); + } + +-static int br_pass_frame_up(struct sk_buff *skb) ++static int br_pass_frame_up(struct sk_buff *skb, bool promisc) + { + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); +@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) + br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), + BR_MCAST_DIR_TX); + ++ BR_INPUT_SKB_CB(skb)->promisc = promisc; ++ + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, + br_netif_receive_skb); +@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + struct net_bridge_mcast *brmctx; + struct net_bridge_vlan *vlan; + struct net_bridge *br; ++ bool promisc; + u16 vid = 0; + u8 state; + +@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); + +- local_rcv = !!(br->dev->flags & IFF_PROMISC); ++ promisc = !!(br->dev->flags & IFF_PROMISC); ++ local_rcv = promisc; ++ + if 
(is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { + /* by definition the broadcast is also a multicast address */ + if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { +@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + unsigned long now = jiffies; + + if (test_bit(BR_FDB_LOCAL, &dst->flags)) +- return br_pass_frame_up(skb); ++ return br_pass_frame_up(skb, false); + + if (now != dst->used) + dst->used = now; +@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + } + + if (local_rcv) +- return br_pass_frame_up(skb); ++ return br_pass_frame_up(skb, promisc); + + out: + return 0; +@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) + goto forward; + } + ++ BR_INPUT_SKB_CB(skb)->promisc = false; ++ + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index 35e10c5a766d5..22e35623c148a 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) + { ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + ++ if (promisc) { ++ nf_reset_ct(skb); ++ return NF_ACCEPT; ++ } ++ + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 86ea5e6689b5c..d4bedc87b1d8f 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -589,6 +589,7 @@ struct br_input_skb_cb { + #endif + u8 proxyarp_replied:1; + u8 src_port_isolated:1; ++ u8 promisc:1; + #ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_filtered:1; + #endif +diff --git 
a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c +index 6f877e31709ba..c3c51b9a68265 100644 +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) + { +- enum ip_conntrack_info ctinfo; ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; ++ struct nf_conntrack *nfct = skb_nfct(skb); + struct nf_conn *ct; + +- if (skb->pkt_type == PACKET_HOST) ++ if (promisc) { ++ nf_reset_ct(skb); ++ return NF_ACCEPT; ++ } ++ ++ if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ +- ct = nf_ct_get(skb, &ctinfo); +- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ +-- +2.43.0 + diff --git a/queue-6.8/netfilter-flowtable-incorrect-pppoe-tuple.patch b/queue-6.8/netfilter-flowtable-incorrect-pppoe-tuple.patch new file mode 100644 index 0000000000..321ff2a12d --- /dev/null +++ b/queue-6.8/netfilter-flowtable-incorrect-pppoe-tuple.patch @@ -0,0 +1,37 @@ +From d9fc3855a93c62f7ddd62e21ef73af79e862184f Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 00:09:00 +0200 +Subject: netfilter: flowtable: incorrect pppoe tuple + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 6db5dc7b351b9569940cd1cf445e237c42cd6d27 ] + +pppoe traffic reaching ingress path does not match the flowtable entry +because the pppoe header is expected to be at the network header offset. 
+This bug causes a mismatch in the flow table lookup, so pppoe packets +enter the classical forwarding path. + +Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_flow_table_ip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index 9e9e105052dae..5383bed3d3e00 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, + tuple->encap[i].proto = skb->protocol; + break; + case htons(ETH_P_PPP_SES): +- phdr = (struct pppoe_hdr *)skb_mac_header(skb); ++ phdr = (struct pppoe_hdr *)skb_network_header(skb); + tuple->encap[i].id = ntohs(phdr->sid); + tuple->encap[i].proto = skb->protocol; + break; +-- +2.43.0 + diff --git a/queue-6.8/netfilter-flowtable-validate-pppoe-header.patch b/queue-6.8/netfilter-flowtable-validate-pppoe-header.patch new file mode 100644 index 0000000000..8e7031075c --- /dev/null +++ b/queue-6.8/netfilter-flowtable-validate-pppoe-header.patch @@ -0,0 +1,106 @@ +From 7cf6e02ab2eee0ca1f9fb38861fb024a73fb6668 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 13:47:33 +0200 +Subject: netfilter: flowtable: validate pppoe header + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 87b3593bed1868b2d9fe096c01bcdf0ea86cbebf ] + +Ensure there is sufficient room to access the protocol field of the +PPPoe header. Validate it once before the flowtable lookup, then use a +helper function to access protocol field. 
+ +Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com +Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/netfilter/nf_flow_table.h | 12 +++++++++++- + net/netfilter/nf_flow_table_inet.c | 3 ++- + net/netfilter/nf_flow_table_ip.c | 8 +++++--- + 3 files changed, 18 insertions(+), 5 deletions(-) + +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index a763dd327c6ea..9abb7ee40d72f 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, + int nf_flow_table_offload_init(void); + void nf_flow_table_offload_exit(void); + +-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) ++static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) + { + __be16 proto; + +@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) + return 0; + } + ++static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) ++{ ++ if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) ++ return false; ++ ++ *inner_proto = __nf_flow_pppoe_proto(skb); ++ ++ return true; ++} ++ + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) + #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) + #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ +diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c +index 9505f9d188ff2..6eef15648b7b0 100644 +--- a/net/netfilter/nf_flow_table_inet.c ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, + proto = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): +- proto = nf_flow_pppoe_proto(skb); ++ if 
(!nf_flow_pppoe_proto(skb, &proto)) ++ return NF_ACCEPT; + break; + default: + proto = skb->protocol; +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index e45fade764096..9e9e105052dae 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, + return NF_STOLEN; + } + +-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, ++static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, + u32 *offset) + { + struct vlan_ethhdr *veth; ++ __be16 inner_proto; + + switch (skb->protocol) { + case htons(ETH_P_8021Q): +@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, + } + break; + case htons(ETH_P_PPP_SES): +- if (nf_flow_pppoe_proto(skb) == proto) { ++ if (nf_flow_pppoe_proto(skb, &inner_proto) && ++ inner_proto == proto) { + *offset += PPPOE_SES_HLEN; + return true; + } +@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, + skb_reset_network_header(skb); + break; + case htons(ETH_P_PPP_SES): +- skb->protocol = nf_flow_pppoe_proto(skb); ++ skb->protocol = __nf_flow_pppoe_proto(skb); + skb_pull(skb, PPPOE_SES_HLEN); + skb_reset_network_header(skb); + break; +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nf_tables-fix-memleak-in-map-from-abort-pa.patch b/queue-6.8/netfilter-nf_tables-fix-memleak-in-map-from-abort-pa.patch new file mode 100644 index 0000000000..a11bb11a1f --- /dev/null +++ b/queue-6.8/netfilter-nf_tables-fix-memleak-in-map-from-abort-pa.patch @@ -0,0 +1,93 @@ +From 8d8ff6be090d428caabf443af1279cee80ffa3ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 17:43:21 +0200 +Subject: netfilter: nf_tables: fix memleak in map from abort path + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 86a1471d7cde792941109b93b558b5dc078b9ee9 ] + +The delete set command 
does not rely on the transaction object for +element removal, therefore, a combination of delete element + delete set +from the abort path could result in restoring twice the refcount of the +mapping. + +Check for inactive element in the next generation for the delete element +command in the abort path, skip restoring state if next generation bit +has been already cleared. This is similar to the activate logic using +the set walk iterator. + +[ 6170.286929] ------------[ cut here ]------------ +[ 6170.286939] WARNING: CPU: 6 PID: 790302 at net/netfilter/nf_tables_api.c:2086 nf_tables_chain_destroy+0x1f7/0x220 [nf_tables] +[ 6170.287071] Modules linked in: [...] +[ 6170.287633] CPU: 6 PID: 790302 Comm: kworker/6:2 Not tainted 6.9.0-rc3+ #365 +[ 6170.287768] RIP: 0010:nf_tables_chain_destroy+0x1f7/0x220 [nf_tables] +[ 6170.287886] Code: df 48 8d 7d 58 e8 69 2e 3b df 48 8b 7d 58 e8 80 1b 37 df 48 8d 7d 68 e8 57 2e 3b df 48 8b 7d 68 e8 6e 1b 37 df 48 89 ef eb c4 <0f> 0b 48 83 c4 08 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 0f +[ 6170.287895] RSP: 0018:ffff888134b8fd08 EFLAGS: 00010202 +[ 6170.287904] RAX: 0000000000000001 RBX: ffff888125bffb28 RCX: dffffc0000000000 +[ 6170.287912] RDX: 0000000000000003 RSI: ffffffffa20298ab RDI: ffff88811ebe4750 +[ 6170.287919] RBP: ffff88811ebe4700 R08: ffff88838e812650 R09: fffffbfff0623a55 +[ 6170.287926] R10: ffffffff8311d2af R11: 0000000000000001 R12: ffff888125bffb10 +[ 6170.287933] R13: ffff888125bffb10 R14: dead000000000122 R15: dead000000000100 +[ 6170.287940] FS: 0000000000000000(0000) GS:ffff888390b00000(0000) knlGS:0000000000000000 +[ 6170.287948] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 6170.287955] CR2: 00007fd31fc00710 CR3: 0000000133f60004 CR4: 00000000001706f0 +[ 6170.287962] Call Trace: +[ 6170.287967] <TASK> +[ 6170.287973] ? __warn+0x9f/0x1a0 +[ 6170.287986] ? nf_tables_chain_destroy+0x1f7/0x220 [nf_tables] +[ 6170.288092] ? report_bug+0x1b1/0x1e0 +[ 6170.287986] ? 
nf_tables_chain_destroy+0x1f7/0x220 [nf_tables] +[ 6170.288092] ? report_bug+0x1b1/0x1e0 +[ 6170.288104] ? handle_bug+0x3c/0x70 +[ 6170.288112] ? exc_invalid_op+0x17/0x40 +[ 6170.288120] ? asm_exc_invalid_op+0x1a/0x20 +[ 6170.288132] ? nf_tables_chain_destroy+0x2b/0x220 [nf_tables] +[ 6170.288243] ? nf_tables_chain_destroy+0x1f7/0x220 [nf_tables] +[ 6170.288366] ? nf_tables_chain_destroy+0x2b/0x220 [nf_tables] +[ 6170.288483] nf_tables_trans_destroy_work+0x588/0x590 [nf_tables] + +Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 32a73f3670664..0e697e53a7902 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7219,6 +7219,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) + } + } + ++static int nft_setelem_active_next(const struct net *net, ++ const struct nft_set *set, ++ struct nft_elem_priv *elem_priv) ++{ ++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); ++ u8 genmask = nft_genmask_next(net); ++ ++ return nft_set_elem_active(ext, genmask); ++} ++ + static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) +@@ -10636,8 +10646,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + case NFT_MSG_DESTROYSETELEM: + te = (struct nft_trans_elem *)trans->data; + +- nft_setelem_data_activate(net, te->set, te->elem_priv); +- nft_setelem_activate(net, te->set, te->elem_priv); ++ if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { ++ nft_setelem_data_activate(net, te->set, te->elem_priv); ++ nft_setelem_activate(net, te->set, te->elem_priv); ++ } + if 
(!nft_setelem_is_catchall(te->set, te->elem_priv)) + te->set->ndeact--; + +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch b/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch new file mode 100644 index 0000000000..843161a71a --- /dev/null +++ b/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch @@ -0,0 +1,58 @@ +From 622d63a71649beab5f710d04894d2d2e69ef25a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 7 Apr 2024 14:56:04 +0800 +Subject: netfilter: nf_tables: Fix potential data-race in + __nft_expr_type_get() + +From: Ziyang Xuan <william.xuanziyang@huawei.com> + +[ Upstream commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf ] + +nft_unregister_expr() can run concurrently with __nft_expr_type_get(), +and there is no protection when iterating over the nf_tables_expressions +list in __nft_expr_type_get(). Therefore, there is a potential data race +on nf_tables_expressions list entries. + +Use list_for_each_entry_rcu() to iterate over nf_tables_expressions +list in __nft_expr_type_get(), and use rcu_read_lock() in the caller +nft_expr_type_get() to protect the entire type query process. 
+ +Fixes: ef1f7df9170d ("netfilter: nf_tables: expression ops overloading") +Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 21581bae700c4..a1cf875e5d35f 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3048,7 +3048,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, + { + const struct nft_expr_type *type, *candidate = NULL; + +- list_for_each_entry(type, &nf_tables_expressions, list) { ++ list_for_each_entry_rcu(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) { + if (!type->family && !candidate) + candidate = type; +@@ -3080,9 +3080,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, + if (nla == NULL) + return ERR_PTR(-EINVAL); + ++ rcu_read_lock(); + type = __nft_expr_type_get(family, nla); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-25621 b/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-25621 new file mode 100644 index 0000000000..cb0b3ff7a8 --- /dev/null +++ b/queue-6.8/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-25621 @@ -0,0 +1,57 @@ +From 7950774b77f95769740b6f7d6b811c143b6cbdeb Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 7 Apr 2024 14:56:05 +0800 +Subject: netfilter: nf_tables: Fix potential data-race in __nft_obj_type_get() + +From: Ziyang Xuan <william.xuanziyang@huawei.com> + +[ Upstream commit 
d78d867dcea69c328db30df665be5be7d0148484 ] + +nft_unregister_obj() can run concurrently with __nft_obj_type_get(), +and there is no protection when iterating over the nf_tables_objects +list in __nft_obj_type_get(). Therefore, there is a potential data race +on nf_tables_objects list entries. + +Use list_for_each_entry_rcu() to iterate over nf_tables_objects +list in __nft_obj_type_get(), and use rcu_read_lock() in the caller +nft_obj_type_get() to protect the entire type query process. + +Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") +Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index a1cf875e5d35f..ad9fb019684b3 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7607,7 +7607,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) + { + const struct nft_object_type *type; + +- list_for_each_entry(type, &nf_tables_objects, list) { ++ list_for_each_entry_rcu(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; +@@ -7623,9 +7623,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) + { + const struct nft_object_type *type; + ++ rcu_read_lock(); + type = __nft_obj_type_get(objtype, family); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nf_tables-missing-iterator-type-in-lookup-.patch b/queue-6.8/netfilter-nf_tables-missing-iterator-type-in-lookup-.patch new file mode 100644 index 0000000000..d651ab4ab9 --- 
/dev/null +++ b/queue-6.8/netfilter-nf_tables-missing-iterator-type-in-lookup-.patch @@ -0,0 +1,49 @@ +From eda3ba19371abd3591e25adebf7f16d478e57dcb Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 17:43:01 +0200 +Subject: netfilter: nf_tables: missing iterator type in lookup walk + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit efefd4f00c967d00ad7abe092554ffbb70c1a793 ] + +Add missing iterator type to lookup expression and tighten WARN_ON_ONCE +check in pipapo to spot earlier that this is unset. + +Fixes: 29b359cf6d95 ("netfilter: nft_set_pipapo: walk over current view on netlink dump") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nft_lookup.c | 1 + + net/netfilter/nft_set_pipapo.c | 3 ++- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c +index 870e5b113d13e..87c18eddb0689 100644 +--- a/net/netfilter/nft_lookup.c ++++ b/net/netfilter/nft_lookup.c +@@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, + return 0; + + iter.genmask = nft_genmask_next(ctx->net); ++ iter.type = NFT_ITER_UPDATE; + iter.skip = 0; + iter.count = 0; + iter.err = 0; +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 979b5e80c400b..c91efad49c6d5 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -2048,7 +2048,8 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_pipapo_field *f; + int i, r; + +- WARN_ON_ONCE(iter->type == NFT_ITER_UNSPEC); ++ WARN_ON_ONCE(iter->type != NFT_ITER_READ && ++ iter->type != NFT_ITER_UPDATE); + + rcu_read_lock(); + if (iter->type == NFT_ITER_READ) +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nf_tables-restore-set-elements-when-delete.patch b/queue-6.8/netfilter-nf_tables-restore-set-elements-when-delete.patch new 
file mode 100644 index 0000000000..0edd5c64d1 --- /dev/null +++ b/queue-6.8/netfilter-nf_tables-restore-set-elements-when-delete.patch @@ -0,0 +1,317 @@ +From 412c2bc4e0219c9d801fb939fdb22fd9f65a136c Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 17:43:11 +0200 +Subject: netfilter: nf_tables: restore set elements when delete set fails + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit e79b47a8615d42c68aaeb68971593333667382ed ] + +From abort path, nft_mapelem_activate() needs to restore refcounters to +the original state. Currently, it uses the set->ops->walk() to iterate +over these set elements. The existing set iterator skips inactive +elements in the next generation, this does not work from the abort path +to restore the original state since it has to skip active elements +instead (not inactive ones). + +This patch moves the check for inactive elements to the set iterator +callback, then it reverses the logic for the .activate case which +needs to skip active elements. + +Toggle next generation bit for elements when delete set command is +invoked and call nft_clear() from .activate (abort) path to restore the +next generation bit. + +The splat below shows an object in mappings memleak: + +[43929.457523] ------------[ cut here ]------------ +[43929.457532] WARNING: CPU: 0 PID: 1139 at include/net/netfilter/nf_tables.h:1237 nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables] +[...] 
+[43929.458014] RIP: 0010:nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables] +[43929.458076] Code: 83 f8 01 77 ab 49 8d 7c 24 08 e8 37 5e d0 de 49 8b 6c 24 08 48 8d 7d 50 e8 e9 5c d0 de 8b 45 50 8d 50 ff 89 55 50 85 c0 75 86 <0f> 0b eb 82 0f 0b eb b3 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 +[43929.458081] RSP: 0018:ffff888140f9f4b0 EFLAGS: 00010246 +[43929.458086] RAX: 0000000000000000 RBX: ffff8881434f5288 RCX: dffffc0000000000 +[43929.458090] RDX: 00000000ffffffff RSI: ffffffffa26d28a7 RDI: ffff88810ecc9550 +[43929.458093] RBP: ffff88810ecc9500 R08: 0000000000000001 R09: ffffed10281f3e8f +[43929.458096] R10: 0000000000000003 R11: ffff0000ffff0000 R12: ffff8881434f52a0 +[43929.458100] R13: ffff888140f9f5f4 R14: ffff888151c7a800 R15: 0000000000000002 +[43929.458103] FS: 00007f0c687c4740(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 +[43929.458107] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[43929.458111] CR2: 00007f58dbe5b008 CR3: 0000000123602005 CR4: 00000000001706f0 +[43929.458114] Call Trace: +[43929.458118] <TASK> +[43929.458121] ? __warn+0x9f/0x1a0 +[43929.458127] ? nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables] +[43929.458188] ? report_bug+0x1b1/0x1e0 +[43929.458196] ? handle_bug+0x3c/0x70 +[43929.458200] ? exc_invalid_op+0x17/0x40 +[43929.458211] ? nft_setelem_data_deactivate+0xd7/0xf0 [nf_tables] +[43929.458271] ? nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables] +[43929.458332] nft_mapelem_deactivate+0x24/0x30 [nf_tables] +[43929.458392] nft_rhash_walk+0xdd/0x180 [nf_tables] +[43929.458453] ? __pfx_nft_rhash_walk+0x10/0x10 [nf_tables] +[43929.458512] ? rb_insert_color+0x2e/0x280 +[43929.458520] nft_map_deactivate+0xdc/0x1e0 [nf_tables] +[43929.458582] ? __pfx_nft_map_deactivate+0x10/0x10 [nf_tables] +[43929.458642] ? __pfx_nft_mapelem_deactivate+0x10/0x10 [nf_tables] +[43929.458701] ? 
__rcu_read_unlock+0x46/0x70 +[43929.458709] nft_delset+0xff/0x110 [nf_tables] +[43929.458769] nft_flush_table+0x16f/0x460 [nf_tables] +[43929.458830] nf_tables_deltable+0x501/0x580 [nf_tables] + +Fixes: 628bd3e49cba ("netfilter: nf_tables: drop map element references from preparation phase") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 44 ++++++++++++++++++++++++++++++---- + net/netfilter/nft_set_bitmap.c | 4 +--- + net/netfilter/nft_set_hash.c | 8 ++----- + net/netfilter/nft_set_pipapo.c | 5 +--- + net/netfilter/nft_set_rbtree.c | 4 +--- + 5 files changed, 45 insertions(+), 20 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 8a3fa7f5b456d..32a73f3670664 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + const struct nft_set_iter *iter, + struct nft_elem_priv *elem_priv) + { ++ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); ++ ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ ++ nft_set_elem_change_active(ctx->net, set, ext); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); + + return 0; +@@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, + if (!nft_set_elem_active(ext, genmask)) + continue; + ++ nft_set_elem_change_active(ctx->net, set, ext); + nft_setelem_data_deactivate(ctx->net, set, catchall->elem); + break; + } +@@ -3868,6 +3875,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_data *data; + int err; + ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) + return 0; +@@ -3891,17 +3901,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, + + 
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) + { +- u8 genmask = nft_genmask_next(ctx->net); ++ struct nft_set_iter dummy_iter = { ++ .genmask = nft_genmask_next(ctx->net), ++ }; + struct nft_set_elem_catchall *catchall; ++ + struct nft_set_ext *ext; + int ret = 0; + + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); +- if (!nft_set_elem_active(ext, genmask)) ++ if (!nft_set_elem_active(ext, dummy_iter.genmask)) + continue; + +- ret = nft_setelem_validate(ctx, set, NULL, catchall->elem); ++ ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem); + if (ret < 0) + return ret; + } +@@ -5398,6 +5411,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + const struct nft_set_iter *iter, + struct nft_elem_priv *elem_priv) + { ++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); ++ ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ + return nft_setelem_data_validate(ctx, set, elem_priv); + } + +@@ -5490,6 +5508,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx, + const struct nft_set_iter *iter, + struct nft_elem_priv *elem_priv) + { ++ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); ++ ++ /* called from abort path, reverse check to undo changes. 
*/ ++ if (nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ ++ nft_clear(ctx->net, ext); + nft_setelem_data_activate(ctx->net, set, elem_priv); + + return 0; +@@ -5507,6 +5532,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, + if (!nft_set_elem_active(ext, genmask)) + continue; + ++ nft_clear(ctx->net, ext); + nft_setelem_data_activate(ctx->net, set, catchall->elem); + break; + } +@@ -5781,6 +5807,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + struct nft_set_dump_args *args; + ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ + if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) + return 0; + +@@ -6631,7 +6660,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set, + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + + if (nft_setelem_is_catchall(set, elem_priv)) { +- nft_set_elem_change_active(net, set, ext); ++ nft_clear(net, ext); + } else { + set->ops->activate(net, set, elem_priv); + } +@@ -7313,8 +7342,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, + const struct nft_set_iter *iter, + struct nft_elem_priv *elem_priv) + { ++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + struct nft_trans *trans; + ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ + trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, + sizeof(struct nft_trans_elem), GFP_ATOMIC); + if (!trans) +@@ -10792,6 +10825,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, + { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + ++ if (!nft_set_elem_active(ext, iter->genmask)) ++ return 0; ++ + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) + return 0; +diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c +index 32df7a16835da..1caa04619dc6d 100644 +--- 
a/net/netfilter/nft_set_bitmap.c ++++ b/net/netfilter/nft_set_bitmap.c +@@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net, + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); + /* Enter 11 state. */ + priv->bitmap[idx] |= (genmask << off); +- nft_set_elem_change_active(net, set, &be->ext); ++ nft_clear(net, &be->ext); + } + + static void nft_bitmap_flush(const struct net *net, +@@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, + list_for_each_entry_rcu(be, &priv->list, head) { + if (iter->count < iter->skip) + goto cont; +- if (!nft_set_elem_active(&be->ext, iter->genmask)) +- goto cont; + + iter->err = iter->fn(ctx, set, iter, &be->priv); + +diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c +index 6968a3b342367..daa56dda737ae 100644 +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, + { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); + +- nft_set_elem_change_active(net, set, &he->ext); ++ nft_clear(net, &he->ext); + } + + static void nft_rhash_flush(const struct net *net, +@@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, + + if (iter->count < iter->skip) + goto cont; +- if (!nft_set_elem_active(&he->ext, iter->genmask)) +- goto cont; + + iter->err = iter->fn(ctx, set, iter, &he->priv); + if (iter->err < 0) +@@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, + { + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); + +- nft_set_elem_change_active(net, set, &he->ext); ++ nft_clear(net, &he->ext); + } + + static void nft_hash_flush(const struct net *net, +@@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, + hlist_for_each_entry_rcu(he, &priv->table[i], node) { + if (iter->count < iter->skip) + goto cont; +- 
if (!nft_set_elem_active(&he->ext, iter->genmask)) +- goto cont; + + iter->err = iter->fn(ctx, set, iter, &he->priv); + if (iter->err < 0) +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index c91efad49c6d5..b42a34087e807 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -1773,7 +1773,7 @@ static void nft_pipapo_activate(const struct net *net, + { + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); + +- nft_set_elem_change_active(net, set, &e->ext); ++ nft_clear(net, &e->ext); + } + + /** +@@ -2074,9 +2074,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + + e = f->mt[r].e; + +- if (!nft_set_elem_active(&e->ext, iter->genmask)) +- goto cont; +- + iter->err = iter->fn(ctx, set, iter, &e->priv); + if (iter->err < 0) + goto out; +diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c +index 9944fe479e536..b7ea21327549b 100644 +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net, + { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); + +- nft_set_elem_change_active(net, set, &rbe->ext); ++ nft_clear(net, &rbe->ext); + } + + static void nft_rbtree_flush(const struct net *net, +@@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, + + if (iter->count < iter->skip) + goto cont; +- if (!nft_set_elem_active(&rbe->ext, iter->genmask)) +- goto cont; + + iter->err = iter->fn(ctx, set, iter, &rbe->priv); + if (iter->err < 0) { +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_set_pipapo-constify-lookup-fn-args-whe.patch b/queue-6.8/netfilter-nft_set_pipapo-constify-lookup-fn-args-whe.patch new file mode 100644 index 0000000000..e6ce73c0df --- /dev/null +++ b/queue-6.8/netfilter-nft_set_pipapo-constify-lookup-fn-args-whe.patch @@ -0,0 +1,265 @@ +From 1599754294fd1f62e0a7613637dbacbefae8f9e0 Mon Sep 17 00:00:00 2001 
+From: Sasha Levin <sashal@kernel.org> +Date: Tue, 13 Feb 2024 16:23:37 +0100 +Subject: netfilter: nft_set_pipapo: constify lookup fn args where possible + +From: Florian Westphal <fw@strlen.de> + +[ Upstream commit f04df573faf90bb828a2241b650598c02c074323 ] + +Those get called from packet path, content must not be modified. +No functional changes intended. + +Reviewed-by: Stefano Brivio <sbrivio@redhat.com> +Signed-off-by: Florian Westphal <fw@strlen.de> +Stable-dep-of: 29b359cf6d95 ("netfilter: nft_set_pipapo: walk over current view on netlink dump") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nft_set_pipapo.c | 18 +++++---- + net/netfilter/nft_set_pipapo.h | 6 +-- + net/netfilter/nft_set_pipapo_avx2.c | 59 +++++++++++++++++------------ + 3 files changed, 48 insertions(+), 35 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index b3b282de802de..7756d70af868c 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -360,7 +360,7 @@ + * Return: -1 on no match, bit position on 'match_only', 0 otherwise. 
+ */ + int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +- union nft_pipapo_map_bucket *mt, bool match_only) ++ const union nft_pipapo_map_bucket *mt, bool match_only) + { + unsigned long bitset; + int k, ret = -1; +@@ -412,9 +412,9 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + struct nft_pipapo_scratch *scratch; + unsigned long *res_map, *fill_map; + u8 genmask = nft_genmask_cur(net); ++ const struct nft_pipapo_match *m; ++ const struct nft_pipapo_field *f; + const u8 *rp = (const u8 *)key; +- struct nft_pipapo_match *m; +- struct nft_pipapo_field *f; + bool map_index; + int i; + +@@ -519,11 +519,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, + { + struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); + struct nft_pipapo *priv = nft_set_priv(set); +- struct nft_pipapo_match *m = priv->clone; + unsigned long *res_map, *fill_map = NULL; +- struct nft_pipapo_field *f; ++ const struct nft_pipapo_match *m; ++ const struct nft_pipapo_field *f; + int i; + ++ m = priv->clone; ++ + res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); + if (!res_map) { + ret = ERR_PTR(-ENOMEM); +@@ -1597,7 +1599,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) + + while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { + union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; +- struct nft_pipapo_field *f; ++ const struct nft_pipapo_field *f; + int i, start, rules_fx; + + start = first_rule; +@@ -2039,8 +2041,8 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + { + struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); +- struct nft_pipapo_match *m; +- struct nft_pipapo_field *f; ++ const struct nft_pipapo_match *m; ++ const struct nft_pipapo_field *f; + int i, r; + + rcu_read_lock(); +diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h +index 3842c7341a9f4..42464e7c24ac0 
100644 +--- a/net/netfilter/nft_set_pipapo.h ++++ b/net/netfilter/nft_set_pipapo.h +@@ -187,7 +187,7 @@ struct nft_pipapo_elem { + }; + + int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +- union nft_pipapo_map_bucket *mt, bool match_only); ++ const union nft_pipapo_map_bucket *mt, bool match_only); + + /** + * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets +@@ -195,7 +195,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +-static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, ++static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) + { +@@ -223,7 +223,7 @@ static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +-static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, ++static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) + { +diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c +index a3a8ddca99189..d08407d589eac 100644 +--- a/net/netfilter/nft_set_pipapo_avx2.c ++++ b/net/netfilter/nft_set_pipapo_avx2.c +@@ -212,8 +212,9 @@ static int nft_pipapo_avx2_refill(int offset, unsigned long *map, + * word index to be checked next (i.e. first filled word). 
+ */ + static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf }; +@@ -274,8 +275,9 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf }; +@@ -350,8 +352,9 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, +@@ -445,8 +448,9 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). 
+ */ + static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, +@@ -534,8 +538,9 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, +@@ -669,8 +674,9 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; +@@ -726,8 +732,9 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). 
+ */ + static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; +@@ -790,8 +797,9 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; +@@ -865,8 +873,9 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; +@@ -950,8 +959,9 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). 
+ */ + static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; +@@ -1042,8 +1052,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, + * word index to be checked next (i.e. first filled word). + */ + static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, +- struct nft_pipapo_field *f, int offset, +- const u8 *pkt, bool first, bool last) ++ const struct nft_pipapo_field *f, ++ int offset, const u8 *pkt, ++ bool first, bool last) + { + unsigned long bsize = f->bsize; + int i, ret = -1, b; +@@ -1119,9 +1130,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; + u8 genmask = nft_genmask_cur(net); ++ const struct nft_pipapo_match *m; ++ const struct nft_pipapo_field *f; + const u8 *rp = (const u8 *)key; +- struct nft_pipapo_match *m; +- struct nft_pipapo_field *f; + unsigned long *res, *fill; + bool map_index; + int i, ret = 0; +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_set_pipapo-do-not-free-live-element.patch b/queue-6.8/netfilter-nft_set_pipapo-do-not-free-live-element.patch new file mode 100644 index 0000000000..66e1471a18 --- /dev/null +++ b/queue-6.8/netfilter-nft_set_pipapo-do-not-free-live-element.patch @@ -0,0 +1,105 @@ +From ea61cba27d0bdf2f8375bf51000723f165a38684 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 21:05:13 +0200 +Subject: netfilter: nft_set_pipapo: do not free live element + +From: Florian Westphal <fw@strlen.de> + +[ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] + +Pablo reports a crash with 
large batches of elements with a +back-to-back add/remove pattern. Quoting Pablo: + + add_elem("00000000") timeout 100 ms + ... + add_elem("0000000X") timeout 100 ms + del_elem("0000000X") <---------------- delete one that was just added + ... + add_elem("00005000") timeout 100 ms + + 1) nft_pipapo_remove() removes element 0000000X + Then, KASAN shows a splat. + +Looking at the remove function there is a chance that we will drop a +rule that maps to a non-deactivated element. + +Removal happens in two steps, first we do a lookup for key k and return the +to-be-removed element and mark it as inactive in the next generation. +Then, in a second step, the element gets removed from the set/map. + +The _remove function does not work correctly if we have more than one +element that share the same key. + +This can happen if we insert an element into a set when the set already +holds an element with same key, but the element mapping to the existing +key has timed out or is not active in the next generation. + +In such a case it's possible that removal will unmap the wrong element. +If this happens, we will leak the non-deactivated element, it becomes +unreachable. + +The element that got deactivated (and will be freed later) will +remain reachable in the set data structure, this can result in +a crash when such an element is retrieved during lookup (stale +pointer). + +Add a check that the fully matching key does in fact map to the element +that we have marked as inactive in the deactivation step. +If not, we need to continue searching. + +Add a bug/warn trap at the end of the function as well, the remove +function must not ever be called with an invisible/unreachable/non-existent +element. 
+ +v2: avoid unneeded temporary variable (Stefano) + +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Reported-by: Pablo Neira Ayuso <pablo@netfilter.org> +Reviewed-by: Stefano Brivio <sbrivio@redhat.com> +Signed-off-by: Florian Westphal <fw@strlen.de> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nft_set_pipapo.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 69b1ab6849e67..979b5e80c400b 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -2002,6 +2002,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, + rules_fx = rules_f0; + + nft_pipapo_for_each_field(f, i, m) { ++ bool last = i == m->field_count - 1; ++ + if (!pipapo_match_field(f, start, rules_fx, + match_start, match_end)) + break; +@@ -2014,16 +2016,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, + + match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); +- } + +- if (i == m->field_count) { +- priv->dirty = true; +- pipapo_drop(m, rulemap); +- return; ++ if (last && f->mt[rulemap[i].to].e == e) { ++ priv->dirty = true; ++ pipapo_drop(m, rulemap); ++ return; ++ } + } + + first_rule += rules_f0; + } ++ ++ WARN_ON_ONCE(1); /* elem_priv not found */ + } + + /** +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_set_pipapo-walk-over-current-view-on-n.patch b/queue-6.8/netfilter-nft_set_pipapo-walk-over-current-view-on-n.patch new file mode 100644 index 0000000000..754cc34e71 --- /dev/null +++ b/queue-6.8/netfilter-nft_set_pipapo-walk-over-current-view-on-n.patch @@ -0,0 +1,129 @@ +From 178921ebf7d489014377f2b8b57d9bd2f459cb5d Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 18:50:45 +0200 +Subject: 
netfilter: nft_set_pipapo: walk over current view on netlink dump + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 29b359cf6d95fd60730533f7f10464e95bd17c73 ] + +The generation mask can be updated while netlink dump is in progress. +The pipapo set backend walk iterator cannot rely on it to infer what +view of the datastructure is to be used. Add notation to specify if user +wants to read/update the set. + +Based on patch from Florian Westphal. + +Fixes: 2b84e215f874 ("netfilter: nft_set_pipapo: .walk does not deal with generations") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/netfilter/nf_tables.h | 14 ++++++++++++++ + net/netfilter/nf_tables_api.c | 6 ++++++ + net/netfilter/nft_set_pipapo.c | 5 +++-- + 3 files changed, 23 insertions(+), 2 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index 510244cc0f8f0..1cf9cb0f0a975 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) + return (void *)priv; + } + ++ ++/** ++ * enum nft_iter_type - nftables set iterator type ++ * ++ * @NFT_ITER_READ: read-only iteration over set elements ++ * @NFT_ITER_UPDATE: iteration under mutex to update set element state ++ */ ++enum nft_iter_type { ++ NFT_ITER_UNSPEC, ++ NFT_ITER_READ, ++ NFT_ITER_UPDATE, ++}; ++ + struct nft_set; + struct nft_set_iter { + u8 genmask; ++ enum nft_iter_type type:8; + unsigned int count; + unsigned int skip; + int err; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index ad9fb019684b3..8a3fa7f5b456d 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -626,6 +626,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) + { + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), 
++ .type = NFT_ITER_UPDATE, + .fn = nft_mapelem_deactivate, + }; + +@@ -5441,6 +5442,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + } + + iter.genmask = nft_genmask_next(ctx->net); ++ iter.type = NFT_ITER_UPDATE; + iter.skip = 0; + iter.count = 0; + iter.err = 0; +@@ -5514,6 +5516,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) + { + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), ++ .type = NFT_ITER_UPDATE, + .fn = nft_mapelem_activate, + }; + +@@ -5888,6 +5891,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) + args.skb = skb; + args.reset = dump_ctx->reset; + args.iter.genmask = nft_genmask_cur(net); ++ args.iter.type = NFT_ITER_READ; + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; +@@ -7372,6 +7376,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask) + { + struct nft_set_iter iter = { + .genmask = genmask, ++ .type = NFT_ITER_UPDATE, + .fn = nft_setelem_flush, + }; + +@@ -10871,6 +10876,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, + continue; + + iter.genmask = nft_genmask_next(ctx->net); ++ iter.type = NFT_ITER_UPDATE; + iter.skip = 0; + iter.count = 0; + iter.err = 0; +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 7756d70af868c..69b1ab6849e67 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -2040,13 +2040,14 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) + { + struct nft_pipapo *priv = nft_set_priv(set); +- struct net *net = read_pnet(&set->net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; + int i, r; + ++ WARN_ON_ONCE(iter->type == NFT_ITER_UNSPEC); ++ + rcu_read_lock(); +- if (iter->genmask == nft_genmask_cur(net)) ++ if (iter->type == NFT_ITER_READ) + m = rcu_dereference(priv->match); + else + m = 
priv->clone; +-- +2.43.0 + diff --git a/queue-6.8/octeontx2-pf-fix-flow_dis_is_fragment-implementation.patch b/queue-6.8/octeontx2-pf-fix-flow_dis_is_fragment-implementation.patch new file mode 100644 index 0000000000..60b7e3eaff --- /dev/null +++ b/queue-6.8/octeontx2-pf-fix-flow_dis_is_fragment-implementation.patch @@ -0,0 +1,62 @@ +From a118da75ca4b52fb82d48704dcf55c86f0dc7aa7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Fri, 12 Apr 2024 12:02:56 +0000 +Subject: octeontx2-pf: fix FLOW_DIS_IS_FRAGMENT implementation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Asbjørn Sloth Tønnesen <ast@fiberby.net> + +[ Upstream commit 75ce9506ee3dc66648a7d74ab3b0acfa364d6d43 ] + +Upon reviewing the flower control flags handling in +this driver, I notice that the key wasn't being used, +only the mask. + +Ie. `tc flower ... ip_flags nofrag` was hardware +offloaded as `... ip_flags frag`. + +Only compile tested, no access to HW. + +Fixes: c672e3727989 ("octeontx2-pf: Add support to filter packet based on IP fragment") +Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net> +Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +index 4fd44b6eecea6..60ee7ae2c4097 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +@@ -688,6 +688,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; ++ u32 val; + + flow_rule_match_control(rule, &match); + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { +@@ -696,12 +697,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, + } + + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { ++ val = match.key->flags & FLOW_DIS_IS_FRAGMENT; + if (ntohs(flow_spec->etype) == ETH_P_IP) { +- flow_spec->ip_flag = IPV4_FLAG_MORE; ++ flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0; + flow_mask->ip_flag = IPV4_FLAG_MORE; + req->features |= BIT_ULL(NPC_IPFRAG_IPV4); + } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) { +- flow_spec->next_header = IPPROTO_FRAGMENT; ++ flow_spec->next_header = val ? 
++ IPPROTO_FRAGMENT : 0; + flow_mask->next_header = 0xff; + req->features |= BIT_ULL(NPC_IPFRAG_IPV6); + } else { +-- +2.43.0 + diff --git a/queue-6.8/ravb-group-descriptor-types-used-in-rx-ring.patch b/queue-6.8/ravb-group-descriptor-types-used-in-rx-ring.patch new file mode 100644 index 0000000000..f5efb11952 --- /dev/null +++ b/queue-6.8/ravb-group-descriptor-types-used-in-rx-ring.patch @@ -0,0 +1,241 @@ +From fb17fd565be203e2aa62544a586a72430c457751 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 4 Mar 2024 12:08:53 +0100 +Subject: ravb: Group descriptor types used in Rx ring +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se> + +[ Upstream commit 4123c3fbf8632e5c553222bf1c10b3a3e0a8dc06 ] + +The Rx ring can either be made up of normal or extended descriptors, not +a mix of the two at the same time. Make this explicit by grouping the +two variables in a rx_ring union. + +The extension of the storage for more than one queue of normal +descriptors from a single to NUM_RX_QUEUE queues have no practical +effect. But aids in making the code readable as the code that uses it +already piggyback on other members of struct ravb_private that are +arrays of max length NUM_RX_QUEUE, e.g. rx_desc_dma. This will also make +further refactoring easier. + +While at it, rename the normal descriptor Rx ring to make it clear it's +not strictly related to the GbEthernet E-MAC IP found in RZ/G2L, normal +descriptors could be used on R-Car SoCs too. + +Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se> +Reviewed-by: Paul Barker <paul.barker.ct@bp.renesas.com> +Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Stable-dep-of: def52db470df ("net: ravb: Count packets instead of descriptors in R-Car RX path") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/renesas/ravb.h | 6 ++- + drivers/net/ethernet/renesas/ravb_main.c | 57 ++++++++++++------------ + 2 files changed, 33 insertions(+), 30 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h +index e0f8276cffedd..fd59155a70e1f 100644 +--- a/drivers/net/ethernet/renesas/ravb.h ++++ b/drivers/net/ethernet/renesas/ravb.h +@@ -1060,8 +1060,10 @@ struct ravb_private { + struct ravb_desc *desc_bat; + dma_addr_t rx_desc_dma[NUM_RX_QUEUE]; + dma_addr_t tx_desc_dma[NUM_TX_QUEUE]; +- struct ravb_rx_desc *gbeth_rx_ring; +- struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE]; ++ union { ++ struct ravb_rx_desc *desc; ++ struct ravb_ex_rx_desc *ex_desc; ++ } rx_ring[NUM_RX_QUEUE]; + struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE]; + void *tx_align[NUM_TX_QUEUE]; + struct sk_buff *rx_1st_skb; +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 1bdf0abb256cf..e97c98d5eb19c 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -250,11 +250,11 @@ static void ravb_rx_ring_free_gbeth(struct net_device *ndev, int q) + unsigned int ring_size; + unsigned int i; + +- if (!priv->gbeth_rx_ring) ++ if (!priv->rx_ring[q].desc) + return; + + for (i = 0; i < priv->num_rx_ring[q]; i++) { +- struct ravb_rx_desc *desc = &priv->gbeth_rx_ring[i]; ++ struct ravb_rx_desc *desc = &priv->rx_ring[q].desc[i]; + + if (!dma_mapping_error(ndev->dev.parent, + le32_to_cpu(desc->dptr))) +@@ -264,9 +264,9 @@ static void ravb_rx_ring_free_gbeth(struct net_device *ndev, int q) + DMA_FROM_DEVICE); + } + ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); +- dma_free_coherent(ndev->dev.parent, ring_size, priv->gbeth_rx_ring, ++ 
dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q].desc, + priv->rx_desc_dma[q]); +- priv->gbeth_rx_ring = NULL; ++ priv->rx_ring[q].desc = NULL; + } + + static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q) +@@ -275,11 +275,11 @@ static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q) + unsigned int ring_size; + unsigned int i; + +- if (!priv->rx_ring[q]) ++ if (!priv->rx_ring[q].ex_desc) + return; + + for (i = 0; i < priv->num_rx_ring[q]; i++) { +- struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; ++ struct ravb_ex_rx_desc *desc = &priv->rx_ring[q].ex_desc[i]; + + if (!dma_mapping_error(ndev->dev.parent, + le32_to_cpu(desc->dptr))) +@@ -290,9 +290,9 @@ static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q) + } + ring_size = sizeof(struct ravb_ex_rx_desc) * + (priv->num_rx_ring[q] + 1); +- dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q], ++ dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q].ex_desc, + priv->rx_desc_dma[q]); +- priv->rx_ring[q] = NULL; ++ priv->rx_ring[q].ex_desc = NULL; + } + + /* Free skb's and DMA buffers for Ethernet AVB */ +@@ -344,11 +344,11 @@ static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) + unsigned int i; + + rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; +- memset(priv->gbeth_rx_ring, 0, rx_ring_size); ++ memset(priv->rx_ring[q].desc, 0, rx_ring_size); + /* Build RX ring buffer */ + for (i = 0; i < priv->num_rx_ring[q]; i++) { + /* RX descriptor */ +- rx_desc = &priv->gbeth_rx_ring[i]; ++ rx_desc = &priv->rx_ring[q].desc[i]; + rx_desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, + GBETH_RX_BUFF_MAX, +@@ -361,7 +361,7 @@ static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) + rx_desc->dptr = cpu_to_le32(dma_addr); + rx_desc->die_dt = DT_FEMPTY; + } +- rx_desc = &priv->gbeth_rx_ring[i]; ++ rx_desc = &priv->rx_ring[q].desc[i]; + rx_desc->dptr 
= cpu_to_le32((u32)priv->rx_desc_dma[q]); + rx_desc->die_dt = DT_LINKFIX; /* type */ + } +@@ -374,11 +374,11 @@ static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q) + dma_addr_t dma_addr; + unsigned int i; + +- memset(priv->rx_ring[q], 0, rx_ring_size); ++ memset(priv->rx_ring[q].ex_desc, 0, rx_ring_size); + /* Build RX ring buffer */ + for (i = 0; i < priv->num_rx_ring[q]; i++) { + /* RX descriptor */ +- rx_desc = &priv->rx_ring[q][i]; ++ rx_desc = &priv->rx_ring[q].ex_desc[i]; + rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ); + dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, + RX_BUF_SZ, +@@ -391,7 +391,7 @@ static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q) + rx_desc->dptr = cpu_to_le32(dma_addr); + rx_desc->die_dt = DT_FEMPTY; + } +- rx_desc = &priv->rx_ring[q][i]; ++ rx_desc = &priv->rx_ring[q].ex_desc[i]; + rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]); + rx_desc->die_dt = DT_LINKFIX; /* type */ + } +@@ -446,10 +446,10 @@ static void *ravb_alloc_rx_desc_gbeth(struct net_device *ndev, int q) + + ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); + +- priv->gbeth_rx_ring = dma_alloc_coherent(ndev->dev.parent, ring_size, +- &priv->rx_desc_dma[q], +- GFP_KERNEL); +- return priv->gbeth_rx_ring; ++ priv->rx_ring[q].desc = dma_alloc_coherent(ndev->dev.parent, ring_size, ++ &priv->rx_desc_dma[q], ++ GFP_KERNEL); ++ return priv->rx_ring[q].desc; + } + + static void *ravb_alloc_rx_desc_rcar(struct net_device *ndev, int q) +@@ -459,10 +459,11 @@ static void *ravb_alloc_rx_desc_rcar(struct net_device *ndev, int q) + + ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1); + +- priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size, +- &priv->rx_desc_dma[q], +- GFP_KERNEL); +- return priv->rx_ring[q]; ++ priv->rx_ring[q].ex_desc = dma_alloc_coherent(ndev->dev.parent, ++ ring_size, ++ &priv->rx_desc_dma[q], ++ GFP_KERNEL); ++ return priv->rx_ring[q].ex_desc; + } + + /* 
Init skb and descriptor buffer for Ethernet AVB */ +@@ -784,7 +785,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) + limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + stats = &priv->stats[q]; + +- desc = &priv->gbeth_rx_ring[entry]; ++ desc = &priv->rx_ring[q].desc[entry]; + for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { + /* Descriptor type must be checked before all other reads */ + dma_rmb(); +@@ -851,13 +852,13 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) + } + + entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; +- desc = &priv->gbeth_rx_ring[entry]; ++ desc = &priv->rx_ring[q].desc[entry]; + } + + /* Refill the RX ring buffers. */ + for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { + entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; +- desc = &priv->gbeth_rx_ring[entry]; ++ desc = &priv->rx_ring[q].desc[entry]; + desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + + if (!priv->rx_skb[q][entry]) { +@@ -907,7 +908,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + + boguscnt = min(boguscnt, *quota); + limit = boguscnt; +- desc = &priv->rx_ring[q][entry]; ++ desc = &priv->rx_ring[q].ex_desc[entry]; + while (desc->die_dt != DT_FEMPTY) { + /* Descriptor type must be checked before all other reads */ + dma_rmb(); +@@ -967,13 +968,13 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) + } + + entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; +- desc = &priv->rx_ring[q][entry]; ++ desc = &priv->rx_ring[q].ex_desc[entry]; + } + + /* Refill the RX ring buffers. 
*/ + for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { + entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; +- desc = &priv->rx_ring[q][entry]; ++ desc = &priv->rx_ring[q].ex_desc[entry]; + desc->ds_cc = cpu_to_le16(RX_BUF_SZ); + + if (!priv->rx_skb[q][entry]) { +-- +2.43.0 + diff --git a/queue-6.8/s390-ism-properly-fix-receive-message-buffer-allocat.patch b/queue-6.8/s390-ism-properly-fix-receive-message-buffer-allocat.patch new file mode 100644 index 0000000000..dd05a59a14 --- /dev/null +++ b/queue-6.8/s390-ism-properly-fix-receive-message-buffer-allocat.patch @@ -0,0 +1,102 @@ +From df972fb3bc8566aa13addad84de8495a755aacbd Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Mon, 15 Apr 2024 15:15:07 +0200 +Subject: s390/ism: Properly fix receive message buffer allocation + +From: Gerd Bayer <gbayer@linux.ibm.com> + +[ Upstream commit 83781384a96b95e2b6403d3c8a002b2c89031770 ] + +Since [1], dma_alloc_coherent() does not accept requests for GFP_COMP +anymore, even on archs that may be able to fulfill this. Functionality that +relied on the receive buffer being a compound page broke at that point: +The SMC-D protocol, that utilizes the ism device driver, passes receive +buffers to the splice processor in a struct splice_pipe_desc with a +single entry list of struct pages. As the buffer is no longer a compound +page, the splice processor now rejects requests to handle more than a +page worth of data. + +Replace dma_alloc_coherent() and allocate a buffer with folio_alloc and +create a DMA map for it with dma_map_page(). Since only receive buffers +on ISM devices use DMA, qualify the mapping as FROM_DEVICE. +Since ISM devices are available on arch s390, only, and on that arch all +DMA is coherent, there is no need to introduce and export some kind of +dma_sync_to_cpu() method to be called by the SMC-D protocol layer. 
+ +Analogously, replace dma_free_coherent by a two step dma_unmap_page, +then folio_put to free the receive buffer. + +[1] https://lore.kernel.org/all/20221113163535.884299-1-hch@lst.de/ + +Fixes: c08004eede4b ("s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent") +Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/s390/net/ism_drv.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index 2c8e964425dc3..43778b088ffac 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -292,13 +292,16 @@ static int ism_read_local_gid(struct ism_dev *ism) + static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) + { + clear_bit(dmb->sba_idx, ism->sba_bitmap); +- dma_free_coherent(&ism->pdev->dev, dmb->dmb_len, +- dmb->cpu_addr, dmb->dma_addr); ++ dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len, ++ DMA_FROM_DEVICE); ++ folio_put(virt_to_folio(dmb->cpu_addr)); + } + + static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) + { ++ struct folio *folio; + unsigned long bit; ++ int rc; + + if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev)) + return -EINVAL; +@@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) + test_and_set_bit(dmb->sba_idx, ism->sba_bitmap)) + return -EINVAL; + +- dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len, +- &dmb->dma_addr, +- GFP_KERNEL | __GFP_NOWARN | +- __GFP_NOMEMALLOC | __GFP_NORETRY); +- if (!dmb->cpu_addr) +- clear_bit(dmb->sba_idx, ism->sba_bitmap); ++ folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | ++ __GFP_NORETRY, get_order(dmb->dmb_len)); + +- return dmb->cpu_addr ? 
0 : -ENOMEM; ++ if (!folio) { ++ rc = -ENOMEM; ++ goto out_bit; ++ } ++ ++ dmb->cpu_addr = folio_address(folio); ++ dmb->dma_addr = dma_map_page(&ism->pdev->dev, ++ virt_to_page(dmb->cpu_addr), 0, ++ dmb->dmb_len, DMA_FROM_DEVICE); ++ if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) { ++ rc = -ENOMEM; ++ goto out_free; ++ } ++ ++ return 0; ++ ++out_free: ++ kfree(dmb->cpu_addr); ++out_bit: ++ clear_bit(dmb->sba_idx, ism->sba_bitmap); ++ return rc; + } + + int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, +-- +2.43.0 + diff --git a/queue-6.8/scsi-ufs-qcom-add-missing-interconnect-bandwidth-val.patch b/queue-6.8/scsi-ufs-qcom-add-missing-interconnect-bandwidth-val.patch new file mode 100644 index 0000000000..02b5d414f0 --- /dev/null +++ b/queue-6.8/scsi-ufs-qcom-add-missing-interconnect-bandwidth-val.patch @@ -0,0 +1,72 @@ +From 9a8798e656fead5cc69e55e0b67381e2a3f2c4b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 3 Apr 2024 18:50:03 +0530 +Subject: scsi: ufs: qcom: Add missing interconnect bandwidth values for Gear 5 + +From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + +[ Upstream commit 8db8f6ce556af60ca9a9fd5e826d369ded70fcc7 ] + +These entries are necessary to scale the interconnect bandwidth while +operating in Gear 5. + +Cc: Amit Pundir <amit.pundir@linaro.org> +Fixes: 03ce80a1bb86 ("scsi: ufs: qcom: Add support for scaling interconnects") +Tested-by: Amit Pundir <amit.pundir@linaro.org> +Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> +Link: https://lore.kernel.org/r/20240403-ufs-icc-fix-v2-1-958412a5eb45@linaro.org +Signed-off-by: Martin K. 
Petersen <martin.petersen@oracle.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/ufs/host/ufs-qcom.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c +index f532e2c004a25..bcbcf758925be 100644 +--- a/drivers/ufs/host/ufs-qcom.c ++++ b/drivers/ufs/host/ufs-qcom.c +@@ -47,7 +47,7 @@ enum { + TSTBUS_MAX, + }; + +-#define QCOM_UFS_MAX_GEAR 4 ++#define QCOM_UFS_MAX_GEAR 5 + #define QCOM_UFS_MAX_LANE 2 + + enum { +@@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table { + [MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844, 1000 }, + [MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688, 1000 }, + [MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376, 1000 }, ++ [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752, 1000 }, + [MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844, 1000 }, + [MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688, 1000 }, + [MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376, 1000 }, + [MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752, 1000 }, ++ [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504, 1000 }, + [MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796, 1000 }, + [MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591, 1000 }, + [MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 }, + [MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 }, ++ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 }, + [MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591, 1000 }, + [MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181, 1000 }, + [MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, + [MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, ++ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, + [MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422, 1000 }, + [MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189, 1000 }, + [MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 }, + [MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 }, ++ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 }, + 
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189, 1000 }, + [MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378, 1000 }, + [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, + [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, ++ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, + [MODE_MAX][0][0] = { 7643136, 307200 }, + }; + +-- +2.43.0 + diff --git a/queue-6.8/selftests-tcp_ao-fix-fscanf-call-for-format-security.patch b/queue-6.8/selftests-tcp_ao-fix-fscanf-call-for-format-security.patch new file mode 100644 index 0000000000..80ea99b27d --- /dev/null +++ b/queue-6.8/selftests-tcp_ao-fix-fscanf-call-for-format-security.patch @@ -0,0 +1,47 @@ +From ea6a25848bccdb69ae68d01b5ed2fbf429ea747e Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 02:42:54 +0100 +Subject: selftests/tcp_ao: Fix fscanf() call for format-security +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dmitry Safonov <0x7f454c46@gmail.com> + +[ Upstream commit beb78cd1329d039d73487ca05633d1b92e1ab2ea ] + +On my new laptop with packages from nixos-unstable, gcc 12.3.0 produces: +> lib/proc.c: In function ‘netstat_read_type’: +> lib/proc.c:89:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 89 | if (fscanf(fnetstat, type->header_name) == EOF) +> | ^~ +> cc1: some warnings being treated as errors + +Here the selftests lib parses header name, while it expects a non-space word +ending with a colon. 
+ +Fixes: cfbab37b3da0 ("selftests/net: Add TCP-AO library") +Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> +Reported-by: Muhammad Usama Anjum <usama.anjum@collabora.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + tools/testing/selftests/net/tcp_ao/lib/proc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c +index 2fb6dd8adba69..8b984fa042869 100644 +--- a/tools/testing/selftests/net/tcp_ao/lib/proc.c ++++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c +@@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) + + pos = strchr(line, ' ') + 1; + +- if (fscanf(fnetstat, type->header_name) == EOF) ++ if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF) + test_error("fscanf(%s)", type->header_name); + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') + test_error("Unexpected netstat format (%c)", tmp); +-- +2.43.0 + diff --git a/queue-6.8/selftests-tcp_ao-make-rst-tests-less-flaky.patch b/queue-6.8/selftests-tcp_ao-make-rst-tests-less-flaky.patch new file mode 100644 index 0000000000..a3fbad4ac8 --- /dev/null +++ b/queue-6.8/selftests-tcp_ao-make-rst-tests-less-flaky.patch @@ -0,0 +1,87 @@ +From f387aa94e13a2fe3a833aaff8688a394aa3c687a Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 02:42:52 +0100 +Subject: selftests/tcp_ao: Make RST tests less flaky + +From: Dmitry Safonov <0x7f454c46@gmail.com> + +[ Upstream commit 4225dfa4535f219b03ae14147d9c6e7e82ec8df4 ] + +Currently, "active reset" cases are flaky, because select() is called +for 3 sockets, while only 2 are expected to receive RST. +The idea of the third socket was to get into request_sock_queue, +but the test mistakenly attempted to connect() after the listener +socket was shut down. 
+ +Repair this test, it's important to check the different kernel +code-paths for signing RST TCP-AO segments. + +Fixes: c6df7b2361d7 ("selftests/net: Add TCP-AO RST test") +Reported-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + tools/testing/selftests/net/tcp_ao/rst.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c +index 7df8b8700e39e..a2fe88d35ac06 100644 +--- a/tools/testing/selftests/net/tcp_ao/rst.c ++++ b/tools/testing/selftests/net/tcp_ao/rst.c +@@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[], + + static void test_client_active_rst(unsigned int port) + { +- /* one in queue, another accept()ed */ +- unsigned int wait_for = backlog + 2; + int i, sk[3], err; + bool is_writable[ARRAY_SIZE(sk)] = {false}; + unsigned int last = ARRAY_SIZE(sk) - 1; +@@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port) + for (i = 0; i < last; i++) { + err = _test_connect_socket(sk[i], this_ip_dest, port, + (i == 0) ? 
TEST_TIMEOUT_SEC : -1); +- + if (err < 0) + test_error("failed to connect()"); + } + +- synchronize_threads(); /* 2: connection accept()ed, another queued */ +- err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC); ++ synchronize_threads(); /* 2: two connections: one accept()ed, another queued */ ++ err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("test_wait_fds(): %d", err); + ++ /* async connect() with third sk to get into request_sock_queue */ ++ err = _test_connect_socket(sk[last], this_ip_dest, port, -1); ++ if (err < 0) ++ test_error("failed to connect()"); ++ + synchronize_threads(); /* 3: close listen socket */ + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); +@@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port) + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 4: finishing up */ +- err = _test_connect_socket(sk[last], this_ip_dest, port, -1); +- if (err < 0) +- test_error("failed to connect()"); + + synchronize_threads(); /* 5: closed active sk */ +- err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL, +- wait_for, TEST_TIMEOUT_SEC); ++ /* ++ * Wait for 2 connections: one accepted, another in the accept queue, ++ * the one in request_sock_queue won't get fully established, so ++ * doesn't receive an active RST, see inet_csk_listen_stop(). 
++ */ ++ err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("select(): %d", err); + +-- +2.43.0 + diff --git a/queue-6.8/selftests-tcp_ao-printing-fixes-to-confirm-with-form.patch b/queue-6.8/selftests-tcp_ao-printing-fixes-to-confirm-with-form.patch new file mode 100644 index 0000000000..167e4f4dff --- /dev/null +++ b/queue-6.8/selftests-tcp_ao-printing-fixes-to-confirm-with-form.patch @@ -0,0 +1,102 @@ +From 7a79ea68d943e210378647358ff5edd8539ca8d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 02:42:55 +0100 +Subject: selftests/tcp_ao: Printing fixes to confirm with format-security +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dmitry Safonov <0x7f454c46@gmail.com> + +[ Upstream commit b476c93654d748c13624f7c7d0ba191c56a8092e ] + +On my new laptop with packages from nixos-unstable, gcc 12.3.0 produces +> lib/setup.c: In function ‘__test_msg’: +> lib/setup.c:20:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 20 | ksft_print_msg(buf); +> | ^~~~~~~~~~~~~~ +> lib/setup.c: In function ‘__test_ok’: +> lib/setup.c:26:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 26 | ksft_test_result_pass(buf); +> | ^~~~~~~~~~~~~~~~~~~~~ +> lib/setup.c: In function ‘__test_fail’: +> lib/setup.c:32:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 32 | ksft_test_result_fail(buf); +> | ^~~~~~~~~~~~~~~~~~~~~ +> lib/setup.c: In function ‘__test_xfail’: +> lib/setup.c:38:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 38 | ksft_test_result_xfail(buf); +> | ^~~~~~~~~~~~~~~~~~~~~~ +> lib/setup.c: In function ‘__test_error’: +> lib/setup.c:44:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 44 | ksft_test_result_error(buf); +> | ^~~~~~~~~~~~~~~~~~~~~~ 
+> lib/setup.c: In function ‘__test_skip’: +> lib/setup.c:50:9: error: format not a string literal and no format arguments [-Werror=format-security] +> 50 | ksft_test_result_skip(buf); +> | ^~~~~~~~~~~~~~~~~~~~~ +> cc1: some warnings being treated as errors + +As the buffer was already pre-printed into, print it as a string +rather than a format-string. + +Fixes: cfbab37b3da0 ("selftests/net: Add TCP-AO library") +Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> +Reported-by: Muhammad Usama Anjum <usama.anjum@collabora.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + tools/testing/selftests/net/tcp_ao/lib/setup.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c +index 92276f916f2f3..e408b9243b2c5 100644 +--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c ++++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c +@@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; + void __test_msg(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_print_msg(buf); ++ ksft_print_msg("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + void __test_ok(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_test_result_pass(buf); ++ ksft_test_result_pass("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + void __test_fail(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_test_result_fail(buf); ++ ksft_test_result_fail("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + void __test_xfail(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_test_result_xfail(buf); ++ ksft_test_result_xfail("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + void __test_error(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_test_result_error(buf); ++ 
ksft_test_result_error("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + void __test_skip(const char *buf) + { + pthread_mutex_lock(&ksft_print_lock); +- ksft_test_result_skip(buf); ++ ksft_test_result_skip("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); + } + +-- +2.43.0 + diff --git a/queue-6.8/selftests-tcp_ao-zero-init-tcp_ao_info_opt.patch b/queue-6.8/selftests-tcp_ao-zero-init-tcp_ao_info_opt.patch new file mode 100644 index 0000000000..a8f62c88d1 --- /dev/null +++ b/queue-6.8/selftests-tcp_ao-zero-init-tcp_ao_info_opt.patch @@ -0,0 +1,38 @@ +From 1b12b6cc1a557cd385f6fc97e870edb127c5bf89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 02:42:53 +0100 +Subject: selftests/tcp_ao: Zero-init tcp_ao_info_opt + +From: Dmitry Safonov <0x7f454c46@gmail.com> + +[ Upstream commit b089b3bead532419cdcbd8e4e0a3e23c49d11573 ] + +The structure is on the stack and has to be zero-initialized as +the kernel checks for: +> if (in.reserved != 0 || in.reserved2 != 0) +> return -EINVAL; + +Fixes: b26660531cf6 ("selftests/net: Add test for TCP-AO add setsockopt() command") +Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + tools/testing/selftests/net/tcp_ao/setsockopt-closed.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +index 452de131fa3a9..517930f9721bd 100644 +--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c ++++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +@@ -21,7 +21,7 @@ static void make_listen(int sk) + static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, + const char *tst) + { +- struct tcp_ao_info_opt tmp; ++ struct tcp_ao_info_opt tmp = {}; + socklen_t len = sizeof(tmp); + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) +-- 
+2.43.0 + diff --git a/queue-6.8/series b/queue-6.8/series index 6ba7c4dbf5..f75011e5f0 100644 --- a/queue-6.8/series +++ b/queue-6.8/series @@ -10,3 +10,44 @@ btrfs-do-not-wait-for-short-bulk-allocation.patch btrfs-zoned-do-not-flag-zeroout-on-non-dirty-extent-buffer.patch r8169-fix-led-related-deadlock-on-module-removal.patch r8169-add-missing-conditional-compiling-for-call-to-r8169_remove_leds.patch +scsi-ufs-qcom-add-missing-interconnect-bandwidth-val.patch +netfilter-nf_tables-fix-potential-data-race-in-__nft.patch +netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-25621 +netfilter-br_netfilter-skip-conntrack-input-hook-for.patch +netfilter-nft_set_pipapo-constify-lookup-fn-args-whe.patch +netfilter-nft_set_pipapo-walk-over-current-view-on-n.patch +netfilter-nft_set_pipapo-do-not-free-live-element.patch +netfilter-flowtable-validate-pppoe-header.patch +netfilter-flowtable-incorrect-pppoe-tuple.patch +af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch +af_unix-don-t-peek-oob-data-without-msg_oob.patch +net-sparx5-flower-fix-fragment-flags-handling.patch +net-mlx5-lag-restore-buckets-number-to-default-after.patch +net-mlx5-restore-mistakenly-dropped-parts-in-registe.patch +net-mlx5e-prevent-deadlock-while-disabling-arfs.patch +net-change-maximum-number-of-udp-segments-to-128.patch +octeontx2-pf-fix-flow_dis_is_fragment-implementation.patch +selftests-tcp_ao-make-rst-tests-less-flaky.patch +selftests-tcp_ao-zero-init-tcp_ao_info_opt.patch +selftests-tcp_ao-fix-fscanf-call-for-format-security.patch +selftests-tcp_ao-printing-fixes-to-confirm-with-form.patch +net-stmmac-apply-half-duplex-less-constraint-for-dw-.patch +net-stmmac-fix-max-speed-being-ignored-on-queue-re-i.patch +net-stmmac-fix-ip-cores-specific-mac-capabilities.patch +ice-tc-check-src_vsi-in-case-of-traffic-from-vf.patch +ice-tc-allow-zero-flags-in-parsing-tc-flower.patch +ice-fix-checking-for-unsupported-keys-on-non-tunnel-.patch 
+tun-limit-printing-rate-when-illegal-packet-received.patch +net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch +net-dsa-mt7530-fix-port-mirroring-for-mt7988-soc-swi.patch +s390-ism-properly-fix-receive-message-buffer-allocat.patch +netfilter-nf_tables-missing-iterator-type-in-lookup-.patch +netfilter-nf_tables-restore-set-elements-when-delete.patch +gpiolib-swnode-remove-wrong-header-inclusion.patch +netfilter-nf_tables-fix-memleak-in-map-from-abort-pa.patch +net-sched-fix-mirred-deadlock-on-device-recursion.patch +net-ethernet-mtk_eth_soc-fix-wed-wifi-reset.patch +ravb-group-descriptor-types-used-in-rx-ring.patch +net-ravb-count-packets-instead-of-descriptors-in-r-c.patch +net-ravb-allow-rx-loop-to-move-past-dma-mapping-erro.patch +net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch diff --git a/queue-6.8/tun-limit-printing-rate-when-illegal-packet-received.patch b/queue-6.8/tun-limit-printing-rate-when-illegal-packet-received.patch new file mode 100644 index 0000000000..7b5ed917ba --- /dev/null +++ b/queue-6.8/tun-limit-printing-rate-when-illegal-packet-received.patch @@ -0,0 +1,91 @@ +From 709a4cd55f080c149f10e8539f16f090b3531d1d Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 14 Apr 2024 22:02:46 -0400 +Subject: tun: limit printing rate when illegal packet received by tun dev + +From: Lei Chen <lei.chen@smartx.com> + +[ Upstream commit f8bbc07ac535593139c875ffa19af924b1084540 ] + +vhost_worker will call tun callbacks to receive packets. If too many +illegal packets arrive, tun_do_read will keep dumping packet contents. +When the console is enabled, it will cost much more CPU time to dump +packets and a soft lockup will be detected. + +The net_ratelimit mechanism can be used to limit the dumping rate. 
+ +PID: 33036 TASK: ffff949da6f20000 CPU: 23 COMMAND: "vhost-32980" + #0 [fffffe00003fce50] crash_nmi_callback at ffffffff89249253 + #1 [fffffe00003fce58] nmi_handle at ffffffff89225fa3 + #2 [fffffe00003fceb0] default_do_nmi at ffffffff8922642e + #3 [fffffe00003fced0] do_nmi at ffffffff8922660d + #4 [fffffe00003fcef0] end_repeat_nmi at ffffffff89c01663 + [exception RIP: io_serial_in+20] + RIP: ffffffff89792594 RSP: ffffa655314979e8 RFLAGS: 00000002 + RAX: ffffffff89792500 RBX: ffffffff8af428a0 RCX: 0000000000000000 + RDX: 00000000000003fd RSI: 0000000000000005 RDI: ffffffff8af428a0 + RBP: 0000000000002710 R8: 0000000000000004 R9: 000000000000000f + R10: 0000000000000000 R11: ffffffff8acbf64f R12: 0000000000000020 + R13: ffffffff8acbf698 R14: 0000000000000058 R15: 0000000000000000 + ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + #5 [ffffa655314979e8] io_serial_in at ffffffff89792594 + #6 [ffffa655314979e8] wait_for_xmitr at ffffffff89793470 + #7 [ffffa65531497a08] serial8250_console_putchar at ffffffff897934f6 + #8 [ffffa65531497a20] uart_console_write at ffffffff8978b605 + #9 [ffffa65531497a48] serial8250_console_write at ffffffff89796558 + #10 [ffffa65531497ac8] console_unlock at ffffffff89316124 + #11 [ffffa65531497b10] vprintk_emit at ffffffff89317c07 + #12 [ffffa65531497b68] printk at ffffffff89318306 + #13 [ffffa65531497bc8] print_hex_dump at ffffffff89650765 + #14 [ffffa65531497ca8] tun_do_read at ffffffffc0b06c27 [tun] + #15 [ffffa65531497d38] tun_recvmsg at ffffffffc0b06e34 [tun] + #16 [ffffa65531497d68] handle_rx at ffffffffc0c5d682 [vhost_net] + #17 [ffffa65531497ed0] vhost_worker at ffffffffc0c644dc [vhost] + #18 [ffffa65531497f10] kthread at ffffffff892d2e72 + #19 [ffffa65531497f50] ret_from_fork at ffffffff89c0022f + +Fixes: ef3db4a59542 ("tun: avoid BUG, dump packet on GSO errors") +Signed-off-by: Lei Chen <lei.chen@smartx.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Acked-by: Jason Wang <jasowang@redhat.com> +Reviewed-by: Eric Dumazet 
<edumazet@google.com> +Acked-by: Michael S. Tsirkin <mst@redhat.com> +Link: https://lore.kernel.org/r/20240415020247.2207781-1-lei.chen@smartx.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/tun.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 8f95a562b8d0c..86515f0c2b6c1 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2132,14 +2132,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, + tun_is_little_endian(tun), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); +- pr_err("unexpected GSO type: " +- "0x%x, gso_size %d, hdr_len %d\n", +- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), +- tun16_to_cpu(tun, gso.hdr_len)); +- print_hex_dump(KERN_ERR, "tun: ", +- DUMP_PREFIX_NONE, +- 16, 1, skb->head, +- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); ++ ++ if (net_ratelimit()) { ++ netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", ++ sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), ++ tun16_to_cpu(tun, gso.hdr_len)); ++ print_hex_dump(KERN_ERR, "tun: ", ++ DUMP_PREFIX_NONE, ++ 16, 1, skb->head, ++ min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); ++ } + WARN_ON_ONCE(1); + return -EINVAL; + } +-- +2.43.0 + |