diff options
author | Sasha Levin <sashal@kernel.org> | 2024-04-19 07:43:50 -0400 |
---|---|---|
committer | Sasha Levin <sashal@kernel.org> | 2024-04-19 07:43:50 -0400 |
commit | bc31263b2ebeaace32b5941c3cedc82d46815689 (patch) | |
tree | 07ea6df025f4fd3638b8289acd2de6434fbacc53 | |
parent | a2abe5e655247570fa95df6504f2f2d85f3a3689 (diff) | |
download | stable-queue-bc31263b2ebeaace32b5941c3cedc82d46815689.tar.gz |
Fixes for 5.15
Signed-off-by: Sasha Levin <sashal@kernel.org>
13 files changed, 1356 insertions, 0 deletions
diff --git a/queue-5.15/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch b/queue-5.15/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch new file mode 100644 index 0000000000..fa516eaa6c --- /dev/null +++ b/queue-5.15/af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch @@ -0,0 +1,73 @@ +From 206ccb4ef4003b80cde5b7ae272fff67282b2a63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 10:10:15 -0700 +Subject: af_unix: Call manage_oob() for every skb in + unix_stream_read_generic(). + +From: Kuniyuki Iwashima <kuniyu@amazon.com> + +[ Upstream commit 283454c8a123072e5c386a5a2b5fc576aa455b6f ] + +When we call recv() for AF_UNIX socket, we first peek one skb and +call manage_oob() to check if the skb is sent with MSG_OOB. + +However, when we fetch the next (and the following) skb, manage_oob() +is not called now, leading to wrong behaviour. + +Let's say a socket send()s "hello" with MSG_OOB and the peer tries +to recv() 5 bytes with MSG_PEEK. Here, we should get only "hell" +without 'o', but actually not: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'hello', MSG_OOB) + 5 + >>> c2.recv(5, MSG_PEEK) + b'hello' + +The first skb fills 4 bytes, and the next skb is peeked but not +properly checked by manage_oob(). + +Let's move up the again label to call manage_oob() for every skb. 
+ +With this patch: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'hello', MSG_OOB) + 5 + >>> c2.recv(5, MSG_PEEK) + b'hell' + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Link: https://lore.kernel.org/r/20240410171016.7621-2-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/unix/af_unix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 628d97c195a7e..e2a2e22d210f6 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2644,6 +2644,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + last = skb = skb_peek(&sk->sk_receive_queue); + last_len = last ? last->len : 0; + ++again: + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); +@@ -2655,7 +2656,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + } + } + #endif +-again: + if (skb == NULL) { + if (copied >= target) + goto unlock; +-- +2.43.0 + diff --git a/queue-5.15/af_unix-don-t-peek-oob-data-without-msg_oob.patch b/queue-5.15/af_unix-don-t-peek-oob-data-without-msg_oob.patch new file mode 100644 index 0000000000..9ace31d308 --- /dev/null +++ b/queue-5.15/af_unix-don-t-peek-oob-data-without-msg_oob.patch @@ -0,0 +1,86 @@ +From 64d9bb58145fadc309760f3f70c0c20172c04bc5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 10:10:16 -0700 +Subject: af_unix: Don't peek OOB data without MSG_OOB. + +From: Kuniyuki Iwashima <kuniyu@amazon.com> + +[ Upstream commit 22dd70eb2c3d754862964377a75abafd3167346b ] + +Currently, we can read OOB data without MSG_OOB by using MSG_PEEK +when OOB data is sitting on the front row, which is apparently +wrong. 
+ + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'a', MSG_OOB) + 1 + >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) + b'a' + +If manage_oob() is called when no data has been copied, we only +check if the socket enables SO_OOBINLINE or MSG_PEEK is not used. +Otherwise, the skb is returned as is. + +However, here we should return NULL if MSG_PEEK is set and no data +has been copied. + +Also, in such a case, we should not jump to the redo label because +we will be caught in the loop and hog the CPU until normal data +comes in. + +Then, we need to handle skb == NULL case with the if-clause below +the manage_oob() block. + +With this patch: + + >>> from socket import * + >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) + >>> c1.send(b'a', MSG_OOB) + 1 + >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + BlockingIOError: [Errno 11] Resource temporarily unavailable + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> +Link: https://lore.kernel.org/r/20240410171016.7621-3-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/unix/af_unix.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index e2a2e22d210f6..f66f867049015 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2565,7 +2565,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + WRITE_ONCE(u->oob_skb, NULL); + consume_skb(skb); + } +- } else if (!(flags & MSG_PEEK)) { ++ } else if (flags & MSG_PEEK) { ++ skb = NULL; ++ } else { + skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) +@@ -2648,11 +2650,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + 
skb = manage_oob(skb, sk, flags, copied); +- if (!skb) { ++ if (!skb && copied) { + unix_state_unlock(sk); +- if (copied) +- break; +- goto redo; ++ break; + } + } + #endif +-- +2.43.0 + diff --git a/queue-5.15/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch b/queue-5.15/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch new file mode 100644 index 0000000000..1429e6510c --- /dev/null +++ b/queue-5.15/net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch @@ -0,0 +1,81 @@ +From cf9e071d5e21065ed3fbaf235b02e39cb6ab57ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sat, 13 Apr 2024 16:01:39 +0300 +Subject: net: dsa: mt7530: fix mirroring frames received on local port +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arınç ÜNAL <arinc.unal@arinc9.com> + +[ Upstream commit d59cf049c8378677053703e724808836f180888e ] + +This switch intellectual property provides a bit on the ARL global control +register which controls allowing mirroring frames which are received on the +local port (monitor port). This bit is unset after reset. + +This ability must be enabled to fully support the port mirroring feature on +this switch intellectual property. + +Therefore, this patch fixes the traffic not being reflected on a port, +which would be configured like below: + + tc qdisc add dev swp0 clsact + + tc filter add dev swp0 ingress matchall skip_sw \ + action mirred egress mirror dev swp0 + +As a side note, this configuration provides the hairpinning feature for a +single port. + +Fixes: 37feab6076aa ("net: dsa: mt7530: add support for port mirroring") +Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/dsa/mt7530.c | 6 ++++++ + drivers/net/dsa/mt7530.h | 4 ++++ + 2 files changed, 10 insertions(+) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index f291d1e70f807..053f1b4aa324c 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -2437,6 +2437,9 @@ mt7530_setup(struct dsa_switch *ds) + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + } + ++ /* Allow mirroring frames received on the local port (monitor port). */ ++ mt7530_set(priv, MT753X_AGC, LOCAL_EN); ++ + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + ret = mt7530_setup_vlan0(priv); + if (ret) +@@ -2553,6 +2556,9 @@ mt7531_setup_common(struct dsa_switch *ds) + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + } + ++ /* Allow mirroring frames received on the local port (monitor port). */ ++ mt7530_set(priv, MT753X_AGC, LOCAL_EN); ++ + /* Flush the FDB table */ + ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); + if (ret < 0) +diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h +index 299a26ad5809c..0247a58d5554c 100644 +--- a/drivers/net/dsa/mt7530.h ++++ b/drivers/net/dsa/mt7530.h +@@ -32,6 +32,10 @@ enum mt753x_id { + #define SYSC_REG_RSTCTRL 0x34 + #define RESET_MCM BIT(2) + ++/* Register for ARL global control */ ++#define MT753X_AGC 0xc ++#define LOCAL_EN BIT(7) ++ + /* Registers to mac forward control for unknown frames */ + #define MT7530_MFC 0x10 + #define BC_FFP(x) (((x) & 0xff) << 24) +-- +2.43.0 + diff --git a/queue-5.15/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch b/queue-5.15/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch new file mode 100644 index 0000000000..76cd597043 --- /dev/null +++ b/queue-5.15/net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch @@ -0,0 +1,66 @@ +From b0847d9ed05135c407d91e33504a8dc13450152f Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 17 Apr 2024 15:24:25 +0530 +Subject: 
net: ethernet: ti: am65-cpsw-nuss: cleanup DMA Channels before using + them + +From: Siddharth Vadapalli <s-vadapalli@ti.com> + +[ Upstream commit c24cd679b075b0e953ea167b0aa2b2d59e4eba7f ] + +The TX and RX DMA Channels used by the driver to exchange data with CPSW +are not guaranteed to be in a clean state during driver initialization. +The Bootloader could have used the same DMA Channels without cleaning them +up in the event of failure. Thus, reset and disable the DMA Channels to +ensure that they are in a clean state before using them. + +Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") +Reported-by: Schuyler Patton <spatton@ti.com> +Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com> +Reviewed-by: Roger Quadros <rogerq@kernel.org> +Link: https://lore.kernel.org/r/20240417095425.2253876-1-s-vadapalli@ti.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +index f94d6d322df42..4bd57b79a023b 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c ++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +@@ -2535,6 +2535,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) + + static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) + { ++ struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; ++ struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; + struct device *dev = common->dev; + struct devlink_port *dl_port; + struct am65_cpsw_port *port; +@@ -2553,6 +2555,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) + return ret; + } + ++ /* The DMA Channels are not guaranteed to be in a clean state. ++ * Reset and disable them to ensure that they are back to the ++ * clean state and ready to be used. 
++ */ ++ for (i = 0; i < common->tx_ch_num; i++) { ++ k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], ++ am65_cpsw_nuss_tx_cleanup); ++ k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); ++ } ++ ++ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) ++ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, ++ am65_cpsw_nuss_rx_cleanup, !!i); ++ ++ k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); ++ + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch b/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch new file mode 100644 index 0000000000..a4640b1744 --- /dev/null +++ b/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch @@ -0,0 +1,220 @@ +From ad062a1727c18b732542d3f31753e5ba94e5c2ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 11:24:59 +0200 +Subject: netfilter: br_netfilter: skip conntrack input hook for promisc + packets + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 751de2012eafa4d46d8081056761fa0e9cc8a178 ] + +For historical reasons, when bridge device is in promisc mode, packets +that are directed to the taps follow bridge input hook path. This patch +adds a workaround to reset conntrack for these packets. + +Jianbo Liu reports warning splats in their test infrastructure where +cloned packets reach the br_netfilter input hook to confirm the +conntrack object. + +Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has +reached the input hook because it is passed up to the bridge device to +reach the taps. 
+ +[ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core +[ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19 +[ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1 +[ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202 +[ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000 +[ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000 +[ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003 +[ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000 +[ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800 +[ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000 +[ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0 +[ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: +0000000000000000 +[ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: +0000000000000400 +[ 57.585440] Call Trace: +[ 57.585721] <IRQ> +[ 57.585976] ? __warn+0x7d/0x130 +[ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.586811] ? report_bug+0xf1/0x1c0 +[ 57.587177] ? handle_bug+0x3f/0x70 +[ 57.587539] ? 
exc_invalid_op+0x13/0x60 +[ 57.587929] ? asm_exc_invalid_op+0x16/0x20 +[ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter] +[ 57.588825] nf_hook_slow+0x3d/0xd0 +[ 57.589188] ? br_handle_vlan+0x4b/0x110 +[ 57.589579] br_pass_frame_up+0xfc/0x150 +[ 57.589970] ? br_port_flags_change+0x40/0x40 +[ 57.590396] br_handle_frame_finish+0x346/0x5e0 +[ 57.590837] ? ipt_do_table+0x32e/0x430 +[ 57.591221] ? br_handle_local_finish+0x20/0x20 +[ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter] +[ 57.592286] ? br_handle_local_finish+0x20/0x20 +[ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter] +[ 57.593348] ? br_handle_local_finish+0x20/0x20 +[ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat] +[ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter] +[ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter] +[ 57.595280] br_handle_frame+0x1f3/0x3d0 +[ 57.595676] ? br_handle_local_finish+0x20/0x20 +[ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0 +[ 57.596566] __netif_receive_skb_core+0x25b/0xfc0 +[ 57.597017] ? 
__napi_build_skb+0x37/0x40 +[ 57.597418] __netif_receive_skb_list_core+0xfb/0x220 + +Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Reported-by: Jianbo Liu <jianbol@nvidia.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/bridge/br_input.c | 15 +++++++++++---- + net/bridge/br_netfilter_hooks.c | 6 ++++++ + net/bridge/br_private.h | 1 + + net/bridge/netfilter/nf_conntrack_bridge.c | 14 ++++++++++---- + 4 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c +index 54bfcdf692732..f3d49343f7dbe 100644 +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) + return netif_receive_skb(skb); + } + +-static int br_pass_frame_up(struct sk_buff *skb) ++static int br_pass_frame_up(struct sk_buff *skb, bool promisc) + { + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); +@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) + br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), + BR_MCAST_DIR_TX); + ++ BR_INPUT_SKB_CB(skb)->promisc = promisc; ++ + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, + br_netif_receive_skb); +@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + struct net_bridge_mcast *brmctx; + struct net_bridge_vlan *vlan; + struct net_bridge *br; ++ bool promisc; + u16 vid = 0; + u8 state; + +@@ -102,7 +105,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); + +- local_rcv = !!(br->dev->flags & IFF_PROMISC); ++ promisc = !!(br->dev->flags & IFF_PROMISC); ++ local_rcv = promisc; ++ + if 
(is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { + /* by definition the broadcast is also a multicast address */ + if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { +@@ -165,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + unsigned long now = jiffies; + + if (test_bit(BR_FDB_LOCAL, &dst->flags)) +- return br_pass_frame_up(skb); ++ return br_pass_frame_up(skb, false); + + if (now != dst->used) + dst->used = now; +@@ -178,7 +183,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + } + + if (local_rcv) +- return br_pass_frame_up(skb); ++ return br_pass_frame_up(skb, promisc); + + out: + return 0; +@@ -350,6 +355,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) + goto forward; + } + ++ BR_INPUT_SKB_CB(skb)->promisc = false; ++ + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index 8a114a5000466..9981e0dfdd4d3 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -584,11 +584,17 @@ static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) + { ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + ++ if (promisc) { ++ nf_reset_ct(skb); ++ return NF_ACCEPT; ++ } ++ + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index ff10ddeeb50ff..fe61d3b8d0cc2 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -547,6 +547,7 @@ struct br_input_skb_cb { + #endif + u8 proxyarp_replied:1; + u8 src_port_isolated:1; ++ u8 promisc:1; + #ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_filtered:1; + #endif +diff --git 
a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c +index 83743e95939b1..fbdb1ad448c3a 100644 +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -293,18 +293,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) + { +- enum ip_conntrack_info ctinfo; ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; ++ struct nf_conntrack *nfct = skb_nfct(skb); + struct nf_conn *ct; + +- if (skb->pkt_type == PACKET_HOST) ++ if (promisc) { ++ nf_reset_ct(skb); ++ return NF_ACCEPT; ++ } ++ ++ if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ +- ct = nf_ct_get(skb, &ctinfo); +- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ +-- +2.43.0 + diff --git a/queue-5.15/netfilter-flowtable-incorrect-pppoe-tuple.patch b/queue-5.15/netfilter-flowtable-incorrect-pppoe-tuple.patch new file mode 100644 index 0000000000..a438851adc --- /dev/null +++ b/queue-5.15/netfilter-flowtable-incorrect-pppoe-tuple.patch @@ -0,0 +1,37 @@ +From 5b01f17f95e922da7d78080535f7fa9583a298ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Thu, 11 Apr 2024 00:09:00 +0200 +Subject: netfilter: flowtable: incorrect pppoe tuple + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 6db5dc7b351b9569940cd1cf445e237c42cd6d27 ] + +pppoe traffic reaching ingress path does not match the flowtable entry +because the pppoe header is expected to be at the network header offset. 
+This bug causes a mismatch in the flow table lookup, so pppoe packets +enter the classical forwarding path. + +Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_flow_table_ip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index 448956fb52f69..f3227f9316969 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, + tuple->encap[i].proto = skb->protocol; + break; + case htons(ETH_P_PPP_SES): +- phdr = (struct pppoe_hdr *)skb_mac_header(skb); ++ phdr = (struct pppoe_hdr *)skb_network_header(skb); + tuple->encap[i].id = ntohs(phdr->sid); + tuple->encap[i].proto = skb->protocol; + break; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-flowtable-validate-pppoe-header.patch b/queue-5.15/netfilter-flowtable-validate-pppoe-header.patch new file mode 100644 index 0000000000..8e7973e2af --- /dev/null +++ b/queue-5.15/netfilter-flowtable-validate-pppoe-header.patch @@ -0,0 +1,106 @@ +From 3502d405ac432b3ec4ee9c0a410d5696951fdb3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Tue, 9 Apr 2024 13:47:33 +0200 +Subject: netfilter: flowtable: validate pppoe header + +From: Pablo Neira Ayuso <pablo@netfilter.org> + +[ Upstream commit 87b3593bed1868b2d9fe096c01bcdf0ea86cbebf ] + +Ensure there is sufficient room to access the protocol field of the +PPPoe header. Validate it once before the flowtable lookup, then use a +helper function to access protocol field. 
+ +Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com +Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/netfilter/nf_flow_table.h | 12 +++++++++++- + net/netfilter/nf_flow_table_inet.c | 3 ++- + net/netfilter/nf_flow_table_ip.c | 8 +++++--- + 3 files changed, 18 insertions(+), 5 deletions(-) + +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index 6cc0cfbf69b86..8e98fb8edff8d 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -318,7 +318,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, + int nf_flow_table_offload_init(void); + void nf_flow_table_offload_exit(void); + +-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) ++static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) + { + __be16 proto; + +@@ -334,6 +334,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) + return 0; + } + ++static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) ++{ ++ if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) ++ return false; ++ ++ *inner_proto = __nf_flow_pppoe_proto(skb); ++ ++ return true; ++} ++ + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) + #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) + #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ +diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c +index 280fdd32965f6..6783ea220f8fe 100644 +--- a/net/netfilter/nf_flow_table_inet.c ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, + proto = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): +- proto = nf_flow_pppoe_proto(skb); ++ if 
(!nf_flow_pppoe_proto(skb, &proto)) ++ return NF_ACCEPT; + break; + default: + proto = skb->protocol; +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index 28026467b54cd..448956fb52f69 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -246,10 +246,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, + return NF_STOLEN; + } + +-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, ++static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, + u32 *offset) + { + struct vlan_ethhdr *veth; ++ __be16 inner_proto; + + switch (skb->protocol) { + case htons(ETH_P_8021Q): +@@ -260,7 +261,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, + } + break; + case htons(ETH_P_PPP_SES): +- if (nf_flow_pppoe_proto(skb) == proto) { ++ if (nf_flow_pppoe_proto(skb, &inner_proto) && ++ inner_proto == proto) { + *offset += PPPOE_SES_HLEN; + return true; + } +@@ -289,7 +291,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, + skb_reset_network_header(skb); + break; + case htons(ETH_P_PPP_SES): +- skb->protocol = nf_flow_pppoe_proto(skb); ++ skb->protocol = __nf_flow_pppoe_proto(skb); + skb_pull(skb, PPPOE_SES_HLEN); + skb_reset_network_header(skb); + break; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nf_flow_table-count-pending-offload-workqu.patch b/queue-5.15/netfilter-nf_flow_table-count-pending-offload-workqu.patch new file mode 100644 index 0000000000..df6ffe16f5 --- /dev/null +++ b/queue-5.15/netfilter-nf_flow_table-count-pending-offload-workqu.patch @@ -0,0 +1,364 @@ +From 174f45185367bfeda00e17f7fabfcbf8f63ef2eb Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 15 Jun 2022 12:43:55 +0200 +Subject: netfilter: nf_flow_table: count pending offload workqueue tasks + +From: Vlad Buslov <vladbu@nvidia.com> + +[ Upstream commit b038177636f83bbf87c2b238706474145dd2cd04 ] + +To improve hardware 
offload debuggability count pending 'add', 'del' and +'stats' flow_table offload workqueue tasks. Counters are incremented before +scheduling new task and decremented when workqueue handler finishes +executing. These counters allow user to diagnose congestion on hardware +offload workqueues that can happen when either CPU is starved and workqueue +jobs are executed at lower rate than new ones are added or when +hardware/driver can't keep up with the rate. + +Implement the described counters as percpu counters inside new struct +netns_ft which is stored inside struct net. Expose them via new procfs file +'/proc/net/stats/nf_flowtable' that is similar to existing 'nf_conntrack' +file. + +Signed-off-by: Vlad Buslov <vladbu@nvidia.com> +Signed-off-by: Oz Shlomo <ozsh@nvidia.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Stable-dep-of: 87b3593bed18 ("netfilter: flowtable: validate pppoe header") +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + include/net/net_namespace.h | 6 ++ + include/net/netfilter/nf_flow_table.h | 21 +++++++ + include/net/netns/flow_table.h | 14 +++++ + net/netfilter/Kconfig | 9 +++ + net/netfilter/Makefile | 1 + + net/netfilter/nf_flow_table_core.c | 62 ++++++++++++++++++++- + net/netfilter/nf_flow_table_offload.c | 17 +++++- + net/netfilter/nf_flow_table_procfs.c | 80 +++++++++++++++++++++++++++ + 8 files changed, 206 insertions(+), 4 deletions(-) + create mode 100644 include/net/netns/flow_table.h + create mode 100644 net/netfilter/nf_flow_table_procfs.c + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index 2ba326f9e004d..c47baa623ba58 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -26,6 +26,9 @@ + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + #include <net/netns/conntrack.h> + #endif ++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) ++#include <net/netns/flow_table.h> ++#endif + #include <net/netns/nftables.h> + #include <net/netns/xfrm.h> + 
#include <net/netns/mpls.h> +@@ -138,6 +141,9 @@ struct net { + #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) + struct netns_nftables nft; + #endif ++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) ++ struct netns_ft ft; ++#endif + #endif + #ifdef CONFIG_WEXT_CORE + struct sk_buff_head wext_nlevents; +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index dabd84fa3fd36..6cc0cfbf69b86 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -334,4 +334,25 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) + return 0; + } + ++#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) ++#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) ++#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ ++ this_cpu_inc((net)->ft.stat->count) ++#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \ ++ this_cpu_dec((net)->ft.stat->count) ++ ++#ifdef CONFIG_NF_FLOW_TABLE_PROCFS ++int nf_flow_table_init_proc(struct net *net); ++void nf_flow_table_fini_proc(struct net *net); ++#else ++static inline int nf_flow_table_init_proc(struct net *net) ++{ ++ return 0; ++} ++ ++static inline void nf_flow_table_fini_proc(struct net *net) ++{ ++} ++#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */ ++ + #endif /* _NF_FLOW_TABLE_H */ +diff --git a/include/net/netns/flow_table.h b/include/net/netns/flow_table.h +new file mode 100644 +index 0000000000000..1c5fc657e2675 +--- /dev/null ++++ b/include/net/netns/flow_table.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __NETNS_FLOW_TABLE_H ++#define __NETNS_FLOW_TABLE_H ++ ++struct nf_flow_table_stat { ++ unsigned int count_wq_add; ++ unsigned int count_wq_del; ++ unsigned int count_wq_stats; ++}; ++ ++struct netns_ft { ++ struct nf_flow_table_stat __percpu *stat; ++}; ++#endif +diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig +index 4f645d51c2573..f02ebe4609650 100644 
+--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -728,6 +728,15 @@ config NF_FLOW_TABLE + + To compile it as a module, choose M here. + ++config NF_FLOW_TABLE_PROCFS ++ bool "Supply flow table statistics in procfs" ++ default y ++ depends on PROC_FS ++ depends on SYSCTL ++ help ++ This option enables for the flow table offload statistics ++ to be shown in procfs under net/netfilter/nf_flowtable. ++ + config NETFILTER_XTABLES + tristate "Netfilter Xtables support (required for ip_tables)" + default m if NETFILTER_ADVANCED=n +diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile +index aab20e575ecd8..3f77f20ae39e4 100644 +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -124,6 +124,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o + obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o + nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ + nf_flow_table_offload.o ++nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o + + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o + +diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c +index e78cdd73ef628..beb0e84b5f427 100644 +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -606,14 +606,74 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + ++static int nf_flow_table_init_net(struct net *net) ++{ ++ net->ft.stat = alloc_percpu(struct nf_flow_table_stat); ++ return net->ft.stat ? 
0 : -ENOMEM; ++} ++ ++static void nf_flow_table_fini_net(struct net *net) ++{ ++ free_percpu(net->ft.stat); ++} ++ ++static int nf_flow_table_pernet_init(struct net *net) ++{ ++ int ret; ++ ++ ret = nf_flow_table_init_net(net); ++ if (ret < 0) ++ return ret; ++ ++ ret = nf_flow_table_init_proc(net); ++ if (ret < 0) ++ goto out_proc; ++ ++ return 0; ++ ++out_proc: ++ nf_flow_table_fini_net(net); ++ return ret; ++} ++ ++static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) ++{ ++ struct net *net; ++ ++ list_for_each_entry(net, net_exit_list, exit_list) { ++ nf_flow_table_fini_proc(net); ++ nf_flow_table_fini_net(net); ++ } ++} ++ ++static struct pernet_operations nf_flow_table_net_ops = { ++ .init = nf_flow_table_pernet_init, ++ .exit_batch = nf_flow_table_pernet_exit, ++}; ++ + static int __init nf_flow_table_module_init(void) + { +- return nf_flow_table_offload_init(); ++ int ret; ++ ++ ret = register_pernet_subsys(&nf_flow_table_net_ops); ++ if (ret < 0) ++ return ret; ++ ++ ret = nf_flow_table_offload_init(); ++ if (ret) ++ goto out_offload; ++ ++ return 0; ++ ++out_offload: ++ unregister_pernet_subsys(&nf_flow_table_net_ops); ++ return ret; + } + + static void __exit nf_flow_table_module_exit(void) + { + nf_flow_table_offload_exit(); ++ unregister_pernet_subsys(&nf_flow_table_net_ops); + } + + module_init(nf_flow_table_module_init); +diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c +index 336f282a221fd..6ac1ebe17456d 100644 +--- a/net/netfilter/nf_flow_table_offload.c ++++ b/net/netfilter/nf_flow_table_offload.c +@@ -953,17 +953,22 @@ static void flow_offload_work_stats(struct flow_offload_work *offload) + static void flow_offload_work_handler(struct work_struct *work) + { + struct flow_offload_work *offload; ++ struct net *net; + + offload = container_of(work, struct flow_offload_work, work); ++ net = read_pnet(&offload->flowtable->net); + switch (offload->cmd) { + case FLOW_CLS_REPLACE: + 
flow_offload_work_add(offload); ++ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add); + break; + case FLOW_CLS_DESTROY: + flow_offload_work_del(offload); ++ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del); + break; + case FLOW_CLS_STATS: + flow_offload_work_stats(offload); ++ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats); + break; + default: + WARN_ON_ONCE(1); +@@ -975,12 +980,18 @@ static void flow_offload_work_handler(struct work_struct *work) + + static void flow_offload_queue_work(struct flow_offload_work *offload) + { +- if (offload->cmd == FLOW_CLS_REPLACE) ++ struct net *net = read_pnet(&offload->flowtable->net); ++ ++ if (offload->cmd == FLOW_CLS_REPLACE) { ++ NF_FLOW_TABLE_STAT_INC(net, count_wq_add); + queue_work(nf_flow_offload_add_wq, &offload->work); +- else if (offload->cmd == FLOW_CLS_DESTROY) ++ } else if (offload->cmd == FLOW_CLS_DESTROY) { ++ NF_FLOW_TABLE_STAT_INC(net, count_wq_del); + queue_work(nf_flow_offload_del_wq, &offload->work); +- else ++ } else { ++ NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); + queue_work(nf_flow_offload_stats_wq, &offload->work); ++ } + } + + static struct flow_offload_work * +diff --git a/net/netfilter/nf_flow_table_procfs.c b/net/netfilter/nf_flow_table_procfs.c +new file mode 100644 +index 0000000000000..159b033a43e60 +--- /dev/null ++++ b/net/netfilter/nf_flow_table_procfs.c +@@ -0,0 +1,80 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include <linux/kernel.h> ++#include <linux/proc_fs.h> ++#include <net/netfilter/nf_flow_table.h> ++ ++static void *nf_flow_table_cpu_seq_start(struct seq_file *seq, loff_t *pos) ++{ ++ struct net *net = seq_file_net(seq); ++ int cpu; ++ ++ if (*pos == 0) ++ return SEQ_START_TOKEN; ++ ++ for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) { ++ if (!cpu_possible(cpu)) ++ continue; ++ *pos = cpu + 1; ++ return per_cpu_ptr(net->ft.stat, cpu); ++ } ++ ++ return NULL; ++} ++ ++static void *nf_flow_table_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ struct net *net = 
seq_file_net(seq); ++ int cpu; ++ ++ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { ++ if (!cpu_possible(cpu)) ++ continue; ++ *pos = cpu + 1; ++ return per_cpu_ptr(net->ft.stat, cpu); ++ } ++ (*pos)++; ++ return NULL; ++} ++ ++static void nf_flow_table_cpu_seq_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static int nf_flow_table_cpu_seq_show(struct seq_file *seq, void *v) ++{ ++ const struct nf_flow_table_stat *st = v; ++ ++ if (v == SEQ_START_TOKEN) { ++ seq_puts(seq, "wq_add wq_del wq_stats\n"); ++ return 0; ++ } ++ ++ seq_printf(seq, "%8d %8d %8d\n", ++ st->count_wq_add, ++ st->count_wq_del, ++ st->count_wq_stats ++ ); ++ return 0; ++} ++ ++static const struct seq_operations nf_flow_table_cpu_seq_ops = { ++ .start = nf_flow_table_cpu_seq_start, ++ .next = nf_flow_table_cpu_seq_next, ++ .stop = nf_flow_table_cpu_seq_stop, ++ .show = nf_flow_table_cpu_seq_show, ++}; ++ ++int nf_flow_table_init_proc(struct net *net) ++{ ++ struct proc_dir_entry *pde; ++ ++ pde = proc_create_net("nf_flowtable", 0444, net->proc_net_stat, ++ &nf_flow_table_cpu_seq_ops, ++ sizeof(struct seq_net_private)); ++ return pde ? 
0 : -ENOMEM; ++} ++ ++void nf_flow_table_fini_proc(struct net *net) ++{ ++ remove_proc_entry("nf_flowtable", net->proc_net_stat); ++} +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch b/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch new file mode 100644 index 0000000000..579f3d0144 --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch @@ -0,0 +1,58 @@ +From 562f3715fca4d6bf9bd668065414d9c30f6f04fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 7 Apr 2024 14:56:04 +0800 +Subject: netfilter: nf_tables: Fix potential data-race in + __nft_expr_type_get() + +From: Ziyang Xuan <william.xuanziyang@huawei.com> + +[ Upstream commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf ] + +nft_unregister_expr() can run concurrently with __nft_expr_type_get(), +and there is no protection when iterating over the nf_tables_expressions +list in __nft_expr_type_get(). Therefore, there is a potential data race +on nf_tables_expressions list entries. + +Use list_for_each_entry_rcu() to iterate over nf_tables_expressions +list in __nft_expr_type_get(), and use rcu_read_lock() in the caller +nft_expr_type_get() to protect the entire type query process. 
+ +Fixes: ef1f7df9170d ("netfilter: nf_tables: expression ops overloading") +Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 113c1ebe4a5be..d0712553d2b06 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2821,7 +2821,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, + { + const struct nft_expr_type *type, *candidate = NULL; + +- list_for_each_entry(type, &nf_tables_expressions, list) { ++ list_for_each_entry_rcu(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) { + if (!type->family && !candidate) + candidate = type; +@@ -2853,9 +2853,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, + if (nla == NULL) + return ERR_PTR(-EINVAL); + ++ rcu_read_lock(); + type = __nft_expr_type_get(family, nla); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-29634 b/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-29634 new file mode 100644 index 0000000000..21fc84c53a --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-29634 @@ -0,0 +1,57 @@ +From 40a7d5760f30e56575607c43c5dcb1613c8eab86 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 7 Apr 2024 14:56:05 +0800 +Subject: netfilter: nf_tables: Fix potential data-race in __nft_obj_type_get() + +From: Ziyang Xuan <william.xuanziyang@huawei.com> + +[ Upstream commit 
d78d867dcea69c328db30df665be5be7d0148484 ] + +nft_unregister_obj() can run concurrently with __nft_obj_type_get(), +and there is no protection when iterating over the nf_tables_objects +list in __nft_obj_type_get(). Therefore, there is a potential data race +on nf_tables_objects list entries. + +Use list_for_each_entry_rcu() to iterate over nf_tables_objects +list in __nft_obj_type_get(), and use rcu_read_lock() in the caller +nft_obj_type_get() to protect the entire type query process. + +Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") +Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_tables_api.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index d0712553d2b06..3999b89793fce 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7045,7 +7045,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) + { + const struct nft_object_type *type; + +- list_for_each_entry(type, &nf_tables_objects, list) { ++ list_for_each_entry_rcu(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; +@@ -7061,9 +7061,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) + { + const struct nft_object_type *type; + ++ rcu_read_lock(); + type = __nft_obj_type_get(objtype, family); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nft_set_pipapo-do-not-free-live-element.patch b/queue-5.15/netfilter-nft_set_pipapo-do-not-free-live-element.patch new file mode 100644 index 0000000000..ed3cc7eed4 --- 
/dev/null +++ b/queue-5.15/netfilter-nft_set_pipapo-do-not-free-live-element.patch @@ -0,0 +1,105 @@ +From 5f490cf6d9e0835af4a3d19b8192979d36f0404e Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Wed, 10 Apr 2024 21:05:13 +0200 +Subject: netfilter: nft_set_pipapo: do not free live element + +From: Florian Westphal <fw@strlen.de> + +[ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] + +Pablo reports a crash with large batches of elements with a +back-to-back add/remove pattern. Quoting Pablo: + + add_elem("00000000") timeout 100 ms + ... + add_elem("0000000X") timeout 100 ms + del_elem("0000000X") <---------------- delete one that was just added + ... + add_elem("00005000") timeout 100 ms + + 1) nft_pipapo_remove() removes element 0000000X + Then, KASAN shows a splat. + +Looking at the remove function there is a chance that we will drop a +rule that maps to a non-deactivated element. + +Removal happens in two steps, first we do a lookup for key k and return the +to-be-removed element and mark it as inactive in the next generation. +Then, in a second step, the element gets removed from the set/map. + +The _remove function does not work correctly if we have more than one +element that shares the same key. + +This can happen if we insert an element into a set when the set already +holds an element with the same key, but the element mapping to the existing +key has timed out or is not active in the next generation. + +In such a case it's possible that removal will unmap the wrong element. +If this happens, we will leak the non-deactivated element, it becomes +unreachable. + +The element that got deactivated (and will be freed later) will +remain reachable in the set data structure, this can result in +a crash when such an element is retrieved during lookup (stale +pointer). + +Add a check that the fully matching key does in fact map to the element +that we have marked as inactive in the deactivation step. +If not, we need to continue searching. 
+ +Add a bug/warn trap at the end of the function as well, the remove +function must not ever be called with an invisible/unreachable/non-existent +element. + +v2: avoid unneeded temporary variable (Stefano) + +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Reported-by: Pablo Neira Ayuso <pablo@netfilter.org> +Reviewed-by: Stefano Brivio <sbrivio@redhat.com> +Signed-off-by: Florian Westphal <fw@strlen.de> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nft_set_pipapo.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 58eca26162735..2299ced939c47 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -1994,6 +1994,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, + rules_fx = rules_f0; + + nft_pipapo_for_each_field(f, i, m) { ++ bool last = i == m->field_count - 1; ++ + if (!pipapo_match_field(f, start, rules_fx, + match_start, match_end)) + break; +@@ -2006,16 +2008,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, + + match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); +- } + +- if (i == m->field_count) { +- priv->dirty = true; +- pipapo_drop(m, rulemap); +- return; ++ if (last && f->mt[rulemap[i].to].e == e) { ++ priv->dirty = true; ++ pipapo_drop(m, rulemap); ++ return; ++ } + } + + first_rule += rules_f0; + } ++ ++ WARN_ON_ONCE(1); /* elem_priv not found */ + } + + /** +-- +2.43.0 + diff --git a/queue-5.15/series b/queue-5.15/series index eb3855a64c..6652ca33f8 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -12,3 +12,15 @@ bpf-fix-ringbuf-memory-type-confusion-when-passing-to-helpers.patch kprobes-fix-possible-use-after-free-issue-on-kprobe-registration.patch 
revert-tracing-trigger-fix-to-return-error-if-failed-to-alloc-snapshot.patch revert-lockd-introduce-safe-async-lock-op.patch +netfilter-nf_tables-fix-potential-data-race-in-__nft.patch +netfilter-nf_tables-fix-potential-data-race-in-__nft.patch-29634 +netfilter-br_netfilter-skip-conntrack-input-hook-for.patch +netfilter-nft_set_pipapo-do-not-free-live-element.patch +netfilter-nf_flow_table-count-pending-offload-workqu.patch +netfilter-flowtable-validate-pppoe-header.patch +netfilter-flowtable-incorrect-pppoe-tuple.patch +af_unix-call-manage_oob-for-every-skb-in-unix_stream.patch +af_unix-don-t-peek-oob-data-without-msg_oob.patch +tun-limit-printing-rate-when-illegal-packet-received.patch +net-dsa-mt7530-fix-mirroring-frames-received-on-loca.patch +net-ethernet-ti-am65-cpsw-nuss-cleanup-dma-channels-.patch diff --git a/queue-5.15/tun-limit-printing-rate-when-illegal-packet-received.patch b/queue-5.15/tun-limit-printing-rate-when-illegal-packet-received.patch new file mode 100644 index 0000000000..38876fa891 --- /dev/null +++ b/queue-5.15/tun-limit-printing-rate-when-illegal-packet-received.patch @@ -0,0 +1,91 @@ +From a1c1bc74f8e13c0caec746226863c1f5392f54f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin <sashal@kernel.org> +Date: Sun, 14 Apr 2024 22:02:46 -0400 +Subject: tun: limit printing rate when illegal packet received by tun dev + +From: Lei Chen <lei.chen@smartx.com> + +[ Upstream commit f8bbc07ac535593139c875ffa19af924b1084540 ] + +vhost_worker will call tun callbacks to receive packets. If too many +illegal packets arrive, tun_do_read will keep dumping packet contents. +When console is enabled, it will cost much more cpu time to dump +packets and a soft lockup will be detected. + +net_ratelimit mechanism can be used to limit the dumping rate. 
+ +PID: 33036 TASK: ffff949da6f20000 CPU: 23 COMMAND: "vhost-32980" + #0 [fffffe00003fce50] crash_nmi_callback at ffffffff89249253 + #1 [fffffe00003fce58] nmi_handle at ffffffff89225fa3 + #2 [fffffe00003fceb0] default_do_nmi at ffffffff8922642e + #3 [fffffe00003fced0] do_nmi at ffffffff8922660d + #4 [fffffe00003fcef0] end_repeat_nmi at ffffffff89c01663 + [exception RIP: io_serial_in+20] + RIP: ffffffff89792594 RSP: ffffa655314979e8 RFLAGS: 00000002 + RAX: ffffffff89792500 RBX: ffffffff8af428a0 RCX: 0000000000000000 + RDX: 00000000000003fd RSI: 0000000000000005 RDI: ffffffff8af428a0 + RBP: 0000000000002710 R8: 0000000000000004 R9: 000000000000000f + R10: 0000000000000000 R11: ffffffff8acbf64f R12: 0000000000000020 + R13: ffffffff8acbf698 R14: 0000000000000058 R15: 0000000000000000 + ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + #5 [ffffa655314979e8] io_serial_in at ffffffff89792594 + #6 [ffffa655314979e8] wait_for_xmitr at ffffffff89793470 + #7 [ffffa65531497a08] serial8250_console_putchar at ffffffff897934f6 + #8 [ffffa65531497a20] uart_console_write at ffffffff8978b605 + #9 [ffffa65531497a48] serial8250_console_write at ffffffff89796558 + #10 [ffffa65531497ac8] console_unlock at ffffffff89316124 + #11 [ffffa65531497b10] vprintk_emit at ffffffff89317c07 + #12 [ffffa65531497b68] printk at ffffffff89318306 + #13 [ffffa65531497bc8] print_hex_dump at ffffffff89650765 + #14 [ffffa65531497ca8] tun_do_read at ffffffffc0b06c27 [tun] + #15 [ffffa65531497d38] tun_recvmsg at ffffffffc0b06e34 [tun] + #16 [ffffa65531497d68] handle_rx at ffffffffc0c5d682 [vhost_net] + #17 [ffffa65531497ed0] vhost_worker at ffffffffc0c644dc [vhost] + #18 [ffffa65531497f10] kthread at ffffffff892d2e72 + #19 [ffffa65531497f50] ret_from_fork at ffffffff89c0022f + +Fixes: ef3db4a59542 ("tun: avoid BUG, dump packet on GSO errors") +Signed-off-by: Lei Chen <lei.chen@smartx.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Acked-by: Jason Wang <jasowang@redhat.com> +Reviewed-by: Eric Dumazet 
<edumazet@google.com> +Acked-by: Michael S. Tsirkin <mst@redhat.com> +Link: https://lore.kernel.org/r/20240415020247.2207781-1-lei.chen@smartx.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + drivers/net/tun.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 42bf0a3ec632e..f0e34b2b072ee 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2096,14 +2096,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, + tun_is_little_endian(tun), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); +- pr_err("unexpected GSO type: " +- "0x%x, gso_size %d, hdr_len %d\n", +- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), +- tun16_to_cpu(tun, gso.hdr_len)); +- print_hex_dump(KERN_ERR, "tun: ", +- DUMP_PREFIX_NONE, +- 16, 1, skb->head, +- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); ++ ++ if (net_ratelimit()) { ++ netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", ++ sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), ++ tun16_to_cpu(tun, gso.hdr_len)); ++ print_hex_dump(KERN_ERR, "tun: ", ++ DUMP_PREFIX_NONE, ++ 16, 1, skb->head, ++ min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); ++ } + WARN_ON_ONCE(1); + return -EINVAL; + } +-- +2.43.0 + |