aboutsummaryrefslogtreecommitdiffstats
path: root/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch
diff options
context:
space:
mode:
Diffstat (limited to 'queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch')
-rw-r--r--queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch220
1 files changed, 220 insertions, 0 deletions
diff --git a/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch b/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch
new file mode 100644
index 0000000000..a4640b1744
--- /dev/null
+++ b/queue-5.15/netfilter-br_netfilter-skip-conntrack-input-hook-for.patch
@@ -0,0 +1,220 @@
+From ad062a1727c18b732542d3f31753e5ba94e5c2ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 11:24:59 +0200
+Subject: netfilter: br_netfilter: skip conntrack input hook for promisc
+ packets
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 751de2012eafa4d46d8081056761fa0e9cc8a178 ]
+
+For historical reasons, when bridge device is in promisc mode, packets
+that are directed to the taps follow bridge input hook path. This patch
+adds a workaround to reset conntrack for these packets.
+
+Jianbo Liu reports warning splats in their test infrastructure where
+cloned packets reach the br_netfilter input hook to confirm the
+conntrack object.
+
+Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has
+reached the input hook because it is passed up to the bridge device to
+reach the taps.
+
+[ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter]
+[ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core
+[ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19
+[ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter]
+[ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1
+[ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202
+[ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000
+[ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000
+[ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003
+[ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000
+[ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800
+[ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000
+[ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0
+[ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
+0000000000000000
+[ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
+0000000000000400
+[ 57.585440] Call Trace:
+[ 57.585721] <IRQ>
+[ 57.585976] ? __warn+0x7d/0x130
+[ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter]
+[ 57.586811] ? report_bug+0xf1/0x1c0
+[ 57.587177] ? handle_bug+0x3f/0x70
+[ 57.587539] ? exc_invalid_op+0x13/0x60
+[ 57.587929] ? asm_exc_invalid_op+0x16/0x20
+[ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter]
+[ 57.588825] nf_hook_slow+0x3d/0xd0
+[ 57.589188] ? br_handle_vlan+0x4b/0x110
+[ 57.589579] br_pass_frame_up+0xfc/0x150
+[ 57.589970] ? br_port_flags_change+0x40/0x40
+[ 57.590396] br_handle_frame_finish+0x346/0x5e0
+[ 57.590837] ? ipt_do_table+0x32e/0x430
+[ 57.591221] ? br_handle_local_finish+0x20/0x20
+[ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter]
+[ 57.592286] ? br_handle_local_finish+0x20/0x20
+[ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter]
+[ 57.593348] ? br_handle_local_finish+0x20/0x20
+[ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat]
+[ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter]
+[ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter]
+[ 57.595280] br_handle_frame+0x1f3/0x3d0
+[ 57.595676] ? br_handle_local_finish+0x20/0x20
+[ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0
+[ 57.596566] __netif_receive_skb_core+0x25b/0xfc0
+[ 57.597017] ? __napi_build_skb+0x37/0x40
+[ 57.597418] __netif_receive_skb_list_core+0xfb/0x220
+
+Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack")
+Reported-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_input.c | 15 +++++++++++----
+ net/bridge/br_netfilter_hooks.c | 6 ++++++
+ net/bridge/br_private.h | 1 +
+ net/bridge/netfilter/nf_conntrack_bridge.c | 14 ++++++++++----
+ 4 files changed, 28 insertions(+), 8 deletions(-)
+
+diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
+index 54bfcdf692732..f3d49343f7dbe 100644
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
+ return netif_receive_skb(skb);
+ }
+
+-static int br_pass_frame_up(struct sk_buff *skb)
++static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
+ {
+ struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
+ br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
+ BR_MCAST_DIR_TX);
+
++ BR_INPUT_SKB_CB(skb)->promisc = promisc;
++
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(indev), NULL, skb, indev, NULL,
+ br_netif_receive_skb);
+@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_vlan *vlan;
+ struct net_bridge *br;
++ bool promisc;
+ u16 vid = 0;
+ u8 state;
+
+@@ -102,7 +105,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
+ if (p->flags & BR_LEARNING)
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
+
+- local_rcv = !!(br->dev->flags & IFF_PROMISC);
++ promisc = !!(br->dev->flags & IFF_PROMISC);
++ local_rcv = promisc;
++
+ if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
+ /* by definition the broadcast is also a multicast address */
+ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
+@@ -165,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
+ unsigned long now = jiffies;
+
+ if (test_bit(BR_FDB_LOCAL, &dst->flags))
+- return br_pass_frame_up(skb);
++ return br_pass_frame_up(skb, false);
+
+ if (now != dst->used)
+ dst->used = now;
+@@ -178,7 +183,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
+ }
+
+ if (local_rcv)
+- return br_pass_frame_up(skb);
++ return br_pass_frame_up(skb, promisc);
+
+ out:
+ return 0;
+@@ -350,6 +355,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+ goto forward;
+ }
+
++ BR_INPUT_SKB_CB(skb)->promisc = false;
++
+ /* The else clause should be hit when nf_hook():
+ * - returns < 0 (drop/error)
+ * - returns = 0 (stolen/nf_queue)
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 8a114a5000466..9981e0dfdd4d3 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -584,11 +584,17 @@ static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
++ if (promisc) {
++ nf_reset_ct(skb);
++ return NF_ACCEPT;
++ }
++
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index ff10ddeeb50ff..fe61d3b8d0cc2 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -547,6 +547,7 @@ struct br_input_skb_cb {
+ #endif
+ u8 proxyarp_replied:1;
+ u8 src_port_isolated:1;
++ u8 promisc:1;
+ #ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ u8 vlan_filtered:1;
+ #endif
+diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
+index 83743e95939b1..fbdb1ad448c3a 100644
+--- a/net/bridge/netfilter/nf_conntrack_bridge.c
++++ b/net/bridge/netfilter/nf_conntrack_bridge.c
+@@ -293,18 +293,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+ static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- enum ip_conntrack_info ctinfo;
++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
++ struct nf_conntrack *nfct = skb_nfct(skb);
+ struct nf_conn *ct;
+
+- if (skb->pkt_type == PACKET_HOST)
++ if (promisc) {
++ nf_reset_ct(skb);
++ return NF_ACCEPT;
++ }
++
++ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+- ct = nf_ct_get(skb, &ctinfo);
+- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
++ ct = container_of(nfct, struct nf_conn, ct_general);
++ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+--
+2.43.0
+