aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-10 12:37:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-10 12:37:24 -0700
commit25aa0bebba72b318e71fe205bfd1236550cc9534 (patch)
treefd6740b6f837c4faf52102d24fc492a36edbb8a8
parent30813656c6b827947be024484d6da8b18e50c186 (diff)
parent5e3d20617b055e725e785e0058426368269949f3 (diff)
downloadlinux-25aa0bebba72b318e71fe205bfd1236550cc9534.tar.gz
Merge tag 'net-6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from netfilter, wireless and bpf. Still trending up in size but the good news is that the "current" regressions are resolved, AFAIK. We're getting weirdly many fixes for Wake-on-LAN and suspend/resume handling on embedded this week (most not merged yet), not sure why. But those are all for older bugs. Current release - regressions: - tls: set MSG_SPLICE_PAGES consistently when handing encrypted data over to TCP Current release - new code bugs: - eth: mlx5: correct IDs on VFs internal to the device (IPU) Previous releases - regressions: - phy: at803x: fix WoL support / reporting on AR8032 - bonding: fix incorrect deletion of ETH_P_8021AD protocol VID from slaves, leading to BUG_ON() - tun: prevent tun_build_skb() from exceeding the packet size limit - wifi: rtw89: fix 8852AE disconnection caused by RX full flags - eth/PCI: enetc: fix probing after 6fffbc7ae137 ("PCI: Honor firmware's device disabled status"), keep PCI devices around even if they are disabled / not going to be probed to be able to apply quirks on them - eth: prestera: fix handling IPv4 routes with nexthop IDs Previous releases - always broken: - netfilter: re-work garbage collection to avoid races between user-facing API and timeouts - tunnels: fix generating ipv4 PMTU error on non-linear skbs - nexthop: fix infinite nexthop bucket dump when using maximum nexthop ID - wifi: nl80211: fix integer overflow in nl80211_parse_mbssid_elems() Misc: - unix: use consistent error code in SO_PEERPIDFD - ipv6: adjust ndisc_is_useropt() to include PREFIX_INFO, in prep for upcoming IETF RFC" * tag 'net-6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (94 commits) net: hns3: fix strscpy causing content truncation issue net: tls: set MSG_SPLICE_PAGES consistently ibmvnic: Ensure login failure recovery is safe from other resets ibmvnic: Do partial reset on login failure ibmvnic: Handle DMA unmapping of login buffs in release functions ibmvnic: Unmap DMA login rsp buffer on send login fail ibmvnic: Enforce stronger sanity checks on login response net: mana: Fix MANA VF unload when hardware is unresponsive netfilter: nf_tables: remove busy mark and gc batch API netfilter: nft_set_hash: mark set element as dead when deleting from packet path netfilter: nf_tables: adapt set backend to use GC transaction API netfilter: nf_tables: GC transaction API to avoid race with control plane selftests/bpf: Add sockmap test for redirecting partial skb data selftests/bpf: fix a CI failure caused by vsock sockmap test bpf, sockmap: Fix bug that strp_done cannot be called bpf, sockmap: Fix map type error in sock_map_del_link xsk: fix refcount underflow in error path ipv6: adjust ndisc_is_useropt() to also return true for PIO selftests: forwarding: bridge_mdb: Make test more robust selftests: forwarding: bridge_mdb_max: Fix failing test with old libnet ...
-rw-r--r--MAINTAINERS27
-rw-r--r--drivers/isdn/mISDN/dsp.h2
-rw-r--r--drivers/isdn/mISDN/dsp_cmx.c2
-rw-r--r--drivers/isdn/mISDN/dsp_core.c2
-rw-r--r--drivers/net/bonding/bond_main.c4
-rw-r--r--drivers/net/dsa/ocelot/felix.c2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c111
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c29
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h1
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c112
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c5
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_fdir.c11
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h4
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c34
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c2
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c37
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c23
-rw-r--r--drivers/net/macsec.c28
-rw-r--r--drivers/net/phy/at803x.c47
-rw-r--r--drivers/net/tun.c2
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c11
-rw-r--r--drivers/net/wireguard/allowedips.c8
-rw-r--r--drivers/net/wireguard/selftest/allowedips.c16
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c5
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c2
-rw-r--r--drivers/pci/bus.c4
-rw-r--r--drivers/pci/of.c5
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/net/cfg80211.h3
-rw-r--r--include/net/netfilter/nf_tables.h120
-rw-r--r--include/trace/events/tcp.h5
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/skmsg.c10
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sock_map.c10
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c10
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/nexthop.c28
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/mptcp/protocol.c4
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/subflow.c58
-rw-r--r--net/netfilter/nf_tables_api.c307
-rw-r--r--net/netfilter/nft_set_hash.c85
-rw-r--r--net/netfilter/nft_set_pipapo.c66
-rw-r--r--net/netfilter/nft_set_rbtree.c146
-rw-r--r--net/packet/af_packet.c16
-rw-r--r--net/smc/af_smc.c77
-rw-r--r--net/smc/smc.h2
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_core.c25
-rw-r--r--net/smc/smc_sysctl.c10
-rw-r--r--net/tls/tls_device.c64
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/wireless/nl80211.c5
-rw-r--r--net/xdp/xsk.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c74
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c14
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh59
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh17
-rw-r--r--tools/testing/selftests/net/forwarding/settings1
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh13
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh9
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh6
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh35
92 files changed, 1431 insertions, 692 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0f966f05fb0d4..abfc724a2367b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2339,7 +2339,7 @@ F: drivers/phy/mediatek/
ARM/MICROCHIP (ARM64) SoC support
M: Conor Dooley <conor@kernel.org>
M: Nicolas Ferre <nicolas.ferre@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
T: git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
@@ -2348,7 +2348,7 @@ F: arch/arm64/boot/dts/microchip/
ARM/Microchip (AT91) SoC support
M: Nicolas Ferre <nicolas.ferre@microchip.com>
M: Alexandre Belloni <alexandre.belloni@bootlin.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
W: http://www.linux4sam.org
@@ -3250,7 +3250,7 @@ F: include/uapi/linux/atm*
ATMEL MACB ETHERNET DRIVER
M: Nicolas Ferre <nicolas.ferre@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
S: Supported
F: drivers/net/ethernet/cadence/
@@ -13786,7 +13786,7 @@ F: Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
F: drivers/spi/spi-at91-usart.c
MICROCHIP AUDIO ASOC DRIVERS
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/sound/atmel*
@@ -13809,7 +13809,7 @@ S: Maintained
F: drivers/crypto/atmel-ecc.*
MICROCHIP EIC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml
@@ -13882,7 +13882,7 @@ F: drivers/video/fbdev/atmel_lcdfb.c
F: include/video/atmel_lcdc.h
MICROCHIP MCP16502 PMIC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
@@ -13909,7 +13909,7 @@ F: Documentation/devicetree/bindings/mtd/atmel-nand.txt
F: drivers/mtd/nand/raw/atmel/*
MICROCHIP OTPC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml
@@ -13948,7 +13948,7 @@ F: Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
F: drivers/fpga/microchip-spi.c
MICROCHIP PWM DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-pwm@vger.kernel.org
S: Supported
@@ -13964,7 +13964,7 @@ F: drivers/iio/adc/at91-sama5d2_adc.c
F: include/dt-bindings/iio/adc/at91-sama5d2_adc.h
MICROCHIP SAMA5D2-COMPATIBLE SHUTDOWN CONTROLLER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
S: Supported
F: Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml
F: drivers/power/reset/at91-sama5d2_shdwc.c
@@ -13981,7 +13981,7 @@ S: Supported
F: drivers/spi/spi-atmel.*
MICROCHIP SSC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -14010,7 +14010,7 @@ F: drivers/usb/gadget/udc/atmel_usba_udc.*
MICROCHIP WILC1000 WIFI DRIVER
M: Ajay Singh <ajay.kathat@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-wireless@vger.kernel.org
S: Supported
F: drivers/net/wireless/microchip/wilc1000/
@@ -18508,17 +18508,14 @@ RTL8180 WIRELESS DRIVER
L: linux-wireless@vger.kernel.org
S: Orphan
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
F: drivers/net/wireless/realtek/rtl818x/rtl8180/
RTL8187 WIRELESS DRIVER
-M: Herton Ronaldo Krzesinski <herton@canonical.com>
-M: Hin-Tak Leung <htl10@users.sourceforge.net>
+M: Hin-Tak Leung <hintak.leung@gmail.com>
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
F: drivers/net/wireless/realtek/rtl818x/rtl8187/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
index fa09d511a8eda..baf31258f5c90 100644
--- a/drivers/isdn/mISDN/dsp.h
+++ b/drivers/isdn/mISDN/dsp.h
@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(void *arg);
+extern void dsp_cmx_send(struct timer_list *arg);
extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
extern int dsp_cmx_del_conf_member(struct dsp *dsp);
extern int dsp_cmx_del_conf(struct dsp_conf *conf);
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index 357b87592eb48..61cb45c5d0d84 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -1614,7 +1614,7 @@ static u16 dsp_count; /* last sample count */
static int dsp_count_valid; /* if we have last sample count */
void
-dsp_cmx_send(void *arg)
+dsp_cmx_send(struct timer_list *arg)
{
struct dsp_conf *conf;
struct dsp_conf_member *member;
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 386084530c2f8..fae95f1666883 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
}
/* set sample timer */
- timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
+ timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
dsp_spl_tl.expires = jiffies + dsp_tics;
dsp_spl_jiffies = dsp_spl_tl.expires;
add_timer(&dsp_spl_tl);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 484c9e3e5e825..447b06ea4fc9c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5901,7 +5901,9 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->hw_features = BOND_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 8da46d284e356..bef879c6d500b 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1625,8 +1625,10 @@ static void felix_teardown(struct dsa_switch *ds)
struct felix *felix = ocelot_to_felix(ocelot);
struct dsa_port *dp;
+ rtnl_lock();
if (felix->tag_proto_ops)
felix->tag_proto_ops->teardown(ds);
+ rtnl_unlock();
dsa_switch_for_each_available_port(dp, ds)
ocelot_deinit_port(ocelot, dp->index);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 1416262d42967..e0a4cb7e3f501 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1186,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
{
- struct device_node *node = pdev->dev.of_node;
struct platform_device *ierb_pdev;
struct device_node *ierb_node;
- /* Don't register with the IERB if the PF itself is disabled */
- if (!node || !of_device_is_available(node))
- return 0;
-
ierb_node = of_find_compatible_node(NULL, NULL,
"fsl,ls1028a-enetc-ierb");
if (!ierb_node || !of_device_is_available(ierb_node))
@@ -1208,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
return enetc_ierb_register_pf(ierb_pdev, pdev);
}
-static int enetc_pf_probe(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
{
- struct device_node *node = pdev->dev.of_node;
- struct enetc_ndev_priv *priv;
- struct net_device *ndev;
struct enetc_si *si;
- struct enetc_pf *pf;
int err;
- err = enetc_pf_register_with_ierb(pdev);
- if (err == -EPROBE_DEFER)
- return err;
- if (err)
- dev_warn(&pdev->dev,
- "Could not register with IERB driver: %pe, please update the device tree\n",
- ERR_PTR(err));
-
- err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
- if (err)
- return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+ err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
+ if (err) {
+ dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+ goto out;
+ }
si = pci_get_drvdata(pdev);
if (!si->hw.port || !si->hw.global) {
err = -ENODEV;
dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
- goto err_map_pf_space;
+ goto out_pci_remove;
}
err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
&si->cbd_ring);
if (err)
- goto err_setup_cbdr;
+ goto out_pci_remove;
err = enetc_init_port_rfs_memory(si);
if (err) {
dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
- goto err_init_port_rfs;
+ goto out_teardown_cbdr;
}
err = enetc_init_port_rss_memory(si);
if (err) {
dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
- goto err_init_port_rss;
+ goto out_teardown_cbdr;
}
- if (node && !of_device_is_available(node)) {
- dev_info(&pdev->dev, "device is disabled, skipping\n");
- err = -ENODEV;
- goto err_device_disabled;
+ return si;
+
+out_teardown_cbdr:
+ enetc_teardown_cbdr(&si->cbd_ring);
+out_pci_remove:
+ enetc_pci_remove(pdev);
+out:
+ return ERR_PTR(err);
+}
+
+static void enetc_psi_destroy(struct pci_dev *pdev)
+{
+ struct enetc_si *si = pci_get_drvdata(pdev);
+
+ enetc_teardown_cbdr(&si->cbd_ring);
+ enetc_pci_remove(pdev);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct enetc_ndev_priv *priv;
+ struct net_device *ndev;
+ struct enetc_si *si;
+ struct enetc_pf *pf;
+ int err;
+
+ err = enetc_pf_register_with_ierb(pdev);
+ if (err == -EPROBE_DEFER)
+ return err;
+ if (err)
+ dev_warn(&pdev->dev,
+ "Could not register with IERB driver: %pe, please update the device tree\n",
+ ERR_PTR(err));
+
+ si = enetc_psi_create(pdev);
+ if (IS_ERR(si)) {
+ err = PTR_ERR(si);
+ goto err_psi_create;
}
pf = enetc_si_priv(si);
@@ -1339,15 +1359,9 @@ err_alloc_si_res:
si->ndev = NULL;
free_netdev(ndev);
err_alloc_netdev:
-err_init_port_rss:
-err_init_port_rfs:
-err_device_disabled:
err_setup_mac_addresses:
- enetc_teardown_cbdr(&si->cbd_ring);
-err_setup_cbdr:
-err_map_pf_space:
- enetc_pci_remove(pdev);
-
+ enetc_psi_destroy(pdev);
+err_psi_create:
return err;
}
@@ -1370,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev)
enetc_free_msix(priv);
enetc_free_si_resources(priv);
- enetc_teardown_cbdr(&si->cbd_ring);
free_netdev(si->ndev);
- enetc_pci_remove(pdev);
+ enetc_psi_destroy(pdev);
+}
+
+static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct enetc_si *si;
+
+ /* Only apply quirk for disabled functions. For the ones
+ * that are enabled, enetc_pf_probe() will apply it.
+ */
+ if (node && of_device_is_available(node))
+ return;
+
+ si = enetc_psi_create(pdev);
+ if (si)
+ enetc_psi_destroy(pdev);
}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+ enetc_fixup_clear_rss_rfs);
static const struct pci_device_id enetc_pf_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 52546f625c8b0..f276b5ecb431f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -464,9 +464,9 @@ static void hns3_dbg_fill_content(char *content, u16 len,
if (result) {
if (item_len < strlen(result[i]))
break;
- strscpy(pos, result[i], strlen(result[i]));
+ memcpy(pos, result[i], strlen(result[i]));
} else {
- strscpy(pos, items[i].name, strlen(items[i].name));
+ memcpy(pos, items[i].name, strlen(items[i].name));
}
pos += item_len;
len -= item_len;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9f6890059666e..b7b51e56b0308 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
if (!if_running)
return;
+ if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+ return;
+
netif_carrier_off(ndev);
netif_tx_disable(ndev);
@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
if (!if_running)
return;
- hns3_nic_reset_all_ring(priv->ae_handle);
+ if (hns3_nic_resetting(ndev))
+ return;
+
+ if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+ return;
+
+ if (hns3_nic_reset_all_ring(priv->ae_handle))
+ return;
+
+ clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
for (i = 0; i < priv->vector_num; i++)
hns3_vector_enable(&priv->tqp_vector[i]);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 409db2e709651..0fb2eaee3e8a0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char *content, u16 len,
if (result) {
if (item_len < strlen(result[i]))
break;
- strscpy(pos, result[i], strlen(result[i]));
+ memcpy(pos, result[i], strlen(result[i]));
} else {
- strscpy(pos, items[i].name, strlen(items[i].name));
+ memcpy(pos, items[i].name, strlen(items[i].name));
}
pos += item_len;
len -= item_len;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bf675c15fbb9c..a940e35aef29d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev);
static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
static void hclge_sync_fd_table(struct hclge_dev *hdev);
static void hclge_update_fec_stats(struct hclge_dev *hdev);
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+ int wait_cnt);
static struct hnae3_ae_algo ae_algo;
@@ -7558,6 +7560,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
{
+#define HCLGE_LINK_STATUS_WAIT_CNT 3
+
struct hclge_desc desc;
struct hclge_config_mac_mode_cmd *req =
(struct hclge_config_mac_mode_cmd *)desc.data;
@@ -7582,9 +7586,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret)
+ if (ret) {
dev_err(&hdev->pdev->dev,
"mac enable fail, ret =%d.\n", ret);
+ return;
+ }
+
+ if (!enable)
+ hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
+ HCLGE_LINK_STATUS_WAIT_CNT);
}
static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
@@ -7647,10 +7657,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
} while (++i < HCLGE_PHY_LINK_STATUS_NUM);
}
-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+ int wait_cnt)
{
-#define HCLGE_MAC_LINK_STATUS_NUM 100
-
int link_status;
int i = 0;
int ret;
@@ -7663,13 +7672,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
return 0;
msleep(HCLGE_LINK_STATUS_MS);
- } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+ } while (++i < wait_cnt);
return -EBUSY;
}
static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
bool is_phy)
{
+#define HCLGE_MAC_LINK_STATUS_NUM 100
+
int link_ret;
link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
@@ -7677,7 +7688,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
if (is_phy)
hclge_phy_link_status_wait(hdev, link_ret);
- return hclge_mac_link_status_wait(hdev, link_ret);
+ return hclge_mac_link_status_wait(hdev, link_ret,
+ HCLGE_MAC_LINK_STATUS_NUM);
}
static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
@@ -10915,9 +10927,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
u32 rx_pause, tx_pause;
u8 flowctl;
- if (!phydev->link || !phydev->autoneg)
+ if (!phydev->link)
return 0;
+ if (!phydev->autoneg)
+ return hclge_mac_pause_setup_hw(hdev);
+
local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
if (phydev->pause)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index de509e5751a7c..c58c312217628 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1553,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
return 0;
}
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
{
bool tx_en, rx_en;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 45dcfef3f90cc..53eec6df51946 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -245,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
u8 pfc_bitmap);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 763d613adbcc0..df76cdaddcfb0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
struct ibmvnic_sub_crq_queue *);
static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
static void send_query_map(struct ibmvnic_adapter *adapter);
static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
static void free_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb);
static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
struct ibmvnic_stat {
char name[ETH_GSTRING_LEN];
@@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
static int ibmvnic_login(struct net_device *netdev)
{
+ unsigned long flags, timeout = msecs_to_jiffies(20000);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- unsigned long timeout = msecs_to_jiffies(20000);
int retry_count = 0;
int retries = 10;
bool retry;
@@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev)
if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
- netdev_warn(netdev, "Login timed out, retrying...\n");
- retry = true;
- adapter->init_done_rc = 0;
- retry_count++;
- continue;
+ netdev_warn(netdev, "Login timed out\n");
+ adapter->login_pending = false;
+ goto partial_reset;
}
if (adapter->init_done_rc == ABORTED) {
@@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev)
"SCRQ irq initialization failed\n");
return rc;
}
+ /* Default/timeout error handling, reset and start fresh */
} else if (adapter->init_done_rc) {
netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
adapter->init_done_rc);
- return -EIO;
+
+partial_reset:
+ /* adapter login failed, so free any CRQs or sub-CRQs
+ * and register again before attempting to login again.
+ * If we don't do this then the VIOS may think that
+ * we are already logged in and reject any subsequent
+ * attempts
+ */
+ netdev_warn(netdev,
+ "Freeing and re-registering CRQs before attempting to login again\n");
+ retry = true;
+ adapter->init_done_rc = 0;
+ release_sub_crqs(adapter, true);
+ /* Much of this is similar logic as ibmvnic_probe(),
+ * we are essentially re-initializing communication
+ * with the server. We really should not run any
+ * resets/failovers here because this is already a form
+ * of reset and we do not want parallel resets occurring
+ */
+ do {
+ reinit_init_done(adapter);
+ /* Clear any failovers we got in the previous
+ * pass since we are re-initializing the CRQ
+ */
+ adapter->failover_pending = false;
+ release_crq_queue(adapter);
+ /* If we don't sleep here then we risk an
+ * unnecessary failover event from the VIOS.
+ * This is a known VIOS issue caused by a vnic
+ * device freeing and registering a CRQ too
+ * quickly.
+ */
+ msleep(1500);
+ /* Avoid any resets, since we are currently
+ * resetting.
+ */
+ spin_lock_irqsave(&adapter->rwi_lock, flags);
+ flush_reset_queue(adapter);
+ spin_unlock_irqrestore(&adapter->rwi_lock,
+ flags);
+
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+ rc);
+ return -EIO;
+ }
+
+ rc = ibmvnic_reset_init(adapter, false);
+ if (rc)
+ netdev_err(netdev, "login recovery: Reset init failed %d\n",
+ rc);
+ /* IBMVNIC_CRQ_INIT will return EAGAIN if it
+ * fails, since ibmvnic_reset_init will free
+ * irq's in failure, we won't be able to receive
+ * new CRQs so we need to keep trying. probe()
+ * handles this similarly.
+ */
+ } while (rc == -EAGAIN && retry_count++ < retries);
}
} while (retry);
@@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev)
static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+ adapter->login_buf_sz, DMA_TO_DEVICE);
kfree(adapter->login_buf);
adapter->login_buf = NULL;
}
static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_rsp_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+ adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
kfree(adapter->login_rsp_buf);
adapter->login_rsp_buf = NULL;
}
@@ -4830,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
if (rc) {
adapter->login_pending = false;
netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
- goto buf_rsp_map_failed;
+ goto buf_send_failed;
}
return 0;
+buf_send_failed:
+ dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+ DMA_FROM_DEVICE);
buf_rsp_map_failed:
kfree(login_rsp_buffer);
adapter->login_rsp_buf = NULL;
@@ -5396,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
int num_tx_pools;
int num_rx_pools;
u64 *size_array;
+ u32 rsp_len;
int i;
/* CHECK: Test/set of login_pending does not need to be atomic
@@ -5407,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
}
adapter->login_pending = false;
- dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
- DMA_TO_DEVICE);
- dma_unmap_single(dev, adapter->login_rsp_buf_token,
- adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
/* If the number of queues requested can't be allocated by the
* server, the login response will return with code 1. We will need
* to resend the login buffer with fewer queues requested.
@@ -5447,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
return -EIO;
}
+
+ rsp_len = be32_to_cpu(login_rsp->len);
+ if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+ rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+ /* This can happen if a login request times out and there are
+ * 2 outstanding login requests sent, the LOGIN_RSP crq
+ * could have been for the older login request. So we are
+ * parsing the newer response buffer which may be incomplete
+ */
+ dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ return -EIO;
+ }
+
size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
/* variable buffer sizes are not supported, so just read the
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 2f47cfa7f06e2..460ca561819a9 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -1401,14 +1401,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
if (fsp->flow_type & FLOW_MAC_EXT)
return -EINVAL;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
dev_err(&adapter->pdev->dev,
"Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
IAVF_MAX_FDIR_FILTERS);
return -ENOSPC;
}
- spin_lock_bh(&adapter->fdir_fltr_lock);
if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1781,7 +1782,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
case ETHTOOL_GRXCLSRLCNT:
if (!FDIR_FLTR_SUPPORT(adapter))
break;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
cmd->rule_cnt = adapter->fdir_active_fltr;
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
cmd->data = IAVF_MAX_FDIR_FILTERS;
ret = 0;
break;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index 6146203efd84a..505e82ebafe47 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -722,7 +722,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
{
struct iavf_fdir_fltr *tmp;
+ bool ret = false;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
if (tmp->flow_type != fltr->flow_type)
continue;
@@ -732,11 +734,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
!memcmp(&tmp->ip_data, &fltr->ip_data,
sizeof(fltr->ip_data)) &&
!memcmp(&tmp->ext_data, &fltr->ext_data,
- sizeof(fltr->ext_data)))
- return true;
+ sizeof(fltr->ext_data))) {
+ ret = true;
+ break;
+ }
}
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
- return false;
+ return ret;
}
/**
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 9db384f66a8ee..38901d2a46807 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -195,6 +195,10 @@ struct igc_adapter {
u32 qbv_config_change_errors;
bool qbv_transition;
unsigned int qbv_count;
+ /* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+ * are protected by the qbv_tx_lock.
+ */
+ spinlock_t qbv_tx_lock;
/* OS defined structs */
struct pci_dev *pdev;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index bdeb36790d774..6f557e843e495 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4801,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
adapter->nfc_rule_count = 0;
spin_lock_init(&adapter->stats64_lock);
+ spin_lock_init(&adapter->qbv_tx_lock);
/* Assume MSI-X interrupts, will be checked during IRQ allocation */
adapter->flags |= IGC_FLAG_HAS_MSIX;
@@ -6119,15 +6120,15 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
return igc_tsn_offload_apply(adapter);
}
-static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
{
+ unsigned long flags;
int i;
adapter->base_time = 0;
adapter->cycle_time = NSEC_PER_SEC;
adapter->taprio_offload_enable = false;
adapter->qbv_config_change_errors = 0;
- adapter->qbv_transition = false;
adapter->qbv_count = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -6136,10 +6137,28 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
ring->start_time = 0;
ring->end_time = NSEC_PER_SEC;
ring->max_sdu = 0;
+ }
+
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+ adapter->qbv_transition = false;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
ring->oper_gate_closed = false;
ring->admin_gate_closed = false;
}
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+ return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+ igc_qbv_clear_schedule(adapter);
+
return 0;
}
@@ -6150,6 +6169,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
struct igc_hw *hw = &adapter->hw;
u32 start_time = 0, end_time = 0;
struct timespec64 now;
+ unsigned long flags;
size_t n;
int i;
@@ -6217,6 +6237,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
start_time += e->interval;
}
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
/* Check whether a queue gets configured.
* If not, set the start and end time to be end time.
*/
@@ -6241,6 +6263,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
}
}
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
struct net_device *dev = adapter->netdev;
@@ -6619,8 +6643,11 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
{
struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
hrtimer);
+ unsigned long flags;
unsigned int i;
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
adapter->qbv_transition = true;
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *tx_ring = adapter->tx_ring[i];
@@ -6633,6 +6660,9 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
}
}
adapter->qbv_transition = false;
+
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
return HRTIMER_NORESTART;
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index a9a1028cb17bb..de317179a7dcc 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n,
static bool __prestera_fi_is_direct(struct fib_info *fi)
{
- struct fib_nh *fib_nh;
+ struct fib_nh_common *fib_nhc;
if (fib_info_num_path(fi) == 1) {
- fib_nh = fib_info_nh(fi, 0);
- if (fib_nh->fib_nh_gw_family == AF_UNSPEC)
+ fib_nhc = fib_info_nhc(fi, 0);
+ if (fib_nhc->nhc_gw_family == AF_UNSPEC)
return true;
}
@@ -261,7 +261,7 @@ static bool
__prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
struct net_device *dev)
{
- struct fib_nh *fib_nh;
+ struct fib_nh_common *fib_nhc;
struct fib_result res;
bool reachable;
@@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
if (!prestera_util_kern_get_route(&res, tb_id, addr))
if (prestera_fi_is_direct(res.fi)) {
- fib_nh = fib_info_nh(res.fi, 0);
- if (dev == fib_nh->fib_nh_dev)
+ fib_nhc = fib_info_nhc(res.fi, 0);
+ if (dev == fib_nhc->nhc_dev)
reachable = true;
}
@@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n)
if (info->family == AF_INET) {
fen4_info = container_of(info, struct fib_entry_notifier_info,
info);
- return &fib_info_nh(fen4_info->fi, n)->nh_common;
+ return fib_info_nhc(fen4_info->fi, n);
} else if (info->family == AF_INET6) {
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
index b0128336ff01a..e869c65d8e90f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
#include "reporter_vnic.h"
+#include "en_stats.h"
#include "devlink.h"
#define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
if (err)
return err;
- err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
- VNIC_ENV_GET64(&vnic, total_error_queues));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
- VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
- VNIC_ENV_GET64(&vnic, comp_eq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
- VNIC_ENV_GET64(&vnic, async_eq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
- VNIC_ENV_GET64(&vnic, cq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
- VNIC_ENV_GET64(&vnic, invalid_command));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
- VNIC_ENV_GET64(&vnic, quota_exceeded_command));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
- VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
- VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
- VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
- if (err)
- return err;
+ if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+ VNIC_ENV_GET(&vnic, total_error_queues));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+ VNIC_ENV_GET(&vnic,
+ send_queue_priority_update_flow));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+ VNIC_ENV_GET(&vnic, comp_eq_overrun));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+ VNIC_ENV_GET(&vnic, async_eq_overrun));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+ VNIC_ENV_GET(&vnic, cq_overrun));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+ VNIC_ENV_GET(&vnic, invalid_command));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+ VNIC_ENV_GET(&vnic, quota_exceeded_command));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+ err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+ VNIC_ENV_GET64(&vnic,
+ nic_receive_steering_discard));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+ err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+ VNIC_ENV_GET64(&vnic,
+ generated_pkt_steering_fail));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+ VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+ if (err)
+ return err;
+ }
err = devlink_fmsg_obj_nest_end(fmsg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 0c88cf47af01b..1730f6a716eea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -1461,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
- if (flow_flag_test(flow, SLOW))
+ if (flow_flag_test(flow, SLOW)) {
mlx5e_tc_unoffload_from_slow_path(esw, flow);
- else
+ } else {
mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ }
mlx5e_tc_detach_mod_hdr(priv, flow, attr);
attr->modify_hdr = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1c820119e438f..c27df14df1457 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5266,6 +5266,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
struct net_device *netdev)
{
+ const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_flow_steering *fs;
int err;
@@ -5294,9 +5295,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
mlx5e_health_create_reporters(priv);
+
+ /* If netdev is already registered (e.g. move from uplink to nic profile),
+ * RTNL lock must be held before triggering netdev notifiers.
+ */
+ if (take_rtnl)
+ rtnl_lock();
+
/* update XDP supported features */
mlx5e_set_xdp_feature(netdev);
+ if (take_rtnl)
+ rtnl_unlock();
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 92377632f9e07..31708d5aa6087 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1943,9 +1943,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
- struct mlx5_esw_flow_attr *esw_attr;
- esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
remove_unready_flow(flow);
@@ -1966,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (esw_attr->int_port)
- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
-
- if (esw_attr->dest_int_port)
- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
-
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
@@ -4268,6 +4260,7 @@ static void
mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ struct mlx5_esw_flow_attr *esw_attr;
if (!attr)
return;
@@ -4285,6 +4278,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
}
+ if (mlx5e_is_eswitch_flow(flow)) {
+ esw_attr = attr->esw_attr;
+
+ if (esw_attr->int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+ esw_attr->int_port);
+
+ if (esw_attr->dest_int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+ esw_attr->dest_int_port);
+ }
+
mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
free_branch_attr(flow, attr->branch_true);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index af779c700278e..fdf2be548e855 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -60,7 +60,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
} else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
dl_port->attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+ devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
vport_num - 1, false);
}
return dl_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index d3a3fe4ce6702..7d9bbb494d95b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
for (i = 0; i < ldev->ports; i++) {
for (j = 0; j < ldev->buckets; j++) {
idx = i * ldev->buckets + j;
- if (ldev->v2p_map[i] == ports[i])
+ if (ldev->v2p_map[idx] == ports[idx])
continue;
dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 973babfaff25c..377372f0578ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -227,10 +227,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
clock = container_of(timer, struct mlx5_clock, timer);
mdev = container_of(clock, struct mlx5_core_dev, clock);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&timer->tc);
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f42abc2ea73c3..72ae560a1c684 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1989,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
- mlx5_unload_one(dev, true);
+ mlx5_unload_one(dev, false);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c4be257c043d4..682d3dc00dd19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -361,7 +361,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16
static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func)
{
- return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev)
+ return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1
: vport;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 4e42a3b9b8eec..a2fc937d54617 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
host_total_vfs = MLX5_GET(query_esw_functions_out, out,
host_params_context.host_total_vfs);
kvfree(out);
- if (host_total_vfs)
- return host_total_vfs;
+ return host_total_vfs;
}
done:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
index d6947fe13d560..8ca534ef5d031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
u32 chunk_size;
u32 index;
- chunk_size = ilog2(num_of_actions);
+ chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
/* HW modify action index granularity is at least 64B */
chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b6..c2ad0921e893c 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -8,6 +8,7 @@
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
static int mana_dealloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ unsigned long timeout = jiffies + 120 * HZ;
struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_txq *txq;
+ struct sk_buff *skb;
int i, err;
+ u32 tsleep;
if (apc->port_is_up)
return -EINVAL;
@@ -2363,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev)
* to false, but it doesn't matter since mana_start_xmit() drops any
* new packets due to apc->port_is_up being false.
*
- * Drain all the in-flight TX packets
+ * Drain all the in-flight TX packets.
+ * A timeout of 120 seconds for all the queues is used.
+ * This will break the while loop when h/w is not responding.
+ * This value of 120 has been decided here considering max
+ * number of queues.
*/
+
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
-
- while (atomic_read(&txq->pending_sends) > 0)
- usleep_range(1000, 2000);
+ tsleep = 1000;
+ while (atomic_read(&txq->pending_sends) > 0 &&
+ time_before(jiffies, timeout)) {
+ usleep_range(tsleep, tsleep + 1000);
+ tsleep <<= 1;
+ }
+ if (atomic_read(&txq->pending_sends)) {
+ err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+ if (err) {
+ netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+ err, atomic_read(&txq->pending_sends),
+ txq->gdma_txq_id);
+ }
+ break;
+ }
}
+ for (i = 0; i < apc->num_queues; i++) {
+ txq = &apc->tx_qp[i].txq;
+ while ((skb = skb_dequeue(&txq->pending_skbs))) {
+ mana_unmap_skb(skb, apc);
+ dev_kfree_skb_any(skb);
+ }
+ atomic_set(&txq->pending_sends, 0);
+ }
/* We're 100% sure the queues can no longer be woken up, because
* we're sure now mana_poll_tx_cq() can't be running.
*/
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 612b0015dc43e..432fb93aa8017 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1817,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
static void ionic_tx_timeout_work(struct work_struct *ws)
{
struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+ int err;
if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
return;
@@ -1829,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
mutex_lock(&lif->queue_lock);
ionic_stop_queues_reconfig(lif);
- ionic_start_queues_reconfig(lif);
+ err = ionic_start_queues_reconfig(lif);
mutex_unlock(&lif->queue_lock);
+
+ if (err)
+ dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__);
}
static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -2800,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif,
if (err) {
dev_err(lif->ionic->dev,
"CMB restore failed: %d\n", err);
- goto errout;
+ goto err_out;
}
}
- ionic_start_queues_reconfig(lif);
- } else {
- /* This was detached in ionic_stop_queues_reconfig() */
- netif_device_attach(lif->netdev);
+ err = ionic_start_queues_reconfig(lif);
+ if (err) {
+ dev_err(lif->ionic->dev,
+ "CMB reconfig failed: %d\n", err);
+ goto err_out;
+ }
}
-errout:
+err_out:
+ /* This was detached in ionic_stop_queues_reconfig() */
+ netif_device_attach(lif->netdev);
+
return err;
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 984dfa5d6c11c..144ec756c796a 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -743,7 +743,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsLate++;
u64_stats_update_end(&rxsc_stats->syncp);
- secy->netdev->stats.rx_dropped++;
+ DEV_STATS_INC(secy->netdev, rx_dropped);
return false;
}
@@ -767,7 +767,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
rxsc_stats->stats.InPktsNotValid++;
u64_stats_update_end(&rxsc_stats->syncp);
this_cpu_inc(rx_sa->stats->InPktsNotValid);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
return false;
}
@@ -1069,7 +1069,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsNoTag++;
u64_stats_update_end(&secy_stats->syncp);
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
continue;
}
@@ -1179,7 +1179,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsBadTag++;
u64_stats_update_end(&secy_stats->syncp);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
goto drop_nosa;
}
@@ -1196,7 +1196,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsNotUsingSA++;
u64_stats_update_end(&rxsc_stats->syncp);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
if (active_rx_sa)
this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
goto drop_nosa;
@@ -1230,7 +1230,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsLate++;
u64_stats_update_end(&rxsc_stats->syncp);
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
goto drop;
}
}
@@ -1271,7 +1271,7 @@ deliver:
if (ret == NET_RX_SUCCESS)
count_rx(dev, len);
else
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
rcu_read_unlock();
@@ -1308,7 +1308,7 @@ nosci:
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsNoSCI++;
u64_stats_update_end(&secy_stats->syncp);
- macsec->secy.netdev->stats.rx_errors++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_errors);
continue;
}
@@ -1327,7 +1327,7 @@ nosci:
secy_stats->stats.InPktsUnknownSCI++;
u64_stats_update_end(&secy_stats->syncp);
} else {
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
}
}
@@ -3422,7 +3422,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
if (!secy->operational) {
kfree_skb(skb);
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
@@ -3430,7 +3430,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
skb = macsec_encrypt(skb, dev);
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EINPROGRESS)
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
@@ -3667,9 +3667,9 @@ static void macsec_get_stats64(struct net_device *dev,
dev_fetch_sw_netstats(s, dev->tstats);
- s->rx_dropped = dev->stats.rx_dropped;
- s->tx_dropped = dev->stats.tx_dropped;
- s->rx_errors = dev->stats.rx_errors;
+ s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
+ s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
+ s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
}
static int macsec_get_iflink(const struct net_device *dev)
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c1f307d90518b..8a77ec33b4172 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -459,21 +459,27 @@ static int at803x_set_wol(struct phy_device *phydev,
phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
- /* Enable WOL function */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
- 0, AT803X_WOL_EN);
- if (ret)
- return ret;
+ /* Enable WOL function for 1588 */
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ 0, AT803X_WOL_EN);
+ if (ret)
+ return ret;
+ }
/* Enable WOL interrupt */
ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
if (ret)
return ret;
} else {
- /* Disable WoL function */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
- AT803X_WOL_EN, 0);
- if (ret)
- return ret;
+ /* Disable WoL function for 1588 */
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+ if (ret)
+ return ret;
+ }
/* Disable WOL interrupt */
ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
if (ret)
@@ -508,11 +514,11 @@ static void at803x_get_wol(struct phy_device *phydev,
wol->supported = WAKE_MAGIC;
wol->wolopts = 0;
- value = phy_read_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL);
+ value = phy_read(phydev, AT803X_INTR_ENABLE);
if (value < 0)
return;
- if (value & AT803X_WOL_EN)
+ if (value & AT803X_INTR_ENABLE_WOL)
wol->wolopts |= WAKE_MAGIC;
}
@@ -858,9 +864,6 @@ static int at803x_probe(struct phy_device *phydev)
if (phydev->drv->phy_id == ATH8031_PHY_ID) {
int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
int mode_cfg;
- struct ethtool_wolinfo wol = {
- .wolopts = 0,
- };
if (ccr < 0)
return ccr;
@@ -877,12 +880,14 @@ static int at803x_probe(struct phy_device *phydev)
break;
}
- /* Disable WOL by default */
- ret = at803x_set_wol(phydev, &wol);
- if (ret < 0) {
- phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret);
+ /* Disable WoL in 1588 register which is enabled
+ * by default
+ */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+ if (ret)
return ret;
- }
}
return 0;
@@ -2059,8 +2064,6 @@ static struct phy_driver at803x_driver[] = {
.flags = PHY_POLL_CABLE_TEST,
.config_init = at803x_config_init,
.link_change_notify = at803x_link_change_notify,
- .set_wol = at803x_set_wol,
- .get_wol = at803x_get_wol,
.suspend = at803x_suspend,
.resume = at803x_resume,
/* PHY_BASIC_FEATURES */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 25f0191df00bf..100339bc8b04a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1594,7 +1594,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
if (zerocopy)
return false;
- if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
return false;
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index a3de081cda5ee..c3ff30ab782e9 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -713,6 +713,12 @@ static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
return vninode;
}
+static void vxlan_vni_free(struct vxlan_vni_node *vninode)
+{
+ free_percpu(vninode->stats);
+ kfree(vninode);
+}
+
static int vxlan_vni_add(struct vxlan_dev *vxlan,
struct vxlan_vni_group *vg,
u32 vni, union vxlan_addr *group,
@@ -740,7 +746,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
&vninode->vnode,
vxlan_vni_rht_params);
if (err) {
- kfree(vninode);
+ vxlan_vni_free(vninode);
return err;
}
@@ -763,8 +769,7 @@ static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
struct vxlan_vni_node *v;
v = container_of(rcu, struct vxlan_vni_node, rcu);
- free_percpu(v->stats);
- kfree(v);
+ vxlan_vni_free(v);
}
static int vxlan_vni_del(struct vxlan_dev *vxlan,
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 5bf7822c53f18..0ba714ca5185c 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -6,7 +6,7 @@
#include "allowedips.h"
#include "peer.h"
-enum { MAX_ALLOWEDIPS_BITS = 128 };
+enum { MAX_ALLOWEDIPS_DEPTH = 129 };
static struct kmem_cache *node_cache;
@@ -42,7 +42,7 @@ static void push_rcu(struct allowedips_node **stack,
struct allowedips_node __rcu *p, unsigned int *len)
{
if (rcu_access_pointer(p)) {
- if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS))
+ if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
return;
stack[(*len)++] = rcu_dereference_raw(p);
}
@@ -55,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
static void root_free_rcu(struct rcu_head *rcu)
{
- struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = {
+ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
container_of(rcu, struct allowedips_node, rcu) };
unsigned int len = 1;
@@ -68,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
static void root_remove_peer_lists(struct allowedips_node *root)
{
- struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root };
+ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
unsigned int len = 1;
while (len > 0 && (node = stack[--len])) {
diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
index 78ebe2892a788..3d1f64ff2e122 100644
--- a/drivers/net/wireguard/selftest/allowedips.c
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
wg_allowedips_remove_by_peer(&t, a, &mutex);
test_negative(4, a, 192, 168, 0, 1);
- /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node
+ /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
* if something goes wrong.
*/
- for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) {
- part = cpu_to_be64(~(1LLU << (i % 64)));
- memset(&ip, 0xff, 16);
- memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+ for (i = 0; i < 64; ++i) {
+ part = cpu_to_be64(~0LLU << i);
+ memset(&ip, 0xff, 8);
+ memcpy((u8 *)&ip + 8, &part, 8);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ memcpy(&ip, &part, 8);
+ memset((u8 *)&ip + 8, 0, 8);
wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
}
-
+ memset(&ip, 0, 16);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
wg_allowedips_free(&t, &mutex);
wg_allowedips_init(&t);
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 6512267ae4ca5..4928e4e916603 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -2144,8 +2144,7 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar,
struct wmi_tlv *tlv;
void *ptr;
int i, ret, len;
- u32 *tmp_ptr;
- u8 extraie_len_with_pad = 0;
+ u32 *tmp_ptr, extraie_len_with_pad = 0;
struct ath12k_wmi_hint_short_ssid_arg *s_ssid = NULL;
struct ath12k_wmi_hint_bssid_arg *hint_bssid = NULL;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index de8a2e27f49c7..2a90bb24ba77f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1456,6 +1456,10 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
params_v1 = kzalloc(params_size, GFP_KERNEL);
+ if (!params_v1) {
+ err = -ENOMEM;
+ goto exit_params;
+ }
params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
kfree(params);
@@ -1473,6 +1477,7 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
bphy_err(drvr, "error (%d)\n", err);
}
+exit_params:
kfree(params);
exit:
return err;
diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c
index b114babec698c..c93e6250cb8b4 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.c
+++ b/drivers/net/wireless/realtek/rtw89/mac.c
@@ -2524,7 +2524,7 @@ static int cmac_dma_init(struct rtw89_dev *rtwdev, u8 mac_idx)
u32 reg;
int ret;
- if (chip_id != RTL8852A && chip_id != RTL8852B)
+ if (chip_id != RTL8852B)
return 0;
ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 5bc81cc0a2de4..46b252bbe5000 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/ioport.h>
+#include <linux/of.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
@@ -332,6 +333,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
*/
void pci_bus_add_device(struct pci_dev *dev)
{
+ struct device_node *dn = dev->dev.of_node;
int retval;
/*
@@ -344,7 +346,7 @@ void pci_bus_add_device(struct pci_dev *dev)
pci_proc_attach_device(dev);
pci_bridge_d3_update(dev);
- dev->match_driver = true;
+ dev->match_driver = !dn || of_device_is_available(dn);
retval = device_attach(&dev->dev);
if (retval < 0 && retval != -EPROBE_DEFER)
pci_warn(dev, "device attach failed (%d)\n", retval);
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index e51219f9f523c..3c158b17dcb53 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -34,11 +34,6 @@ int pci_set_of_node(struct pci_dev *dev)
if (!node)
return 0;
- if (!of_device_is_available(node)) {
- of_node_put(node);
- return -ENODEV;
- }
-
device_set_node(&dev->dev, of_fwnode_handle(node));
return 0;
}
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 054d7911bfc9f..c1637515a8a41 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -62,6 +62,7 @@ struct sk_psock_progs {
enum sk_psock_state_bits {
SK_PSOCK_TX_ENABLED,
+ SK_PSOCK_RX_STRP_ENABLED,
};
struct sk_psock_link {
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7c7d03aa9d068..d6fa7c8767ad3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
return NULL;
+ if (iftype == NL80211_IFTYPE_AP_VLAN)
+ iftype = NL80211_IFTYPE_AP;
+
for (i = 0; i < sband->n_iftype_data; i++) {
const struct ieee80211_sband_iftype_data *data =
&sband->iftype_data[i];
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 640441a2f9266..35870858ddf26 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -541,6 +542,7 @@ struct nft_set_elem_expr {
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -562,7 +564,8 @@ struct nft_set {
struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -596,7 +599,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -813,62 +815,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
-
struct nft_expr_ops;
/**
* struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1503,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
/**
@@ -1732,6 +1669,38 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable_flags(trans) \
(((struct nft_trans_flowtable *)trans->data)->flags)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1758,6 +1727,7 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
+ unsigned int gc_seq;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index bf06db8d2046c..7b1ddffa3dfc8 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set,
__field(const void *, skaddr)
__field(__u16, sport)
__field(__u16, dport)
+ __field(__u16, family)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
@@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set,
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
+ __entry->family = sk->sk_family;
p32 = (__be32 *) __entry->saddr;
*p32 = inet->inet_saddr;
@@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set,
__entry->cong_state = ca_state;
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+ TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+ show_family_name(__entry->family),
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e40aa3e3641cb..b3662119ddbce 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -384,8 +384,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
- if (event == NETDEV_DOWN &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ if (event == NETDEV_DOWN)
vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
diff --git a/net/core/filter.c b/net/core/filter.c
index 06ba0e56e3693..28a59596987a9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
if (unlikely(data_end > data_hard_end))
return -EINVAL;
- /* ALL drivers MUST init xdp->frame_sz, chicken check below */
- if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
- WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
- return -EINVAL;
- }
-
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a29508e1ff356..ef1a2eb6520bf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
+ int ret;
+
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
- return strp_init(&psock->strp, sk, &cb);
+ ret = strp_init(&psock->strp, sk, &cb);
+ if (!ret)
+ sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+ return ret;
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
static void sk_psock_done_strp(struct sk_psock *psock)
{
/* Parser has been stopped */
- if (psock->progs.stream_parser)
+ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
strp_done(&psock->strp);
}
#else
diff --git a/net/core/sock.c b/net/core/sock.c
index 6d4f28efe29a7..732fc37a4771b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1778,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
spin_unlock(&sk->sk_peer_lock);
if (!peer_pid)
- return -ESRCH;
+ return -ENODATA;
pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
put_pid(peer_pid);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 08ab108206bf8..8f07fea39d9ea 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -146,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
list_for_each_entry_safe(link, tmp, &psock->link, list) {
if (link->link_raw == link_raw) {
struct bpf_map *map = link->map;
- struct bpf_stab *stab = container_of(map, struct bpf_stab,
- map);
- if (psock->saved_data_ready && stab->progs.stream_parser)
+ struct sk_psock_progs *progs = sock_map_progs(map);
+
+ if (psock->saved_data_ready && progs->stream_parser)
strp_stop = true;
- if (psock->saved_data_ready && stab->progs.stream_verdict)
+ if (psock->saved_data_ready && progs->stream_verdict)
verdict_stop = true;
- if (psock->saved_data_ready && stab->progs.skb_verdict)
+ if (psock->saved_data_ready && progs->skb_verdict)
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b8a24734385ef..fd2eb148d24de 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
- dp->dccps_mss_cache = cur_mps;
+ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
return cur_mps;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f331e5977a844..4e3266e4d7c3c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -630,7 +630,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
- val = dp->dccps_mss_cache;
+ val = READ_ONCE(dp->dccps_mss_cache);
break;
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +739,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
trace_dccp_probe(sk, len);
- if (len > dp->dccps_mss_cache)
+ if (len > READ_ONCE(dp->dccps_mss_cache))
return -EMSGSIZE;
lock_sock(sk);
@@ -772,6 +772,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out_discard;
}
+ /* We need to check dccps_mss_cache after socket is locked. */
+ if (len > dp->dccps_mss_cache) {
+ rc = -EMSGSIZE;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 92c02c886fe73..586b1b3e35b80 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
.un.frag.__unused = 0,
.un.frag.mtu = htons(mtu),
};
- icmph->checksum = ip_compute_csum(icmph, len);
+ icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
skb_reset_transport_header(skb);
niph = skb_push(skb, sizeof(*niph));
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da05..be5498f5dd319 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
&rtm_dump_nexthop_cb, &filter);
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
dd->filter.res_bucket_nh_id != nhge->nh->id)
continue;
+ dd->ctx->bucket_index = bucket_index;
err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
RTM_NEWNEXTHOPBUCKET, portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
- goto out_err;
- }
+ if (err)
+ return err;
}
dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
- bucket_index = 0;
+ dd->ctx->bucket_index = 0;
-out:
- err = skb->len;
-out_err:
- dd->ctx->bucket_index = bucket_index;
- return err;
+ return 0;
}
static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a47..a42be96ae209b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS ||
+ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+ opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
opt->nd_opt_type == ND_OPT_PREF64 ||
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3317d1cca1568..d80658547836f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if (flags & MPTCP_CF_FASTCLOSE) {
+ if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
/* be sure to force the tcp_disconnect() path,
* to generate the egress reset
*/
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (unlikely(&msk->cb_flags)) {
+ if (unlikely(msk->cb_flags)) {
/* be sure to set the current sk state before tacking actions
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
*/
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433d..ba2a873a4d2e6 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -325,7 +325,6 @@ struct mptcp_sock {
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
- struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6d..94ae7dd01c65e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
- struct mptcp_sock *msk, *next, *head = NULL;
- struct request_sock *req;
- struct sock *sk;
+ struct request_sock *req, *head, *tail;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk, *ssk;
- /* build a list of all unaccepted mptcp sockets */
+ /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
+ * Splice the req list, so that accept() can not reach the pending ssk after
+ * the listener socket is released below.
+ */
spin_lock_bh(&queue->rskq_lock);
- for (req = queue->rskq_accept_head; req; req = req->dl_next) {
- struct mptcp_subflow_context *subflow;
- struct sock *ssk = req->sk;
+ head = queue->rskq_accept_head;
+ tail = queue->rskq_accept_tail;
+ queue->rskq_accept_head = NULL;
+ queue->rskq_accept_tail = NULL;
+ spin_unlock_bh(&queue->rskq_lock);
+
+ if (!head)
+ return;
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (req = head; req; req = req->dl_next) {
+ ssk = req->sk;
if (!sk_is_mptcp(ssk))
continue;
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
if (!subflow || !subflow->conn)
continue;
- /* skip if already in list */
sk = subflow->conn;
- msk = mptcp_sk(sk);
- if (msk->dl_next || msk == head)
- continue;
-
sock_hold(sk);
- msk->dl_next = head;
- head = msk;
- }
- spin_unlock_bh(&queue->rskq_lock);
- if (!head)
- return;
-
- /* can't acquire the msk socket lock under the subflow one,
- * or will cause ABBA deadlock
- */
- release_sock(listener_ssk);
-
- for (msk = head; msk; msk = next) {
- sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- next = msk->dl_next;
- msk->dl_next = NULL;
-
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+ /* restore the listener queue, to let the TCP code clean it up */
+ spin_lock_bh(&queue->rskq_lock);
+ WARN_ON_ONCE(queue->rskq_accept_head);
+ queue->rskq_accept_head = head;
+ queue->rskq_accept_tail = tail;
+ spin_unlock_bh(&queue->rskq_lock);
}
static int subflow_ulp_init(struct sock *sk)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d3c6ecd1f5a68..c62227ae77467 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
enum {
NFT_VALIDATE_SKIP = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -582,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -5055,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -5122,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
}
}
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
int i;
@@ -5134,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5602,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
@@ -6274,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask) &&
- !nft_set_elem_expired(ext))
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
return ext;
}
@@ -6282,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
}
EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
- struct nft_set_elem_catchall *catchall, *next;
- struct nft_set_ext *ext;
- void *elem = NULL;
-
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
-
- if (!nft_set_elem_expired(ext) ||
- nft_set_elem_mark_busy(ext))
- continue;
-
- elem = catchall->elem;
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
- break;
- }
-
- return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
@@ -6366,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
- nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
@@ -6381,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_is_active(net, ext))
continue;
kfree(elem->priv);
@@ -6777,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_elem_free;
}
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
if (err) {
@@ -6929,9 +6918,9 @@ static void nft_setelem_data_activate(const struct net *net,
nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -7095,8 +7084,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_set_elem_active(ext, genmask))
continue;
elem.priv = catchall->elem;
@@ -7170,29 +7158,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-
/*
* Stateful objects
*/
@@ -9414,6 +9379,207 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ nft_setelem_remove(ctx->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, &elem))
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = nft_pernet(trans->net);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for race with transaction, otherwise this batch refers to
+ * stale objects that might not be there anymore. Skip transaction if
+ * set has been destroyed from control plane transaction in case gc
+ * worker loses race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->net = get_net(net);
+ trans->seq = gc_seq;
+
+ return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+ if (nft_set_elem_is_dead(ext))
+ goto dead_elem;
+
+ nft_set_elem_dead(ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ return NULL;
+
+ nft_trans_gc_elem_add(gc, catchall->elem);
+ }
+
+ return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9576,11 +9742,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
- unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -9657,6 +9823,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -9764,6 +9934,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
@@ -9866,6 +10037,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nf_tables_commit_release(net);
return 0;
@@ -10915,6 +11088,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
return 0;
}
@@ -10943,10 +11117,16 @@ static void __net_exit nf_tables_exit_net(struct net *net)
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
}
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -11018,6 +11198,7 @@ static void __exit nf_tables_module_exit(void)
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b73cb0e752f7..cef5df8460009 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
@@ -314,25 +311,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;
@@ -340,26 +360,26 @@ static void nft_rhash_gc(struct work_struct *work)
if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+ nft_trans_gc_elem_add(gc, he);
}
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- he = nft_set_catchall_gc(set);
- if (he) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, he);
- }
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -394,7 +414,7 @@ static int nft_rhash_init(const struct nft_set *set,
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
@@ -422,7 +442,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 49915a2a58eb7..a5b8301afe4af 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -566,8 +566,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
- (genmask &&
+ if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@@ -601,8 +600,17 @@ out:
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ struct nft_pipapo_elem *ret;
+
+ ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net));
+ if (IS_ERR(ret))
+ return ret;
+
+ if (nft_set_elem_expired(&ret->ext))
+ return ERR_PTR(-ENOENT);
+
+ return ret;
}
/**
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
+ struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
- if (nft_set_elem_expired(&e->ext) &&
- !nft_set_elem_mark_busy(&e->ext)) {
+
+ /* synchronous gc never fails, there is no need to set on
+ * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
- pipapo_drop(m, rulemap);
- rcu_barrier();
- nft_set_elem_destroy(set, e, true);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ break;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
/* And check again current first rule, which is now the
* first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}
- e = nft_set_catchall_gc(set);
- if (e)
- nft_set_elem_destroy(set, e, true);
-
- priv->last_gc = jiffies;
+ gc = nft_trans_gc_catchall(gc, 0);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
/**
@@ -1706,7 +1739,6 @@ static void nft_pipapo_activate(const struct net *net,
return;
nft_set_elem_change_active(net, set, &e->ext);
- nft_set_elem_clear_busy(&e->ext);
}
/**
@@ -2005,8 +2037,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
goto cont;
e = f->mt[r].e;
- if (nft_set_elem_expired(&e->ext))
- goto cont;
elem.priv = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 8d73fffd2d09d..f9d4c8fcbbf82 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -215,6 +221,18 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe,
@@ -222,11 +240,12 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
+ struct net *net = read_pnet(&set->net);
struct nft_rbtree_elem *rbe_prev;
- struct nft_set_gc_batch *gcb;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
/* search for end interval coming before this element.
@@ -244,17 +263,28 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_prev);
+ /* There is always room in this trans gc for this element,
+ * memory allocation never actually happens, hence, the warning
+ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+ * this is synchronous gc which never fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe);
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -482,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -490,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -552,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -572,26 +596,40 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -604,46 +642,36 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- rbe = nft_set_catchall_gc(set);
- if (rbe) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, rbe);
- }
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a4631cb457a91..a2935bd18ed98 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
+ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
smp_rmb();
+ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0c013d2b5d8f4..f5834af5fad53 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
- WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
- WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+ WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,13 +436,60 @@ out:
return rc;
}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
+
+#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
+ (1UL << SOCK_KEEPOPEN) | \
+ (1UL << SOCK_LINGER) | \
+ (1UL << SOCK_BROADCAST) | \
+ (1UL << SOCK_TIMESTAMP) | \
+ (1UL << SOCK_DBG) | \
+ (1UL << SOCK_RCVTSTAMP) | \
+ (1UL << SOCK_RCVTSTAMPNS) | \
+ (1UL << SOCK_LOCALROUTE) | \
+ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+ (1UL << SOCK_RXQ_OVFL) | \
+ (1UL << SOCK_WIFI_STATUS) | \
+ (1UL << SOCK_NOFCS) | \
+ (1UL << SOCK_FILTER_LOCKED) | \
+ (1UL << SOCK_TSTAMP_NEW))
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet = sock_net(nsk);
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
unsigned long mask)
{
/* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
nsk->sk_mark = READ_ONCE(osk->sk_mark);
@@ -453,26 +500,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
nsk->sk_flags &= ~mask;
nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
}
-#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
- (1UL << SOCK_KEEPOPEN) | \
- (1UL << SOCK_LINGER) | \
- (1UL << SOCK_BROADCAST) | \
- (1UL << SOCK_TIMESTAMP) | \
- (1UL << SOCK_DBG) | \
- (1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVTSTAMPNS) | \
- (1UL << SOCK_LOCALROUTE) | \
- (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
- (1UL << SOCK_RXQ_OVFL) | \
- (1UL << SOCK_WIFI_STATUS) | \
- (1UL << SOCK_NOFCS) | \
- (1UL << SOCK_FILTER_LOCKED) | \
- (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2eeea4cdc7187..1f2b912c43d10 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -161,7 +161,7 @@ struct smc_connection {
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
- int rmbe_size_short;/* compressed notation */
+ int rmbe_size_comp; /* compressed notation */
int rmbe_update_limit;
/* lower limit for consumer
* cursor update
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b9b8b07aa7023..c90d9e5dda540 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->d0.gid =
conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_short;
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
- clc->r0.rmbe_size = conn->rmbe_size_short;
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f465faf2b681..6b78075404d7d 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
- int bufsize, bufsize_short;
+ int bufsize, bufsize_comp;
struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- int sk_buf_size;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf;
+ bufsize = smc->sk.sk_rcvbuf / 2;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf;
+ bufsize = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
- bufsize_short >= 0; bufsize_short--) {
+ for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+ bufsize_comp >= 0; bufsize_comp--) {
if (is_rmb) {
lock = &lgr->rmbs_lock;
- buf_list = &lgr->rmbs[bufsize_short];
+ buf_list = &lgr->rmbs[bufsize_comp];
} else {
lock = &lgr->sndbufs_lock;
- buf_list = &lgr->sndbufs[bufsize_short];
+ buf_list = &lgr->sndbufs[bufsize_comp];
}
- bufsize = smc_uncompress_bufsize(bufsize_short);
+ bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+ buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
- conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize;
+ conn->rmbe_size_comp = bufsize_comp;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index b6f79fabb9d3f..0b2a957ca5f5f 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -21,6 +21,10 @@
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
static struct ctl_table smc_table[] = {
{
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
+ .extra2 = &max_sndbuf,
},
{
.procname = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
+ .extra2 = &max_rcvbuf,
},
{ }
};
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
- WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
- WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+ WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+ WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
return 0;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 2021fe557e503..529101eb20bd5 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
+static struct page *dummy_page;
+
static void tls_device_free_ctx(struct tls_context *ctx)
{
if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}
-static int tls_device_record_close(struct sock *sk,
- struct tls_context *ctx,
- struct tls_record_info *record,
- struct page_frag *pfrag,
- unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ unsigned char record_type)
{
struct tls_prot_info *prot = &ctx->prot_info;
- int ret;
+ struct page_frag dummy_tag_frag;
/* append tag
* device will fill in the tag, we just need to append a placeholder
* use socket memory to improve coalescing (re-using a single buffer
* increases frag count)
- * if we can't allocate memory now, steal some back from data
+ * if we can't allocate memory now use the dummy page
*/
- if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
- sk->sk_allocation))) {
- ret = 0;
- tls_append_frag(record, pfrag, prot->tag_size);
- } else {
- ret = prot->tag_size;
- if (record->len <= prot->overhead_size)
- return -ENOMEM;
+ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+ !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+ dummy_tag_frag.page = dummy_page;
+ dummy_tag_frag.offset = 0;
+ pfrag = &dummy_tag_frag;
}
+ tls_append_frag(record, pfrag, prot->tag_size);
/* fill prepend */
tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
record->len - prot->overhead_size,
record_type);
- return ret;
}
static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
if (done || record->len >= max_open_record_len ||
(record->num_frags >= MAX_SKB_FRAGS - 1)) {
- rc = tls_device_record_close(sk, tls_ctx, record,
- pfrag, record_type);
- if (rc) {
- if (rc > 0) {
- size += rc;
- } else {
- size = orig_size;
- destroy_record(record);
- ctx->open_record = NULL;
- break;
- }
- }
+ tls_device_record_close(sk, tls_ctx, record,
+ pfrag, record_type);
rc = tls_push_record(sk,
tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
{
int err;
- destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
- if (!destruct_wq)
+ dummy_page = alloc_page(GFP_KERNEL);
+ if (!dummy_page)
return -ENOMEM;
+ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ if (!destruct_wq) {
+ err = -ENOMEM;
+ goto err_free_dummy;
+ }
+
err = register_netdevice_notifier(&tls_dev_notifier);
if (err)
- destroy_workqueue(destruct_wq);
+ goto err_destroy_wq;
+ return 0;
+
+err_destroy_wq:
+ destroy_workqueue(destruct_wq);
+err_free_dummy:
+ put_page(dummy_page);
return err;
}
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
unregister_netdevice_notifier(&tls_dev_notifier);
destroy_workqueue(destruct_wq);
clean_acked_data_flush();
+ put_page(dummy_page);
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b6896126bb922..4a8ee2f6badb9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
ctx->splicing_pages = true;
while (1) {
- if (sg_is_last(sg))
- msg.msg_flags = flags;
-
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0da2e6a2a7ea9..8bcf8e293308e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
if (!wiphy->mbssid_max_interfaces)
return ERR_PTR(-EINVAL);
- nla_for_each_nested(nl_elems, attrs, rem_elems)
+ nla_for_each_nested(nl_elems, attrs, rem_elems) {
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
num_elems++;
+ }
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b89adb52a977e..10ea85c031475 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -994,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xp_alloc_tx_descs(xs->pool, xs);
if (err) {
xp_put_pool(xs->pool);
+ xs->pool = NULL;
sockfd_put(sock);
goto out_unlock;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index b4f6f3a50ae58..5674a9d0cacf0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+ int s, c0, c1, p0, p1;
+ int err, n, key, value;
+ char buf[] = "abc";
+
+ key = 0;
+ value = sizeof(buf) - 1;
+ err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+ if (err)
+ return;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ goto clean_parser_map;
+
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ if (err)
+ goto close_srv;
+
+ err = add_to_sockmap(sock_map, p0, p1);
+ if (err)
+ goto close;
+
+ n = xsend(c1, buf, sizeof(buf), 0);
+ if (n < sizeof(buf))
+ FAIL("incomplete write");
+
+ n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+ if (n != sizeof(buf) - 1)
+ FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+ xclose(c0);
+ xclose(p0);
+ xclose(c1);
+ xclose(p1);
+close_srv:
+ xclose(s);
+
+clean_parser_map:
+ key = 0;
+ value = 0;
+ xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+ int parser_map = bpf_map__fd(skel->maps.parser_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_partial(family, sotype, sock_map, parser_map);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
static void test_reuseport_select_listening(int family, int sotype,
int sock_map, int verd_map,
int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
} tests[] = {
TEST(test_skb_redir_to_connected),
TEST(test_skb_redir_to_listening),
+ TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
TEST(test_msg_redir_to_listening),
};
@@ -1432,7 +1504,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
if (n < 1)
goto out;
- n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+ n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
if (n < 0)
FAIL("%s: recv() err, errno=%d", log_prefix, errno);
if (n == 0)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index 325c9f193432a..464d35bd57c70 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,12 +28,26 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} parser_map SEC(".maps");
+
bool test_sockmap = false; /* toggled by user-space */
bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
{
+ int *value;
+ __u32 key = 0;
+
+ value = bpf_map_lookup_elem(&parser_map, &key);
+ if (value && *value)
+ return *value;
+
return skb->len;
}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 0f5e88c8f4ffe..df8d90b51867a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1981,6 +1981,11 @@ basic()
run_cmd "$IP link set dev lo up"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+ run_cmd "timeout 5 $IP nexthop"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# groups
#
@@ -2201,6 +2206,11 @@ basic_res()
run_cmd "$IP nexthop bucket list fdb"
log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+ run_cmd "timeout 5 $IP nexthop bucket"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# resilient nexthop buckets get requests
#
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index ae3f9462a2b61..d0c6c499d5dab 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not added as \"permanent\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "\"permanent\" entry has a pending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not added as \"temp\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "\"temp\" entry has an unpending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not marked as \"permanent\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "Entry has a pending group timer after replace"
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not marked as \"temp\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "Entry has an unpending group timer after replace"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -850,6 +850,7 @@ cfg_test()
__fwd_test_host_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local src=$1; shift
local mode=$1; shift
local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
# Packet should only be flooded to multicast router ports when there is
# no matching MDB entry. The bridge is not configured as a multicast
# router port.
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after flood"
# Install a regular port group entry and expect the packet to not be
# locally received.
bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after installing a regular entry"
# Add a host entry and expect the packet to be locally received.
bridge mdb add dev br0 port br0 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet not locally received after adding a host entry"
# Remove the host entry and expect the packet to not be locally
# received.
bridge mdb del dev br0 port br0 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet locally received after removing a host entry"
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
fwd_test_host_ip()
{
- __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
- __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+ __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+ __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
}
fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
__fwd_test_port_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
src_ip $invalid_src action drop
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 0
check_err $? "Packet from valid source received on H2 before adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 before adding entry"
bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
filter_mode $filter_mode source_list $src_list
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 1
check_err $? "Packet from valid source not received on H2 after adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 after adding entry"
bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
filter_mode exclude
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source not received on H2 after allowing all sources"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source not received on H2 after allowing all sources"
bridge mdb del dev br0 port $swp2 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source received on H2 after deleting entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source received on H2 after deleting entry"
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
fwd_test_port_ip()
{
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"exclude"
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"include"
}
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
# IS_IN ( 2001:db8:1::2 )
local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
filter_mode include source_list 2001:db8:1::1
# IS_IN ( 2001:db8:1::2 )
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
index ae255b662ba38..3da9d93ab36fb 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -252,7 +252,8 @@ ctl4_entries_add()
local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+ local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
-t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
sleep 1
@@ -272,7 +273,8 @@ ctl4_entries_del()
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+ local dmac=01:00:5e:00:00:02
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
-t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv2_is_in_get $SIP $GRP $IPs)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv1_done_get $SIP $GRP)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
}
@@ -1328,6 +1334,11 @@ test_8021qvs()
switch_destroy
}
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+ echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index dbb9fcf759e0f..aa2eafb7b2437 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
index 072faa77f53bd..17f89c3b7c020 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -108,6 +108,8 @@ no_cable()
ip link set dev $swp3 down
}
+skip_on_veth
+
setup_prepare
tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
index c580ad6238483..39e736f30322a 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -258,11 +258,6 @@ h2_destroy()
setup_prepare()
{
- check_ethtool_mm_support
- check_tc_fp_support
- require_command lldptool
- bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
@@ -278,6 +273,19 @@ cleanup()
h1_destroy
}
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+done
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
index eb9ec4a68f84b..7594bbb490292 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -99,6 +99,8 @@ test_stats_rx()
test_stats g2a rx
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 9f5b3e2e5e954..49fa94b53a1ca 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -14,6 +14,8 @@ ALL_TESTS="
NUM_NETIFS=4
source lib.sh
+require_command $TROUTE6
+
h1_create()
{
simple_if_init $h1 2001:1:1::2/64
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9ddb68dd6a089..f69015bf2dea9 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
fi
}
+skip_on_veth()
+{
+ local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+ jq -r '.[].linkinfo.info_kind')
+
+ if [[ $kind == veth ]]; then
+ echo "SKIP: Test cannot be run with veth pairs"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
for ((i = 1; i <= NUM_NETIFS; ++i)); do
local j=$((i+1))
+ if [ -z ${NETIFS[p$i]} ]; then
+ echo "SKIP: Cannot create interface. Name not specified"
+ exit $ksft_skip
+ fi
+
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
ip link add ${NETIFS[p$i]} type veth \
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644
index 0000000000000..e7b9417537fbc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index a96cff8e72197..b0f5e55d2d0b2 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -9,6 +9,8 @@ NUM_NETIFS=4
source tc_common.sh
source lib.sh
+require_command ncat
+
tcflags="skip_hw"
h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
ip_proto icmp \
action drop
- ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
local rpid=$!
- ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+ ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
wait -n $rpid
cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
check_err $? "server output check failed"
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 683711f41aa9b..b1daad19b01ec 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -52,8 +52,8 @@ match_dst_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
index e22c2d28b6ebf..20a7cb7222b8b 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -127,6 +127,7 @@ test_l2_miss_multicast_common()
local proto=$1; shift
local sip=$1; shift
local dip=$1; shift
+ local dmac=$1; shift
local mode=$1; shift
local name=$1; shift
@@ -142,7 +143,7 @@ test_l2_miss_multicast_common()
action pass
# Before adding MDB entry.
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 1
check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
@@ -153,7 +154,7 @@ test_l2_miss_multicast_common()
# Adding MDB entry.
bridge mdb replace dev br1 port $swp2 grp $dip permanent
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 1
check_err $? "Unregistered multicast filter was hit after adding MDB entry"
@@ -164,7 +165,7 @@ test_l2_miss_multicast_common()
# Deleting MDB entry.
bridge mdb del dev br1 port $swp2 grp $dip
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 2
check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
@@ -183,10 +184,11 @@ test_l2_miss_multicast_ipv4()
local proto="ipv4"
local sip=192.0.2.1
local dip=239.1.1.1
+ local dmac=01:00:5e:01:01:01
local mode="-4"
local name="IPv4"
- test_l2_miss_multicast_common $proto $sip $dip $mode $name
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
}
test_l2_miss_multicast_ipv6()
@@ -194,10 +196,11 @@ test_l2_miss_multicast_ipv6()
local proto="ipv6"
local sip=2001:db8:1::1
local dip=ff0e::1
+ local dmac=33:33:00:00:00:01
local mode="-6"
local name="IPv6"
- test_l2_miss_multicast_common $proto $sip $dip $mode $name
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
}
test_l2_miss_multicast()
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5ac184d518099..5a5dd90348195 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
local i
tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
- flower ip_flags nofrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofrag action drop
tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
- flower ip_flags firstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags firstfrag action drop
tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
- flower ip_flags nofirstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofirstfrag action drop
# test 'nofrag' set
tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 3c2096ac97ef4..d01b73a8ed0f0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -705,6 +705,7 @@ pm_nl_del_endpoint()
local addr=$3
if [ $ip_mptcp -eq 1 ]; then
+ [ $id -ne 0 ] && addr=''
ip -n $ns mptcp endpoint delete id $id $addr
else
ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -795,10 +796,11 @@ pm_nl_check_endpoint()
fi
if [ $ip_mptcp -eq 1 ]; then
+ # get line and trim trailing whitespace
line=$(ip -n $ns mptcp endpoint show $id)
+ line="${line% }"
# the dump order is: address id flags port dev
- expected_line="$addr"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
+ [ -n "$addr" ] && expected_line="$addr"
expected_line="$expected_line $id"
[ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
[ -n "$dev" ] && expected_line="$expected_line $dev"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index dfe3d287f01d2..f838dd370f6af 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -361,6 +361,7 @@ err_buf=
tcpdump_pids=
nettest_pids=
socat_pids=
+tmpoutfile=
err() {
err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
ip link del veth_A-R1 2>/dev/null
ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+ rm -f "$tmpoutfile"
}
mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+ tmpoutfile=$(mktemp)
+
+ # Flush Exceptions, retry with TCP
+ run_cmd ${ns_a} ip route flush cached ${dst}
+ run_cmd ${ns_b} ip route flush cached ${dst}
+ run_cmd ${ns_c} ip route flush cached ${dst}
+
+ for target in "${ns_a}" "${ns_c}" ; do
+ if [ ${family} -eq 4 ]; then
+ TCPDST=TCP:${dst}:50000
+ else
+ TCPDST="TCP:[${dst}]:50000"
+ fi
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+ sleep 1
+
+ dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+ size=$(du -sb $tmpoutfile)
+ size=${size%%/tmp/*}
+
+ [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ done
+
+ rm -f "$tmpoutfile"
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface"
}
test_pmtu_ipv4_br_vxlan4_exception() {