aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-03-11 14:14:12 -0700
committerJakub Kicinski <kuba@kernel.org>2024-03-11 14:14:13 -0700
commit838b6c9cc47fc430818ec75e361151b3a3a45477 (patch)
tree9138e0b99192bdfe1134a0fa003b4bd32d6fcfe5
parente5b7aefe38f7f6258935d8a10c36552dd957048a (diff)
parenta22b042660ca1d695cc225401c72b3fb393acd49 (diff)
downloadnf-next-838b6c9cc47fc430818ec75e361151b3a3a45477.tar.gz
Merge branch 'mlxsw-support-for-nexthop-group-statistics'
Petr Machata says: ==================== mlxsw: Support for nexthop group statistics ECMP is a fundamental component in L3 designs. However, it's fragile. Many factors influence whether an ECMP group will operate as intended: hash policy (i.e. the set of fields that contribute to ECMP hash calculation), neighbor validity, hash seed (which might lead to polarization) or the type of ECMP group used (hash-threshold or resilient). At the same time, collection of statistics that would help an operator determine that the group performs as desired, is difficult. Support for nexthop group statistics and their HW collection has been introduced recently. In this patch set, add HW stats collection support to mlxsw. This patchset progresses as follows: - Patches #1 and #2 add nexthop IDs to notifiers. - Patches #3 and #4 are code-shaping. - Patches #5, #6 and #7 adjust the flow counter code. - Patches #8 and #9 add HW nexthop counters. - Patch #10 adjusts the HW counter code to allow sharing the same counter for several resilient group buckets with the same NH ID. - Patch #11 adds a selftest. ==================== Link: https://lore.kernel.org/r/cover.1709901020.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c312
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h6
-rw-r--r--include/net/nexthop.h2
-rw-r--r--net/ipv4/nexthop.c3
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh34
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh13
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh13
14 files changed, 505 insertions, 46 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ecde2086c703a4..bb642e9bb6cf8b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -176,13 +176,15 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16);
MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
- unsigned int counter_index, u64 *packets,
- u64 *bytes)
+ unsigned int counter_index, bool clear,
+ u64 *packets, u64 *bytes)
{
+ enum mlxsw_reg_mgpc_opcode op = clear ? MLXSW_REG_MGPC_OPCODE_CLEAR :
+ MLXSW_REG_MGPC_OPCODE_NOP;
char mgpc_pl[MLXSW_REG_MGPC_LEN];
int err;
- mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
+ mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, op,
MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 898d24232935db..3beb5d0847ab7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -706,8 +706,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
u16 vid_end, bool is_member, bool untagged);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
- unsigned int counter_index, u64 *packets,
- u64 *bytes);
+ unsigned int counter_index, bool clear,
+ u64 *packets, u64 *bytes);
int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
unsigned int *p_counter_index);
void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index b01b000bc71c14..3e70cee4d2f3e6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -1024,7 +1024,7 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
rulei = mlxsw_sp_acl_rule_rulei(rule);
if (rulei->counter_valid) {
err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
- &current_packets,
+ false, &current_packets,
&current_bytes);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index c8a356accdf8ef..ca80af06465f31 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -1181,9 +1181,11 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
char ratr_pl[MLXSW_REG_RATR_LEN];
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_nexthop *nh;
+ unsigned int n_done = 0;
u32 adj_hash_index = 0;
u32 adj_index = 0;
u32 adj_size = 0;
+ int err;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_is_forward(nh) ||
@@ -1192,15 +1194,27 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
&adj_hash_index);
- if (enable)
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
- else
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ if (enable) {
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ goto err_counter_enable;
+ } else {
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ }
mlxsw_sp_nexthop_eth_update(mlxsw_sp,
adj_index + adj_hash_index, nh,
true, ratr_pl);
+ n_done++;
}
return 0;
+
+err_counter_enable:
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!n_done--)
+ break;
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ }
+ return err;
}
static u64
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 221aa6a474eb10..01d81ae3662a11 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -361,7 +361,7 @@ static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_mr_tcam_route *route = route_priv;
return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
- packets, bytes);
+ false, packets, bytes);
}
static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 87617df694ab29..40ba314fbc72ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -19,6 +19,7 @@
#include <linux/net_namespace.h>
#include <linux/mutex.h>
#include <linux/genalloc.h>
+#include <linux/xarray.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
@@ -2251,7 +2252,7 @@ int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
return -EINVAL;
return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
- p_counter, NULL);
+ false, p_counter, NULL);
}
static struct mlxsw_sp_neigh_entry *
@@ -3049,6 +3050,8 @@ struct mlxsw_sp_nexthop_key {
struct fib_nh *fib_nh;
};
+struct mlxsw_sp_nexthop_counter;
+
struct mlxsw_sp_nexthop {
struct list_head neigh_list_node; /* member of neigh entry list */
struct list_head crif_list_node;
@@ -3080,8 +3083,8 @@ struct mlxsw_sp_nexthop {
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
};
- unsigned int counter_index;
- bool counter_valid;
+ struct mlxsw_sp_nexthop_counter *counter;
+ u32 id; /* NH ID for members of a NH object group. */
};
static struct net_device *
@@ -3106,8 +3109,10 @@ struct mlxsw_sp_nexthop_group_info {
int sum_norm_weight;
u8 adj_index_valid:1,
gateway:1, /* routes using the group use a gateway */
- is_resilient:1;
+ is_resilient:1,
+ hw_stats:1;
struct list_head list; /* member in nh_res_grp_list */
+ struct xarray nexthop_counters;
struct mlxsw_sp_nexthop nexthops[] __counted_by(count);
};
@@ -3151,39 +3156,148 @@ struct mlxsw_sp_nexthop_group {
bool can_destroy;
};
-void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_nexthop_counter {
+ unsigned int counter_index;
+ refcount_t ref_count;
+};
+
+static struct mlxsw_sp_nexthop_counter *
+mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nexthop_counter *nhct;
+ int err;
+
+ nhct = kzalloc(sizeof(*nhct), GFP_KERNEL);
+ if (!nhct)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nhct->counter_index);
+ if (err)
+ goto err_counter_alloc;
+
+ refcount_set(&nhct->ref_count, 1);
+ return nhct;
+
+err_counter_alloc:
+ kfree(nhct);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_counter *nhct)
+{
+ mlxsw_sp_flow_counter_free(mlxsw_sp, nhct->counter_index);
+ kfree(nhct);
+}
+
+static struct mlxsw_sp_nexthop_counter *
+mlxsw_sp_nexthop_sh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+ struct mlxsw_sp_nexthop_counter *nhct;
+ void *ptr;
+ int err;
+
+ nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
+ if (nhct) {
+ refcount_inc(&nhct->ref_count);
+ return nhct;
+ }
+
+ nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
+ if (IS_ERR(nhct))
+ return nhct;
+
+ ptr = xa_store(&nh_grp->nhgi->nexthop_counters, nh->id, nhct,
+ GFP_KERNEL);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ goto err_store;
+ }
+
+ return nhct;
+
+err_store:
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_nexthop_sh_counter_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+ struct mlxsw_sp_nexthop_counter *nhct;
+
+ nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
+ if (WARN_ON(!nhct))
+ return;
+
+ if (!refcount_dec_and_test(&nhct->ref_count))
+ return;
+
+ xa_erase(&nh_grp->nhgi->nexthop_counters, nh->id);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
+}
+
+int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
+ const char *table_adj = MLXSW_SP_DPIPE_TABLE_NAME_ADJ;
+ struct mlxsw_sp_nexthop_counter *nhct;
struct devlink *devlink;
+ bool dpipe_stats;
+
+ if (nh->counter)
+ return 0;
devlink = priv_to_devlink(mlxsw_sp->core);
- if (!devlink_dpipe_table_counter_enabled(devlink,
- MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
- return;
+ dpipe_stats = devlink_dpipe_table_counter_enabled(devlink, table_adj);
+ if (!(nh->nhgi->hw_stats || dpipe_stats))
+ return 0;
- if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
- return;
+ if (nh->id)
+ nhct = mlxsw_sp_nexthop_sh_counter_get(mlxsw_sp, nh);
+ else
+ nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
+ if (IS_ERR(nhct))
+ return PTR_ERR(nhct);
- nh->counter_valid = true;
+ nh->counter = nhct;
+ return 0;
}
-void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop *nh)
+void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
{
- if (!nh->counter_valid)
+ if (!nh->counter)
return;
- mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
- nh->counter_valid = false;
+
+ if (nh->id)
+ mlxsw_sp_nexthop_sh_counter_put(mlxsw_sp, nh);
+ else
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh->counter);
+ nh->counter = NULL;
+}
+
+static int mlxsw_sp_nexthop_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (nh->nhgi->hw_stats)
+ return mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ return 0;
}
int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh, u64 *p_counter)
{
- if (!nh->counter_valid)
+ if (!nh->counter)
return -EINVAL;
- return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
- p_counter, NULL);
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter->counter_index,
+ true, p_counter, NULL);
}
struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
@@ -3656,8 +3770,9 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (nh->counter_valid)
- mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
+ if (nh->counter)
+ mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter->counter_index,
+ true);
else
mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
@@ -3744,6 +3859,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
nh = &nhgi->nexthops[i];
if (!nh->should_offload) {
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
nh->offloaded = 0;
continue;
}
@@ -3751,6 +3867,10 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
if (nh->update || reallocate) {
int err = 0;
+ err = mlxsw_sp_nexthop_counter_update(mlxsw_sp, nh);
+ if (err)
+ return err;
+
err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
true, ratr_pl);
if (err)
@@ -4507,7 +4627,10 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ goto err_counter_enable;
+
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
if (!dev)
@@ -4531,7 +4654,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
err_nexthop_neigh_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+err_counter_enable:
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
return err;
}
@@ -4541,7 +4665,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
{
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
@@ -5006,9 +5130,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
break;
}
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
nh->ifindex = dev->ifindex;
+ nh->id = nh_obj->id;
err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
if (err)
@@ -5030,7 +5154,6 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
err_type_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
return err;
}
@@ -5041,7 +5164,7 @@ static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
nh->should_offload = 0;
}
@@ -5053,6 +5176,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group_info *nhgi;
struct mlxsw_sp_nexthop *nh;
bool is_resilient = false;
+ bool hw_stats = false;
unsigned int nhs;
int err, i;
@@ -5062,9 +5186,11 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
break;
case NH_NOTIFIER_INFO_TYPE_GRP:
nhs = info->nh_grp->num_nh;
+ hw_stats = info->nh_grp->hw_stats;
break;
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
nhs = info->nh_res_table->num_nh_buckets;
+ hw_stats = info->nh_res_table->hw_stats;
is_resilient = true;
break;
default:
@@ -5079,6 +5205,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
nhgi->is_resilient = is_resilient;
nhgi->count = nhs;
+ nhgi->hw_stats = hw_stats;
+
+ xa_init_flags(&nhgi->nexthop_counters, XA_FLAGS_ALLOC1);
+
for (i = 0; i < nhgi->count; i++) {
struct nh_notifier_single_info *nh_obj;
int weight;
@@ -5161,6 +5291,8 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
}
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
WARN_ON_ONCE(nhgi->adj_index_valid);
+ WARN_ON(!xa_empty(&nhgi->nexthop_counters));
+ xa_destroy(&nhgi->nexthop_counters);
kfree(nhgi);
}
@@ -5300,6 +5432,43 @@ err_out:
return err;
}
+static int mlxsw_sp_nexthop_obj_res_group_pre(struct mlxsw_sp *mlxsw_sp,
+ struct nh_notifier_info *info)
+{
+ struct nh_notifier_grp_info *grp_info = info->nh_grp;
+ struct mlxsw_sp_nexthop_group_info *nhgi;
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ int err;
+ int i;
+
+ nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+ if (!nh_grp)
+ return 0;
+ nhgi = nh_grp->nhgi;
+
+ if (nhgi->hw_stats == grp_info->hw_stats)
+ return 0;
+
+ nhgi->hw_stats = grp_info->hw_stats;
+
+ for (i = 0; i < nhgi->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
+
+ if (nh->offloaded)
+ nh->update = 1;
+ }
+
+ err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ if (err)
+ goto err_group_refresh;
+
+ return 0;
+
+err_group_refresh:
+ nhgi->hw_stats = !grp_info->hw_stats;
+ return err;
+}
+
static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
struct nh_notifier_info *info)
{
@@ -5476,6 +5645,79 @@ err_nexthop_obj_init:
return err;
}
+static void
+mlxsw_sp_nexthop_obj_mp_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group_info *nhgi,
+ struct nh_notifier_grp_hw_stats_info *info)
+{
+ int nhi;
+
+ for (nhi = 0; nhi < info->num_nh; nhi++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[nhi];
+ u64 packets;
+ int err;
+
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
+ if (err)
+ continue;
+
+ nh_grp_hw_stats_report_delta(info, nhi, packets);
+ }
+}
+
+static void
+mlxsw_sp_nexthop_obj_res_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group_info *nhgi,
+ struct nh_notifier_grp_hw_stats_info *info)
+{
+ int nhi = -1;
+ int bucket;
+
+ for (bucket = 0; bucket < nhgi->count; bucket++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[bucket];
+ u64 packets;
+ int err;
+
+ if (nhi == -1 || info->stats[nhi].id != nh->id) {
+ for (nhi = 0; nhi < info->num_nh; nhi++)
+ if (info->stats[nhi].id == nh->id)
+ break;
+ if (WARN_ON_ONCE(nhi == info->num_nh)) {
+ nhi = -1;
+ continue;
+ }
+ }
+
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
+ if (err)
+ continue;
+
+ nh_grp_hw_stats_report_delta(info, nhi, packets);
+ }
+}
+
+static void mlxsw_sp_nexthop_obj_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct nh_notifier_info *info)
+{
+ struct mlxsw_sp_nexthop_group_info *nhgi;
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ if (info->type != NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS)
+ return;
+
+ nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+ if (!nh_grp)
+ return;
+ nhgi = nh_grp->nhgi;
+
+ if (nhgi->is_resilient)
+ mlxsw_sp_nexthop_obj_res_hw_stats_get(mlxsw_sp, nhgi,
+ info->nh_grp_hw_stats);
+ else
+ mlxsw_sp_nexthop_obj_mp_hw_stats_get(mlxsw_sp, nhgi,
+ info->nh_grp_hw_stats);
+}
+
static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -5491,6 +5733,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
mutex_lock(&router->lock);
switch (event) {
+ case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
+ err = mlxsw_sp_nexthop_obj_res_group_pre(router->mlxsw_sp,
+ info);
+ break;
case NEXTHOP_EVENT_REPLACE:
err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
break;
@@ -5501,6 +5747,9 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
info);
break;
+ case NEXTHOP_EVENT_HW_STATS_REPORT_DELTA:
+ mlxsw_sp_nexthop_obj_hw_stats_get(router->mlxsw_sp, info);
+ break;
default:
break;
}
@@ -6734,7 +6983,10 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
#if IS_ENABLED(CONFIG_IPV6)
nh->neigh_tbl = &nd_tbl;
#endif
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ return err;
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -6750,7 +7002,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
err_nexthop_type_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
return err;
}
@@ -6759,7 +7011,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
{
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
}
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index ed3b628caafe5c..0432c7cc6b07af 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -156,10 +156,10 @@ int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl);
-void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh);
-void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
const union mlxsw_sp_l3addr *addr2)
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 7ec9cc80f11c72..7ca315ad500e7f 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -185,6 +185,7 @@ struct nh_notifier_single_info {
__be32 ipv4;
struct in6_addr ipv6;
};
+ u32 id;
u8 is_reject:1,
is_fdb:1,
has_encap:1;
@@ -192,7 +193,6 @@ struct nh_notifier_single_info {
struct nh_notifier_grp_entry_info {
u8 weight;
- u32 id;
struct nh_notifier_single_info nh;
};
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5eb3ba568f4e74..73b849be65042a 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -104,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
+ nh_info->id = nhi->nh_parent->id;
nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
@@ -150,7 +151,6 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
struct nh_info *nhi;
nhi = rtnl_dereference(nhge->nh->nh_info);
- info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhi);
@@ -407,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
struct nh_notifier_info info = {
.net = net,
.extack = extack,
+ .id = nh->id,
};
struct nh_group *nhg;
int err;
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index cdefc9a5ec340b..535865b3d1d6cc 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -123,6 +123,7 @@ TEST_FILES := devlink_lib.sh \
mirror_gre_topo_lib.sh \
mirror_lib.sh \
mirror_topo_lib.sh \
+ router_mpath_nh_lib.sh \
sch_ets_core.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index d1bf39eaf2b361..e579c2e0c462af 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -900,6 +900,33 @@ hw_stats_get()
jq ".[0].stats64.$dir.$stat"
}
+__nh_stats_get()
+{
+ local key=$1; shift
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq --argjson member_id "$member_id" --arg key "$key" \
+ '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
humanize()
{
local speed=$1; shift
@@ -2010,3 +2037,10 @@ bail_on_lldpad()
fi
fi
}
+
+absval()
+{
+ local v=$1; shift
+
+ echo $((v > 0 ? v : -v))
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 982e0d098ea9a3..3f0f5dc95542be 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -7,9 +7,12 @@ ALL_TESTS="
multipath_test
ping_ipv4_blackhole
ping_ipv6_blackhole
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -325,6 +328,16 @@ ping_ipv6_blackhole()
ip -6 nexthop del id 1001
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 mpath
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 00000000000000..7e7d62161c345a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local stats_get=$1; shift
+ local mz="$@"
+
+ local dp
+
+ RET=0
+
+ sleep 2
+ for ((dp=0; dp < 60000; dp += 10000)); do
+ local dd
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+ local t0_rp13=$(link_stats_tx_packets_get $rp13)
+ local t0_nh1=$($stats_get $group_id $nh1_id)
+ local t0_nh2=$($stats_get $group_id $nh2_id)
+
+ ip vrf exec vrf-h1 \
+ $mz -q -p 64 -d 0 -t udp \
+ "sp=1024,dp=$((dp))-$((dp + 10000))"
+ sleep 2
+
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+ local t1_rp13=$(link_stats_tx_packets_get $rp13)
+ local t1_nh1=$($stats_get $group_id $nh1_id)
+ local t1_nh2=$($stats_get $group_id $nh2_id)
+
+ local d_rp12=$((t1_rp12 - t0_rp12))
+ local d_rp13=$((t1_rp13 - t0_rp13))
+ local d_nh1=$((t1_nh1 - t0_nh1))
+ local d_nh2=$((t1_nh2 - t0_nh2))
+
+ dd=$(absval $((d_rp12 - d_nh1)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+ dd=$(absval $((d_rp13 - d_nh2)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+ done
+
+ log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local used
+
+ nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get "${mz[@]}"
+
+ used=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.used')
+ kind=$(ip -j -d link show dev $rp11 |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $used == true ]]; then
+ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get_hw "${mz[@]}"
+ elif [[ $kind == veth ]]; then
+ log_test_skip "HW stats not offloaded on veth topology"
+ fi
+}
+
+nh_stats_test_dispatch()
+{
+ local nhgtype=$1; shift
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local enabled
+ local kind
+
+ if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+ log_test_skip "NH stats test: ip doesn't support HW stats"
+ return
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats on type $nhgtype
+ enabled=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.enabled')
+ if [[ $enabled == true ]]; then
+ nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+ "$group_id" "${mz[@]}"
+ elif [[ $enabled == false ]]; then
+ check_err 1 "HW stats still disabled after enabling"
+ log_test "NH stats test"
+ else
+ log_test_skip "NH stats test: ip doesn't report hw_stats info"
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+ $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+ $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index a60ff54723b7c5..4b483d24ad007a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -5,9 +5,12 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -333,6 +336,16 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 resilient
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}