author     Paolo Abeni <pabeni@redhat.com>    2023-02-16 10:39:30 +0100
committer  Paolo Abeni <pabeni@redhat.com>    2023-02-16 10:39:30 +0100
commit     e9ab2559e2c501593c58a02a956a6005a2a749fb (patch)
tree       45bc5299e267357a6b3732102e4158227138b209
parent     10d13421a6ae414ab91d4f2103fc90d1f4216736 (diff)
parent     2d2e75d2d4a2245175d77899764b56e19c5769b4 (diff)
Merge branch 'net-sched-transition-actions-to-pcpu-stats-and-rcu'
Pedro Tammela says:

====================
net/sched: transition actions to pcpu stats and rcu

Following the work done for act_pedit[0], transition the remaining tc actions to percpu stats and RCU, whenever possible. Percpu stats make updating the action stats very cheap, and combining them with RCU-managed action parameters makes it possible to get rid of the per-action lock in the datapath.

For act_connmark and act_nat we run the following tests:
- tc filter add dev ens2f0 ingress matchall action connmark
- tc filter add dev ens2f0 ingress matchall action nat ingress any 10.10.10.10

Our setup consists of a 26-core Intel CPU and a 25G NIC. We use TRex to send 10 Mpps of TCP packets and take perf measurements. Both actions improved performance as expected, since the datapath lock disappeared.

For act_pedit we move the drop counter to percpu, when available.
For act_gate we move the counters to percpu, when available.

[0] https://lore.kernel.org/all/20230131145149.3776656-1-pctammela@mojatatu.com/
====================

Link: https://lore.kernel.org/r/20230214211534.735718-1-pctammela@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
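[Editor's note] Before the per-file hunks, a quick illustration of the shared datapath pattern may help: the per-action spinlock is dropped in favour of an RCU-managed parameter block plus the percpu stats helpers. The sketch below only paraphrases the act_nat/act_connmark changes further down; the "example" names are placeholders for this note and are not part of the series.

/* Read side, modelled on tcf_nat_act()/tcf_connmark_act() below.
 * "tcf_example*" names are illustrative placeholders.
 */
TC_INDIRECT_SCOPE int tcf_example_act(struct sk_buff *skb,
				      const struct tc_action *a,
				      struct tcf_result *res)
{
	struct tcf_example *p = to_example(a);
	struct tcf_example_parms *parms;
	int action;

	tcf_lastuse_update(&p->tcf_tm);
	tcf_action_update_bstats(&p->common, skb);	/* percpu packet/byte counters */

	action = READ_ONCE(p->tcf_action);
	parms = rcu_dereference_bh(p->parms);		/* lockless parameter access */

	/* ... act on the packet using parms->fields; no tcf_lock taken ... */

	return action;
}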
-rw-r--r--    include/net/tc_act/tc_connmark.h      9
-rw-r--r--    include/net/tc_act/tc_nat.h          10
-rw-r--r--    net/sched/act_connmark.c            107
-rw-r--r--    net/sched/act_gate.c                 30
-rw-r--r--    net/sched/act_nat.c                  72
-rw-r--r--    net/sched/act_pedit.c                 4
6 files changed, 148 insertions(+), 84 deletions(-)
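[Editor's note] The control-path counterpart, which recurs in the act_connmark and act_nat hunks below, allocates a new parameter block, swaps it in under tcf_lock with rcu_replace_pointer(), and frees the old block after an RCU grace period. Again a hedged sketch with placeholder names, not the literal patch code:

	/* On configure/replace, roughly (compare tcf_nat_init() below): */
	struct tcf_example_parms *nparms, *oparms;

	nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
	if (!nparms)
		return -ENOMEM;
	/* ... fill nparms from the TCA_* netlink attributes ... */

	spin_lock_bh(&p->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	oparms = rcu_replace_pointer(p->parms, nparms,
				     lockdep_is_held(&p->tcf_lock));
	spin_unlock_bh(&p->tcf_lock);

	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	if (oparms)
		kfree_rcu(oparms, rcu);		/* old parameters freed after a grace period */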
diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h
index 1f4cb477bb5d61..e8dd77a9674803 100644
--- a/include/net/tc_act/tc_connmark.h
+++ b/include/net/tc_act/tc_connmark.h
@@ -4,10 +4,15 @@
#include <net/act_api.h>
-struct tcf_connmark_info {
- struct tc_action common;
+struct tcf_connmark_parms {
struct net *net;
u16 zone;
+ struct rcu_head rcu;
+};
+
+struct tcf_connmark_info {
+ struct tc_action common;
+ struct tcf_connmark_parms __rcu *parms;
};
#define to_connmark(a) ((struct tcf_connmark_info *)a)
diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h
index c1440716081287..c869274ac529b2 100644
--- a/include/net/tc_act/tc_nat.h
+++ b/include/net/tc_act/tc_nat.h
@@ -5,13 +5,17 @@
#include <linux/types.h>
#include <net/act_api.h>
-struct tcf_nat {
- struct tc_action common;
-
+struct tcf_nat_parms {
__be32 old_addr;
__be32 new_addr;
__be32 mask;
u32 flags;
+ struct rcu_head rcu;
+};
+
+struct tcf_nat {
+ struct tc_action common;
+ struct tcf_nat_parms __rcu *parms;
};
#define to_tcf_nat(a) ((struct tcf_nat *)a)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 7e63ff7e3ed7a3..8dabfb52ea3db3 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = to_connmark(a);
+ struct tcf_connmark_parms *parms;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
- spin_lock(&ca->tcf_lock);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
+
+ parms = rcu_dereference_bh(ca->parms);
switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
@@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
c = nf_ct_get(skb, &ctinfo);
if (c) {
skb->mark = READ_ONCE(c->mark);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
- goto out;
+ goto count;
}
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- proto, ca->net, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net,
+ &tuple))
goto out;
- zone.id = ca->zone;
+ zone.id = parms->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
- thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
+ thash = nf_conntrack_find_get(parms->net, &zone, &tuple);
if (!thash)
goto out;
c = nf_ct_tuplehash_to_ctrack(thash);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
+count:
+ /* using overlimits stats to count how many packets marked */
+ tcf_action_inc_overlimit_qstats(&ca->common);
out:
- spin_unlock(&ca->tcf_lock);
- return ca->tcf_action;
+ return READ_ONCE(ca->tcf_action);
}
static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
@@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
+ struct tcf_connmark_parms *nparms, *oparms;
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
@@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_CONNMARK_PARMS])
return -EINVAL;
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms)
+ return -ENOMEM;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
index = parm->index;
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_connmark_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_connmark_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- return ret;
+ err = ret;
+ goto out_free;
}
ci = to_connmark(*a);
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
- goto release_idr;
- tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->net = net;
- ci->zone = parm->zone;
+
+ nparms->net = net;
+ nparms->zone = parm->zone;
ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
- if (bind)
- return 0;
- if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ if (bind) {
+ err = 0;
+ goto out_free;
}
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ err = -EEXIST;
goto release_idr;
- /* replacing action and zone */
- spin_lock_bh(&ci->tcf_lock);
- goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->zone = parm->zone;
- spin_unlock_bh(&ci->tcf_lock);
- if (goto_ch)
- tcf_chain_put_by_act(goto_ch);
+ }
+
+ nparms->net = rtnl_dereference(ci->parms)->net;
+ nparms->zone = parm->zone;
+
ret = 0;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ spin_lock_bh(&ci->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
+ spin_unlock_bh(&ci->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ if (oparms)
+ kfree_rcu(oparms, rcu);
+
return ret;
+
release_idr:
tcf_idr_release(*a, bind);
+out_free:
+ kfree(nparms);
return err;
}
@@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
+ struct tcf_connmark_parms *parms;
struct tcf_t t;
spin_lock_bh(&ci->tcf_lock);
+ parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));
+
opt.action = ci->tcf_action;
- opt.zone = ci->zone;
+ opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -201,6 +219,16 @@ nla_put_failure:
return -1;
}
+static void tcf_connmark_cleanup(struct tc_action *a)
+{
+ struct tcf_connmark_info *ci = to_connmark(a);
+ struct tcf_connmark_parms *parms;
+
+ parms = rcu_dereference_protected(ci->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
@@ -208,6 +236,7 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
+ .cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9b8def0be41e99..c9a811f4c7eeed 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
-
- spin_lock(&gact->tcf_lock);
+ int action = READ_ONCE(gact->tcf_action);
tcf_lastuse_update(&gact->tcf_tm);
- bstats_update(&gact->tcf_bstats, skb);
+ tcf_action_update_bstats(&gact->common, skb);
+ spin_lock(&gact->tcf_lock);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
+ return action;
}
- if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+ if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) {
+ spin_unlock(&gact->tcf_lock);
goto drop;
+ }
if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
- gact->tcf_qstats.overlimits++;
- goto drop;
+ spin_unlock(&gact->tcf_lock);
+ goto overlimit;
}
}
-
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
-drop:
- gact->tcf_qstats.drops++;
- spin_unlock(&gact->tcf_lock);
+ return action;
+overlimit:
+ tcf_action_inc_overlimit_qstats(&gact->common);
+drop:
+ tcf_action_inc_drop_qstats(&gact->common);
return TC_ACT_SHOT;
}
@@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return 0;
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_gate_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_gate_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 74c74be3304844..4184af5abbf36b 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,6 +38,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
+ struct tcf_nat_parms *nparm, *oparm;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
@@ -59,8 +60,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_nat_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops,
+ bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -79,19 +80,31 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
+
+ nparm = kzalloc(sizeof(*nparm), GFP_KERNEL);
+ if (!nparm) {
+ err = -ENOMEM;
+ goto release_idr;
+ }
+
+ nparm->old_addr = parm->old_addr;
+ nparm->new_addr = parm->new_addr;
+ nparm->mask = parm->mask;
+ nparm->flags = parm->flags;
+
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
- p->old_addr = parm->old_addr;
- p->new_addr = parm->new_addr;
- p->mask = parm->mask;
- p->flags = parm->flags;
-
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (oparm)
+ kfree_rcu(oparm, rcu);
+
return ret;
release_idr:
tcf_idr_release(*a, bind);
@@ -103,6 +116,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
struct iphdr *iph;
__be32 old_addr;
__be32 new_addr;
@@ -113,18 +127,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
int ihl;
int noff;
- spin_lock(&p->tcf_lock);
-
tcf_lastuse_update(&p->tcf_tm);
- old_addr = p->old_addr;
- new_addr = p->new_addr;
- mask = p->mask;
- egress = p->flags & TCA_NAT_FLAG_EGRESS;
- action = p->tcf_action;
+ tcf_action_update_bstats(&p->common, skb);
- bstats_update(&p->tcf_bstats, skb);
+ action = READ_ONCE(p->tcf_action);
- spin_unlock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
+ old_addr = parms->old_addr;
+ new_addr = parms->new_addr;
+ mask = parms->mask;
+ egress = parms->flags & TCA_NAT_FLAG_EGRESS;
if (unlikely(action == TC_ACT_SHOT))
goto drop;
@@ -248,9 +260,7 @@ out:
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
+ tcf_action_inc_drop_qstats(&p->common);
return TC_ACT_SHOT;
}
@@ -264,15 +274,20 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
+ struct tcf_nat_parms *parms;
struct tcf_t t;
spin_lock_bh(&p->tcf_lock);
- opt.old_addr = p->old_addr;
- opt.new_addr = p->new_addr;
- opt.mask = p->mask;
- opt.flags = p->flags;
+
opt.action = p->tcf_action;
+ parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
+
+ opt.old_addr = parms->old_addr;
+ opt.new_addr = parms->new_addr;
+ opt.mask = parms->mask;
+ opt.flags = parms->flags;
+
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -289,6 +304,16 @@ nla_put_failure:
return -1;
}
+static void tcf_nat_cleanup(struct tc_action *a)
+{
+ struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.id = TCA_ID_NAT,
@@ -296,6 +321,7 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat_act,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
+ .cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 35ebe5d5c261aa..77d288d384ae63 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -443,9 +443,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
goto done;
bad:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.overlimits++;
- spin_unlock(&p->tcf_lock);
+ tcf_action_inc_overlimit_qstats(&p->common);
done:
return p->tcf_action;
}