aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daniel Borkmann <daniel@iogearbox.net> 2021-12-07 23:47:35 +0100
committer: Daniel Borkmann <daniel@iogearbox.net> 2021-12-10 12:09:07 +0100
commit: 98ef069346bd1888636c8d87af25790e820472cc (patch)
tree: 7bbfbcd4817962a8fde6b48ce18c8c621db75750
parent: db52f57211b4e45f0ebb274e2c877b211dc18591 (diff)
download: bpf-pr/bpf-tstamps.tar.gz
bpf, net: xfer and access of egress tstamps for pacing [pr/bpf-tstamps]

Discussion: https://lore.kernel.org/netdev/20211207020102.3690724-1-kafai@fb.com/
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- include/linux/netdevice.h 4
-rw-r--r-- include/linux/skbuff.h 6
-rw-r--r-- net/core/dev.c 1
-rw-r--r-- net/core/filter.c 9
-rw-r--r-- net/core/net-sysfs.c 18
-rw-r--r-- net/core/skbuff.c 15
6 files changed, 42 insertions, 11 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec42495a43a5..df9141f92bbff 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2172,6 +2172,7 @@ struct net_device {
struct timer_list watchdog_timer;
int watchdog_timeo;
+ u32 xnet_flush_tstamp;
u32 proto_down_reason;
struct list_head todo_list;
@@ -4137,7 +4138,8 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
return NET_RX_DROP;
}
- skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev)));
+ __skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev)),
+ READ_ONCE(dev->xnet_flush_tstamp));
skb->priority = 0;
return 0;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666d073d5..09b670bcd7fdb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3688,7 +3688,11 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
int len, int hlen);
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
-void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+void __skb_scrub_packet(struct sk_buff *skb, bool xnet, bool tstamp);
+/*
+ * skb_scrub_packet - scrub an skb, timestamp included
+ *
+ * Keeps the two-argument interface for existing callers by forwarding to
+ * __skb_scrub_packet() with the timestamp flag always set.
+ */
+static __always_inline void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+	const bool scrub_tstamp = true;
+
+	__skb_scrub_packet(skb, xnet, scrub_tstamp);
+}
bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
diff --git a/net/core/dev.c b/net/core/dev.c
index 15ac064b5562d..1678032bd5a37 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10853,6 +10853,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
+ dev->xnet_flush_tstamp = 1;
#ifdef CONFIG_LOCKDEP
dev->nested_level = 0;
INIT_LIST_HEAD(&dev->unlink_list);
diff --git a/net/core/filter.c b/net/core/filter.c
index fe27c91e37580..69366af42141c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2107,7 +2107,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
- skb->tstamp = 0;
+ if (READ_ONCE(dev->xnet_flush_tstamp))
+ skb->tstamp = 0;
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2176,7 +2177,8 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ if (READ_ONCE(dev->xnet_flush_tstamp))
+ skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2274,7 +2276,8 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ if (READ_ONCE(dev->xnet_flush_tstamp))
+ skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9c01c642cf9ef..d8ad9dbbbf55b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -403,6 +403,23 @@ static ssize_t gro_flush_timeout_store(struct device *dev,
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
+/*
+ * Store a new xnet_flush_tstamp setting for @dev. Always returns 0.
+ *
+ * @val arrives as an unsigned long, but the field is a u32 that readers only
+ * test for zero/non-zero. Collapse it to 0/1 before storing: writing @val
+ * directly would truncate a non-zero value such as 1UL << 32 to 0 on 64-bit
+ * and silently turn the timestamp flush off.
+ */
+static int change_xnet_flush_tstamp(struct net_device *dev, unsigned long val)
+{
+	WRITE_ONCE(dev->xnet_flush_tstamp, !!val);
+	return 0;
+}
+
+/*
+ * Store handler for the xnet_flush_tstamp device attribute.
+ *
+ * Callers without CAP_NET_ADMIN get -EPERM. Otherwise the buffer is handed
+ * to netdev_store() together with change_xnet_flush_tstamp(), and that
+ * call's result is returned unchanged.
+ */
+static ssize_t xnet_flush_tstamp_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t len)
+{
+	ssize_t ret = -EPERM;
+
+	if (capable(CAP_NET_ADMIN))
+		ret = netdev_store(dev, attr, buf, len,
+				   change_xnet_flush_tstamp);
+
+	return ret;
+}
+NETDEVICE_SHOW_RW(xnet_flush_tstamp, fmt_dec);
+
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
WRITE_ONCE(dev->napi_defer_hard_irqs, val);
@@ -651,6 +668,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_flags.attr,
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
+ &dev_attr_xnet_flush_tstamp.attr,
&dev_attr_napi_defer_hard_irqs.attr,
&dev_attr_phys_port_id.attr,
&dev_attr_phys_port_name.attr,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ba2f38246f07e..b0f6b96c7b2a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5440,19 +5440,21 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
EXPORT_SYMBOL(skb_try_coalesce);
/**
- * skb_scrub_packet - scrub an skb
+ * __skb_scrub_packet - scrub an skb
*
* @skb: buffer to clean
* @xnet: packet is crossing netns
+ * @tstamp: timestamp needs scrubbing
*
- * skb_scrub_packet can be used after encapsulating or decapsulting a packet
+ * __skb_scrub_packet can be used after encapsulating or decapsulating a packet
* into/from a tunnel. Some information have to be cleared during these
* operations.
- * skb_scrub_packet can also be used to clean a skb before injecting it in
+ *
+ * __skb_scrub_packet can also be used to clean a skb before injecting it in
* another namespace (@xnet == true). We have to clear all information in the
* skb that could impact namespace isolation.
*/
-void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+void __skb_scrub_packet(struct sk_buff *skb, bool xnet, bool tstamp)
{
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
@@ -5472,9 +5474,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
ipvs_reset(skb);
skb->mark = 0;
- skb->tstamp = 0;
+ if (tstamp)
+ skb->tstamp = 0;
}
-EXPORT_SYMBOL_GPL(skb_scrub_packet);
+EXPORT_SYMBOL_GPL(__skb_scrub_packet);
/**
* skb_gso_transport_seglen - Return length of individual segments of a gso packet