aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daniel Borkmann <daniel@iogearbox.net> 2021-09-16 12:33:25 +0000
committer: Daniel Borkmann <daniel@iogearbox.net> 2021-09-17 00:31:03 +0200
commit: f7d619a946e981177777983af26e9e31163ffb38 (patch)
tree: 5230a4eed85fef015778eb9d0cd8ffd55a81e443
parent: 3a029e1f3d6e2ee809e85abecce619a48016bd4b (diff)
download: bpf-pr/bpf-tstamp.tar.gz
net: skb clock bases (branch: pr/bpf-tstamp)

TODO:
 - remove all skb_mstamp_ns users
 - move __net_timestamp from input path to later in the stack so that
   tc ingress doesn't hit/override it yet

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- drivers/net/loopback.c 3
-rw-r--r-- include/linux/skbuff.h 35
-rw-r--r-- net/bridge/br_forward.c 1
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/core/filter.c 3
-rw-r--r-- net/core/skbuff.c 1
-rw-r--r-- net/ipv4/ip_forward.c 1
-rw-r--r-- net/ipv4/tcp_output.c 17
-rw-r--r-- net/ipv6/ip6_output.c 1
-rw-r--r-- net/netfilter/xt_time.c 3
-rw-r--r-- net/openvswitch/vport.c 1
-rw-r--r-- net/sched/sch_fq.c 3
-rw-r--r-- net/xfrm/xfrm_interface.c 1
13 files changed, 38 insertions, 34 deletions
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index a1c77cc0041657..83ec069670532f 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -72,9 +72,6 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
skb_tx_timestamp(skb);
- /* do not fool net_timestamp_check() with various clock bases */
- skb->tstamp = 0;
-
skb_orphan(skb);
/* Before queueing this packet to netif_rx(),
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db3e8258a..c2e0b977569b29 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -611,13 +611,15 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
+/* skb->tstamp clock bases: */
+#define SKB_TSTAMP_BASE_TAI 0
+#define SKB_TSTAMP_BASE_MONO 1
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
- * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
- * for retransmit timer
* @rbnode: RB tree node, alternative to next/prev for netem/tcp
* @list: queue head
* @sk: Socket we are owned by
@@ -743,10 +745,9 @@ struct sk_buff {
int ip_defrag_offset;
};
- union {
- ktime_t tstamp;
- u64 skb_mstamp_ns; /* earliest departure time */
- };
+ /* tstamp_base bit clarifies the clock base (monotonic vs tai) */
+ ktime_t tstamp;
+
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
@@ -872,6 +873,7 @@ struct sk_buff {
__u8 decrypted:1;
#endif
__u8 slow_gro:1;
+ __u8 tstamp_base:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -3812,9 +3814,26 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb,
stamp->tv_nsec = ts.tv_nsec;
}
+static inline bool skb_has_tstamp_mono(const struct sk_buff *skb)
+{
+ return skb->tstamp_base == SKB_TSTAMP_BASE_MONO;
+}
+
+static inline void skb_set_tstamp_mono(struct sk_buff *skb, ktime_t time)
+{
+ skb->tstamp = time;
+ skb->tstamp_base = SKB_TSTAMP_BASE_MONO;
+}
+
+static inline void skb_set_tstamp_tai(struct sk_buff *skb, ktime_t time)
+{
+ skb->tstamp = time;
+ skb->tstamp_base = SKB_TSTAMP_BASE_TAI;
+}
+
static inline void __net_timestamp(struct sk_buff *skb)
{
- skb->tstamp = ktime_get_real();
+ skb_set_tstamp_tai(skb, ktime_get_real());
}
static inline ktime_t net_timedelta(ktime_t t)
@@ -4679,8 +4698,6 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
skb->from_ingress = from_ingress;
- if (skb->from_ingress)
- skb->tstamp = 0;
#endif
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ec646656dbf14d..32ec8debca805f 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -62,7 +62,6 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->tstamp = 0;
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26ddeb..dc17b454426822 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2137,7 +2137,7 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
- skb->tstamp = 0;
+ skb_set_tstamp_tai(skb, 0);
if (static_branch_unlikely(&netstamp_needed_key))
__net_timestamp(skb);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e32cee2c46900..59dfc3fa1c2d10 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2108,7 +2108,6 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
- skb->tstamp = 0;
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2177,7 +2176,6 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2275,7 +2273,6 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f9311762cc475b..4afceb338083d7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5467,7 +5467,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
ipvs_reset(skb);
skb->mark = 0;
- skb->tstamp = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819f949b5e..06ee4696703c0c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -79,7 +79,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
if (unlikely(opt->optlen))
ip_forward_options(skb);
- skb->tstamp = 0;
return dst_output(net, sk, skb);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d72f3ea48c4ef..224770d6e8ffc5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1254,7 +1254,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ skb_set_tstamp_mono(skb, tp->tcp_wstamp_ns);
if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
@@ -2625,8 +2625,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unsigned int limit;
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
- /* "skb_mstamp_ns" is used as a start point for the retransmit timer */
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ /* tstamp is used as a start point for the retransmit timer */
+ tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ skb_set_tstamp_mono(skb, tp->tcp_wstamp_ns);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
@@ -3556,11 +3557,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
- skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
+ skb_set_tstamp_mono(skb, cookie_init_timestamp(req, now));
else
#endif
{
- skb->skb_mstamp_ns = now;
+ skb_set_tstamp_mono(skb, now);
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3609,7 +3610,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
synack_type, &opts);
- skb->skb_mstamp_ns = now;
+ skb_set_tstamp_mono(skb, now);
tcp_add_tx_delay(skb, tp);
return skb;
@@ -3786,9 +3787,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
tcp_chrono_start(sk, TCP_CHRONO_BUSY);
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
-
- syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
-
+ skb_set_tstamp_mono(syn, syn_data->tstamp);
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
* we keep in write queue in case of a retransmit, as we
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 12f985f43bccfb..7ee1c861805974 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -440,7 +440,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
}
#endif
- skb->tstamp = 0;
return dst_output(net, sk, skb);
}
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 6aa12d0f54e23c..478183a792d6a9 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -166,8 +166,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* We need real time here, but we can neither use skb->tstamp
* nor __net_timestamp().
*
- * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
- * use different clock types (real vs monotonic).
+ * skb->tstamp can be in different clock bases (real vs monotonic).
*
* Suppose you have two rules:
* 1. match before 13:00
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index cf2ce581248968..88deb5b41429f5 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -507,7 +507,6 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
}
skb->dev = vport->dev;
- skb->tstamp = 0;
vport->ops->send(skb);
return;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 2fb76fc0cc31bf..982c2d43bfde8e 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -448,7 +448,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
- if (!skb->tstamp) {
+ if (!skb_has_tstamp_mono(skb) || !skb->tstamp) {
+ skb_set_tstamp_mono(skb, 0);
fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
} else {
/* Check if packet timestamp is too far in the future.
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 41de46b5ffa94a..df5c3dd628b6eb 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -190,7 +190,6 @@ static void xfrmi_dev_uninit(struct net_device *dev)
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb->tstamp = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;