diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2021-09-16 12:33:25 +0000 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2021-09-17 00:31:03 +0200 |
commit | f7d619a946e981177777983af26e9e31163ffb38 (patch) | |
tree | 5230a4eed85fef015778eb9d0cd8ffd55a81e443 | |
parent | 3a029e1f3d6e2ee809e85abecce619a48016bd4b (diff) | |
download | bpf-pr/bpf-tstamp.tar.gz |
net: skb clock bases [pr/bpf-tstamp]
TODO:
- remove all skb_mstamp_ns users
- move __net_timestamp from input path to later in the stack
so that tc ingress doesn't hit/override it yet
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | drivers/net/loopback.c | 3 | ||||
-rw-r--r-- | include/linux/skbuff.h | 35 | ||||
-rw-r--r-- | net/bridge/br_forward.c | 1 | ||||
-rw-r--r-- | net/core/dev.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 17 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 1 | ||||
-rw-r--r-- | net/netfilter/xt_time.c | 3 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 1 | ||||
-rw-r--r-- | net/sched/sch_fq.c | 3 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface.c | 1 |
13 files changed, 38 insertions, 34 deletions
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a1c77cc0041657..83ec069670532f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -72,9 +72,6 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); - /* do not fool net_timestamp_check() with various clock bases */ - skb->tstamp = 0; - skb_orphan(skb); /* Before queueing this packet to netif_rx(), diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bdb0db3e8258a..c2e0b977569b29 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -611,13 +611,15 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +/* skb->tstamp clock bases: */ +#define SKB_TSTAMP_BASE_TAI 0 +#define SKB_TSTAMP_BASE_MONO 1 + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left - * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point - * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head * @sk: Socket we are owned by @@ -743,10 +745,9 @@ struct sk_buff { int ip_defrag_offset; }; - union { - ktime_t tstamp; - u64 skb_mstamp_ns; /* earliest departure time */ - }; + /* tstamp_base bit clarifies the clock base (monotonic vs tai) */ + ktime_t tstamp; + /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. 
If you @@ -872,6 +873,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; + __u8 tstamp_base:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -3812,9 +3814,26 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, stamp->tv_nsec = ts.tv_nsec; } +static inline bool skb_has_tstamp_mono(const struct sk_buff *skb) +{ + return skb->tstamp_base == SKB_TSTAMP_BASE_MONO; +} + +static inline void skb_set_tstamp_mono(struct sk_buff *skb, ktime_t time) +{ + skb->tstamp = time; + skb->tstamp_base = SKB_TSTAMP_BASE_MONO; +} + +static inline void skb_set_tstamp_tai(struct sk_buff *skb, ktime_t time) +{ + skb->tstamp = time; + skb->tstamp_base = SKB_TSTAMP_BASE_TAI; +} + static inline void __net_timestamp(struct sk_buff *skb) { - skb->tstamp = ktime_get_real(); + skb_set_tstamp_tai(skb, ktime_get_real()); } static inline ktime_t net_timedelta(ktime_t t) @@ -4679,8 +4698,6 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) skb->redirected = 1; #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; - if (skb->from_ingress) - skb->tstamp = 0; #endif } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ec646656dbf14d..32ec8debca805f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -62,7 +62,6 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->tstamp = 0; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/core/dev.c b/net/core/dev.c index 74fd402d26ddeb..dc17b454426822 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2137,7 +2137,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { - skb->tstamp = 0; + skb_set_tstamp_tai(skb, 0); if (static_branch_unlikely(&netstamp_needed_key)) __net_timestamp(skb); } diff --git a/net/core/filter.c b/net/core/filter.c 
index 2e32cee2c46900..59dfc3fa1c2d10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2108,7 +2108,6 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) } skb->dev = dev; - skb->tstamp = 0; dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); @@ -2177,7 +2176,6 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); @@ -2275,7 +2273,6 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f9311762cc475b..4afceb338083d7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5467,7 +5467,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) ipvs_reset(skb); skb->mark = 0; - skb->tstamp = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b5e..06ee4696703c0c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -79,7 +79,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); - skb->tstamp = 0; return dst_output(net, sk, skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6d72f3ea48c4ef..224770d6e8ffc5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1254,7 +1254,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; + skb_set_tstamp_mono(skb, tp->tcp_wstamp_ns); if (clone_it) { TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; @@ -2625,8 +2625,9 @@ static bool tcp_write_xmit(struct 
sock *sk, unsigned int mss_now, int nonagle, unsigned int limit; if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { - /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ - skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; + /* tstamp is used as a start point for the retransmit timer */ + tp->tcp_wstamp_ns = tp->tcp_clock_cache; + skb_set_tstamp_mono(skb, tp->tcp_wstamp_ns); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3556,11 +3557,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) - skb->skb_mstamp_ns = cookie_init_timestamp(req, now); + skb_set_tstamp_mono(skb, cookie_init_timestamp(req, now)); else #endif { - skb->skb_mstamp_ns = now; + skb_set_tstamp_mono(skb, now); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3609,7 +3610,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb->skb_mstamp_ns = now; + skb_set_tstamp_mono(skb, now); tcp_add_tx_delay(skb, tp); return skb; @@ -3786,9 +3787,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) tcp_chrono_start(sk, TCP_CHRONO_BUSY); err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - - syn->skb_mstamp_ns = syn_data->skb_mstamp_ns; - + skb_set_tstamp_mono(syn, syn_data->tstamp); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) * we keep in write queue in case of a retransmit, as we diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 12f985f43bccfb..7ee1c861805974 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -440,7
+440,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, } #endif - skb->tstamp = 0; return dst_output(net, sk, skb); } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 6aa12d0f54e23c..478183a792d6a9 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -166,8 +166,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) * We need real time here, but we can neither use skb->tstamp * nor __net_timestamp(). * - * skb->tstamp and skb->skb_mstamp_ns overlap, however, they - * use different clock types (real vs monotonic). + * skb->tstamp uses different clock types (real vs monotonic). * * Suppose you have two rules: * 1. match before 13:00 diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index cf2ce581248968..88deb5b41429f5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,7 +507,6 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; - skb->tstamp = 0; vport->ops->send(skb); return; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2fb76fc0cc31bf..982c2d43bfde8e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -448,7 +448,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); - if (!skb->tstamp) { + if (!skb_has_tstamp_mono(skb) || !skb->tstamp) { + skb_set_tstamp_mono(skb, 0); fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); } else { /* Check if packet timestamp is too far in the future. diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 41de46b5ffa94a..df5c3dd628b6eb 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -190,7 +190,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) { - skb->tstamp = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; |