From 9807080e2170a9e5a7428d0a0e8d082585b6451f Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 1 Apr 2024 15:36:04 +0800 Subject: trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Introducing entry_saddr and entry_daddr parameters in this macro for later use can help us record the reverse 4-tuple by analyzing the 4-tuple of the incoming skb when receiving. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240401073605.37335-2-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/trace/events/net_probe_common.h | 20 +++++++++++--------- include/trace/events/tcp.h | 2 +- include/trace/events/udp.h | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/trace/events/net_probe_common.h b/include/trace/events/net_probe_common.h index 5e33f91bdea373..976a58364bff15 100644 --- a/include/trace/events/net_probe_common.h +++ b/include/trace/events/net_probe_common.h @@ -70,14 +70,14 @@ TP_STORE_V4MAPPED(__entry, saddr, daddr) #endif -#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh) \ +#define TP_STORE_ADDR_PORTS_SKB_V4(skb, protoh, entry_saddr, entry_daddr) \ do { \ - struct sockaddr_in *v4 = (void *)__entry->saddr; \ + struct sockaddr_in *v4 = (void *)entry_saddr; \ \ v4->sin_family = AF_INET; \ v4->sin_port = protoh->source; \ v4->sin_addr.s_addr = ip_hdr(skb)->saddr; \ - v4 = (void *)__entry->daddr; \ + v4 = (void *)entry_daddr; \ v4->sin_family = AF_INET; \ v4->sin_port = protoh->dest; \ v4->sin_addr.s_addr = ip_hdr(skb)->daddr; \ @@ -85,28 +85,30 @@ #if IS_ENABLED(CONFIG_IPV6) -#define TP_STORE_ADDR_PORTS_SKB(__entry, skb, protoh) \ +#define TP_STORE_ADDR_PORTS_SKB(skb, protoh, entry_saddr, entry_daddr) \ do { \ const struct iphdr *iph = ip_hdr(skb); \ \ if (iph->version == 6) { \ - struct sockaddr_in6 *v6 = (void *)__entry->saddr; \ + struct sockaddr_in6 *v6 = (void *)entry_saddr; \ \ v6->sin6_family = AF_INET6; \ v6->sin6_port = protoh->source; \ v6->sin6_addr = ipv6_hdr(skb)->saddr; 
\ - v6 = (void *)__entry->daddr; \ + v6 = (void *)entry_daddr; \ v6->sin6_family = AF_INET6; \ v6->sin6_port = protoh->dest; \ v6->sin6_addr = ipv6_hdr(skb)->daddr; \ } else \ - TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh); \ + TP_STORE_ADDR_PORTS_SKB_V4(skb, protoh, \ + entry_saddr, \ + entry_daddr); \ } while (0) #else -#define TP_STORE_ADDR_PORTS_SKB(__entry, skb, protoh) \ - TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh) +#define TP_STORE_ADDR_PORTS_SKB(skb, protoh, entry_saddr, entry_daddr) \ + TP_STORE_ADDR_PORTS_SKB_V4(skb, protoh, entry_saddr, entry_daddr) #endif diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1db95175c1e528..cf14b6fcbeed7f 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(tcp_event_skb, memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - TP_STORE_ADDR_PORTS_SKB(__entry, skb, th); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h index 62bebe2a6ecebd..6142be4068e29b 100644 --- a/include/trace/events/udp.h +++ b/include/trace/events/udp.h @@ -38,7 +38,7 @@ TRACE_EVENT(udp_fail_queue_rcv_skb, memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - TP_STORE_ADDR_PORTS_SKB(__entry, skb, uh); + TP_STORE_ADDR_PORTS_SKB(skb, uh, __entry->saddr, __entry->daddr); ), TP_printk("rc=%d family=%s src=%pISpc dest=%pISpc", __entry->rc, -- cgit 1.2.3-korg From 19822a980e1956a6572998887a7df5a0607a32f6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 1 Apr 2024 15:36:05 +0800 Subject: trace: tcp: fully support trace_tcp_send_reset Prior to this patch, enabling trace_tcp_send_reset only reports resets that happen under two circumstances: 1) active rst mode 2) non-active rst mode and based on the full socket
That means an inconsistency occurs if we use tcpdump and trace simultaneously to see how rst happens. It's necessary to take other cases into consideration, say: 1) time-wait socket 2) no socket ... By parsing the incoming skb and reversing its 4-tuple we can know the exact 'flow', which might not exist. Samples after applying this patch: 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port state=TCP_ESTABLISHED 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port state=UNKNOWN Note: 1) UNKNOWN means we cannot extract the right information from skb. 2) skbaddr/skaddr could be 0 Signed-off-by: Jason Xing Link: https://lore.kernel.org/r/20240401073605.37335-3-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 40 ++++++++++++++++++++++++++++++++++++++-- net/ipv4/tcp_ipv4.c | 7 +++---- net/ipv6/tcp_ipv6.c | 3 ++- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index cf14b6fcbeed7f..5c04a61a11c2c8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -78,11 +78,47 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, +TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), - TP_ARGS(sk, skb) + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->skaddr = sk; + /* Zero means unknown state. */ + __entry->state = sk ? 
sk->sk_state : 0; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + if (sk && sk_fullsock(sk)) { + const struct inet_sock *inet = inet_sk(sk); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + } else if (skb) { + const struct tcphdr *th = (const struct tcphdr *)skb->data; + /* + * Pass daddr/saddr swapped: the RST flows in the reverse + * direction of the incoming skb that triggered it. + */ + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->daddr, __entry->saddr); + } + ), + + TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s", + __entry->skbaddr, __entry->skaddr, + __entry->saddr, __entry->daddr, + __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN") ); /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1e0a9762f92e60..56b75efcfd12bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -866,11 +866,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. 
*/ - if (sk) { + if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; - if (sk_fullsock(sk)) - trace_tcp_send_reset(sk, skb); - } + + trace_tcp_send_reset(sk, skb); BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5ae74f661d25f9..cffebaec66f1ab 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - trace_tcp_send_reset(sk, skb); if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = READ_ONCE(sk->sk_priority); @@ -1129,6 +1128,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } + trace_tcp_send_reset(sk, skb); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h), label, priority, txhash, &key); -- cgit 1.2.3-korg