aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2024-04-26 15:34:04 +0200
committerPaolo Abeni <pabeni@redhat.com>2024-04-26 15:34:05 +0200
commitd5115a55ffb5253743346ddf628a890417e2935e (patch)
tree3441c70fb71b9bb73af14f90af4c1ece881d4d23
parent15fd021bc4270273d8f4b7f58fdda8a16214a377 (diff)
parentb533fb9cf4f7c6ca2aa255a5a1fdcde49fff2b24 (diff)
downloadnet-next-d5115a55ffb5253743346ddf628a890417e2935e.tar.gz
Merge branch 'implement-reset-reason-mechanism-to-detect'
Jason Xing says: ==================== Implement reset reason mechanism to detect From: Jason Xing <kernelxing@tencent.com> In production, there are so many cases about why the RST skb is sent but we don't have a very convenient/fast method to detect the exact underlying reasons. RST is implemented in two kinds: passive kind (like tcp_v4_send_reset()) and active kind (like tcp_send_active_reset()). The former can be traced carefully 1) in TCP, with the help of drop reasons, which is based on Eric's idea[1], 2) in MPTCP, with the help of reset options defined in RFC 8684. The latter is relatively independent, which should be implemented on our own, such as active reset reasons which can not be replace by skb drop reason or something like this. In this series, I focus on the fundamental implement mostly about how the rstreason mechanism works and give the detailed passive part as an example, not including the active reset part. In future, we can go further and refine those NOT_SPECIFIED reasons. Here are some examples when tracing: <idle>-0 [002] ..s1. 1830.262425: tcp_send_reset: skbaddr=x skaddr=x src=x dest=x state=x reason=NOT_SPECIFIED <idle>-0 [002] ..s1. 1830.262425: tcp_send_reset: skbaddr=x skaddr=x src=x dest=x state=x reason=NO_SOCKET [1] Link: https://lore.kernel.org/all/CANn89iJw8x-LqgsWOeJQQvgVg6DnL5aBRLi10QN2WBdr+X4k=w@mail.gmail.com/ ==================== Link: https://lore.kernel.org/r/20240425031340.46946-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/rstreason.h121
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/trace/events/tcp.h26
-rw-r--r--net/dccp/ipv4.c10
-rw-r--r--net/dccp/ipv6.c10
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_ipv4.c17
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_timer.c9
-rw-r--r--net/ipv6/tcp_ipv6.c20
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/mptcp/protocol.h38
-rw-r--r--net/mptcp/subflow.c27
16 files changed, 266 insertions, 47 deletions
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 004e651e6067e7..bdc737832da66a 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -18,6 +18,7 @@
#include <linux/refcount.h>
#include <net/sock.h>
+#include <net/rstreason.h>
struct request_sock;
struct sk_buff;
@@ -34,7 +35,8 @@ struct request_sock_ops {
void (*send_ack)(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*send_reset)(const struct sock *sk,
- struct sk_buff *skb);
+ struct sk_buff *skb,
+ enum sk_rst_reason reason);
void (*destructor)(struct request_sock *req);
void (*syn_ack_timeout)(const struct request_sock *req);
};
diff --git a/include/net/rstreason.h b/include/net/rstreason.h
new file mode 100644
index 00000000000000..df3b6ac0c9b3fe
--- /dev/null
+++ b/include/net/rstreason.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_RSTREASON_H
+#define _LINUX_RSTREASON_H
+#include <net/dropreason-core.h>
+#include <uapi/linux/mptcp.h>
+
+#define DEFINE_RST_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(MPTCP_RST_EUNSPEC) \
+ FN(MPTCP_RST_EMPTCP) \
+ FN(MPTCP_RST_ERESOURCE) \
+ FN(MPTCP_RST_EPROHIBIT) \
+ FN(MPTCP_RST_EWQ2BIG) \
+ FN(MPTCP_RST_EBADPERF) \
+ FN(MPTCP_RST_EMIDDLEBOX) \
+ FN(ERROR) \
+ FNe(MAX)
+
+/**
+ * enum sk_rst_reason - the reasons of socket reset
+ *
+ * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols.
+ *
+ * There are three parts in order:
+ * 1) skb drop reasons: relying on drop reasons for such as passive reset
+ * 2) independent reset reasons: such as active reset reasons
+ * 3) reset reasons in MPTCP: only for MPTCP use
+ */
+enum sk_rst_reason {
+ /* Refer to include/net/dropreason-core.h
+ * Rely on skb drop reasons because it indicates exactly why RST
+ * could happen.
+ */
+ /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */
+ SK_RST_REASON_NOT_SPECIFIED,
+ /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */
+ SK_RST_REASON_NO_SOCKET,
+
+ /* Copy from include/uapi/linux/mptcp.h.
+ * These reset fields will not be changed since they adhere to
+ * RFC 8684. So do not touch them. I'm going to list each definition
+ * of them respectively.
+ */
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error.
+ * This is the default error; it implies that the subflow is no
+ * longer available. The presence of this option shows that the
+ * RST was generated by an MPTCP-aware device.
+ */
+ SK_RST_REASON_MPTCP_RST_EUNSPEC,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error.
+ * An error has been detected in the processing of MPTCP options.
+ * This is the usual reason code to return in the cases where a RST
+ * is being sent to close a subflow because of an invalid response.
+ */
+ SK_RST_REASON_MPTCP_RST_EMPTCP,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources.
+ * This code indicates that the sending host does not have enough
+ * resources to support the terminated subflow.
+ */
+ SK_RST_REASON_MPTCP_RST_ERESOURCE,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited.
+ * This code indicates that the requested subflow is prohibited by
+ * the policies of the sending host.
+ */
+ SK_RST_REASON_MPTCP_RST_EPROHIBIT,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data.
+ * This code indicates that there is an excessive amount of data
+ * that needs to be transmitted over the terminated subflow while
+ * having already been acknowledged over one or more other subflows.
+ * This may occur if a path has been unavailable for a short period
+ * and it is more efficient to reset and start again than it is to
+ * retransmit the queued data.
+ */
+ SK_RST_REASON_MPTCP_RST_EWQ2BIG,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance.
+ * This code indicates that the performance of this subflow was
+ * too low compared to the other subflows of this Multipath TCP
+ * connection.
+ */
+ SK_RST_REASON_MPTCP_RST_EBADPERF,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference.
+ * Middlebox interference has been detected over this subflow,
+ * making MPTCP signaling invalid. For example, this may be sent
+ * if the checksum does not validate.
+ */
+ SK_RST_REASON_MPTCP_RST_EMIDDLEBOX,
+
+ /** @SK_RST_REASON_ERROR: unexpected error happens */
+ SK_RST_REASON_ERROR,
+
+ /**
+ * @SK_RST_REASON_MAX: Maximum of socket reset reasons.
+ * It shouldn't be used as a real 'reason'.
+ */
+ SK_RST_REASON_MAX,
+};
+
+/* Convert skb drop reasons to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_drop_reason(enum skb_drop_reason reason)
+{
+ switch (reason) {
+ case SKB_DROP_REASON_NOT_SPECIFIED:
+ return SK_RST_REASON_NOT_SPECIFIED;
+ case SKB_DROP_REASON_NO_SOCKET:
+ return SK_RST_REASON_NO_SOCKET;
+ default:
+ /* If we don't have our own corresponding reason */
+ return SK_RST_REASON_NOT_SPECIFIED;
+ }
+}
+#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ffc9371fe9dea5..a9eb21251195c3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -670,7 +670,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
void tcp_send_probe0(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
-void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 5c04a61a11c2c8..49b5ee091cf6a9 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -11,6 +11,7 @@
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/sock_diag.h>
+#include <net/rstreason.h>
/*
* tcp event with arguments sk and skb
@@ -74,20 +75,32 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
TP_ARGS(sk, skb)
);
+#undef FN
+#define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason);
+DEFINE_RST_REASON(FN, FN)
+
+#undef FN
+#undef FNe
+#define FN(reason) { SK_RST_REASON_##reason, #reason },
+#define FNe(reason) { SK_RST_REASON_##reason, #reason }
+
/*
* skb of trace_tcp_send_reset is the skb that caused RST. In case of
* active reset, skb should be NULL
*/
TRACE_EVENT(tcp_send_reset,
- TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+ TP_PROTO(const struct sock *sk,
+ const struct sk_buff *skb,
+ const enum sk_rst_reason reason),
- TP_ARGS(sk, skb),
+ TP_ARGS(sk, skb, reason),
TP_STRUCT__entry(
__field(const void *, skbaddr)
__field(const void *, skaddr)
__field(int, state)
+ __field(enum sk_rst_reason, reason)
__array(__u8, saddr, sizeof(struct sockaddr_in6))
__array(__u8, daddr, sizeof(struct sockaddr_in6))
),
@@ -113,14 +126,19 @@ TRACE_EVENT(tcp_send_reset,
*/
TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr);
}
+ __entry->reason = reason;
),
- TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
+ TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s",
__entry->skbaddr, __entry->skaddr,
__entry->saddr, __entry->daddr,
- __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
+ __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN",
+ __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe)))
);
+#undef FN
+#undef FNe
+
/*
* tcp event with arguments sk
*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9fc9cea4c251bf..ff41bd6f99c31c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -24,6 +24,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/netns/generic.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -521,7 +522,8 @@ out:
return err;
}
-static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
int err;
const struct iphdr *rxiph;
@@ -706,7 +708,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
kfree_skb(skb);
return 0;
}
@@ -869,7 +871,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -909,7 +911,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c8ca703dc331a1..85f4b8fdbe5e08 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/secure_seq.h>
#include <net/netns/generic.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include "dccp.h"
#include "ipv6.h"
@@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
@@ -656,7 +658,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -762,7 +764,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -801,7 +803,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 64d805b27addea..251a57cf58223b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
inet_csk_reqsk_queue_drop(sk, req);
out:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f23b97777ea5e9..4ec0f4feee003d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -275,6 +275,7 @@
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -2811,7 +2812,8 @@ void __tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, sk->sk_allocation);
+ tcp_send_active_reset(sk, sk->sk_allocation,
+ SK_RST_REASON_NOT_SPECIFIED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2885,7 +2887,8 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2903,7 +2906,8 @@ adjudge_to_death:
if (sk->sk_state != TCP_CLOSE) {
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3007,7 +3011,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any());
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -4564,7 +4568,8 @@ int tcp_abort(struct sock *sk, int err)
smp_wmb();
sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e06f0cd04f7eee..0427deca3e0eb9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -723,7 +724,8 @@ out:
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -869,7 +871,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- trace_tcp_send_reset(sk, skb);
+ trace_tcp_send_reset(sk, skb, reason);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -1934,7 +1936,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v4_send_reset(rsk, skb);
+ tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
discard:
kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
@@ -2285,7 +2287,10 @@ lookup:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v4_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v4_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -2364,7 +2369,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(NULL, skb);
+ tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -2416,7 +2421,7 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v4_send_reset(sk, skb);
+ tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(drop_reason));
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 146c061145b460..7d543569a18099 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
@@ -878,7 +879,7 @@ embryonic_reset:
* avoid becoming vulnerable to outside attack aiming at
* resetting legit local connections.
*/
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
} else if (fastopen) { /* received a valid RST pkt */
reqsk_fastopen_remove(sk, req, true);
tcp_reset(sk, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ce59e4499b66df..ece4726c875100 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3615,7 +3615,8 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by RFC 2525, section 2.17. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, gfp_t priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason)
{
struct sk_buff *skb;
@@ -3640,7 +3641,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL);
+ trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
}
/* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 976db57b95d401..83fe7f62f7f10a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>
+#include <net/rstreason.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
@@ -127,7 +128,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
if (do_reset)
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -768,7 +770,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
goto death;
}
@@ -795,7 +797,8 @@ static void tcp_keepalive_timer (struct timer_list *t)
icsk->icsk_probes_out > 0) ||
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb7c3caf4f8536..77958adf2e165c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -60,6 +60,7 @@
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -69,7 +70,8 @@
#include <trace/events/tcp.h>
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -1008,7 +1010,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -1130,7 +1133,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
- trace_tcp_send_reset(sk, skb);
+ trace_tcp_send_reset(sk, skb, reason);
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
ipv6_get_dsfield(ipv6h), label, priority, txhash,
@@ -1677,7 +1680,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
if (opt_skb)
__kfree_skb(opt_skb);
@@ -1862,7 +1865,10 @@ lookup:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v6_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v6_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -1939,7 +1945,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(NULL, skb);
+ tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -1995,7 +2001,7 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(drop_reason));
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f8bc34f0d973eb..4b13ca362efa39 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2569,7 +2569,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
- tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(tcp_sk);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
unlock_sock_fast(tcp_sk, slow);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index fdfa843e2d88f3..cfc5f9c3f11348 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -12,6 +12,7 @@
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
+#include <net/rstreason.h>
#include "mptcp_pm_gen.h"
@@ -581,6 +582,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
WRITE_ONCE(subflow->local_id, -1);
}
+/* Convert reset reasons in MPTCP to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_mptcp_reason(u32 reason)
+{
+ switch (reason) {
+ case MPTCP_RST_EUNSPEC:
+ return SK_RST_REASON_MPTCP_RST_EUNSPEC;
+ case MPTCP_RST_EMPTCP:
+ return SK_RST_REASON_MPTCP_RST_EMPTCP;
+ case MPTCP_RST_ERESOURCE:
+ return SK_RST_REASON_MPTCP_RST_ERESOURCE;
+ case MPTCP_RST_EPROHIBIT:
+ return SK_RST_REASON_MPTCP_RST_EPROHIBIT;
+ case MPTCP_RST_EWQ2BIG:
+ return SK_RST_REASON_MPTCP_RST_EWQ2BIG;
+ case MPTCP_RST_EBADPERF:
+ return SK_RST_REASON_MPTCP_RST_EBADPERF;
+ case MPTCP_RST_EMIDDLEBOX:
+ return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX;
+ default:
+ /* It should not happen, or else errors may occur
+ * in MPTCP layer
+ */
+ return SK_RST_REASON_ERROR;
+ }
+}
+
+static inline void
+mptcp_send_active_reset_reason(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ enum sk_rst_reason reason;
+
+ reason = sk_rst_convert_mptcp_reason(subflow->reset_reason);
+ tcp_send_active_reset(sk, GFP_ATOMIC, reason);
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b94d1dca1094f4..97ec44d1df308f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+
#include "protocol.h"
#include "mib.h"
@@ -307,8 +308,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
return dst;
dst_release(dst);
- if (!req->syncookie)
- tcp_request_sock_ops.send_reset(sk, skb);
+ if (!req->syncookie) {
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ enum sk_rst_reason reason;
+
+ reason = sk_rst_convert_mptcp_reason(mpext->reset_reason);
+ tcp_request_sock_ops.send_reset(sk, skb, reason);
+ }
return NULL;
}
@@ -375,8 +381,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
return dst;
dst_release(dst);
- if (!req->syncookie)
- tcp6_request_sock_ops.send_reset(sk, skb);
+ if (!req->syncookie) {
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ enum sk_rst_reason reason;
+
+ reason = sk_rst_convert_mptcp_reason(mpext->reset_reason);
+ tcp6_request_sock_ops.send_reset(sk, skb, reason);
+ }
return NULL;
}
#endif
@@ -412,7 +423,7 @@ void mptcp_subflow_reset(struct sock *ssk)
/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
mptcp_schedule_work(sk);
@@ -781,6 +792,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
+ enum sk_rst_reason reason;
struct mptcp_sock *owner;
struct sock *child;
@@ -911,7 +923,8 @@ dispose_child:
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
- req->rsk_ops->send_reset(sk, skb);
+ reason = sk_rst_convert_mptcp_reason(mptcp_get_ext(skb)->reset_reason);
+ req->rsk_ops->send_reset(sk, skb, reason);
/* The last child reference will be released by the caller */
return child;
@@ -1348,7 +1361,7 @@ reset:
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
WRITE_ONCE(subflow->data_avail, false);
return false;
}