aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-10-05 02:46:26 -0700
committerDavid S. Miller <davem@davemloft.net>2015-10-05 02:46:26 -0700
commit2472186f58ee1e4b9ca194245fef03931f6de90a (patch)
tree75c793a05dd64db3df05f804e429bae45ead20d1
parent3e087caa23ef36370bfb925d3bbca78e8302d3ce (diff)
parenta1a5344ddbe8fd3e080013b317ac9a664490cfdf (diff)
downloadnet-next-2472186f58ee1e4b9ca194245fef03931f6de90a.tar.gz
Merge branch 'tcp-listener-fixes-and-improvement'
Eric Dumazet says: ==================== tcp: lockless listener fixes and improvement This fixes issues with TCP FastOpen vs lockless listeners, and SYNACK being attached to request sockets. Then, last patch brings performance improvement for syncookies generation and validation. Tested under a 4.3 Mpps SYNFLOOD attack, new perf profile looks like : 12.11% [kernel] [k] sha_transform 5.83% [kernel] [k] tcp_conn_request 4.59% [kernel] [k] __inet_lookup_listener 4.11% [kernel] [k] ipt_do_table 3.91% [kernel] [k] tcp_make_synack 3.05% [kernel] [k] fib_table_lookup 2.74% [kernel] [k] sock_wfree 2.66% [kernel] [k] memcpy_erms 2.12% [kernel] [k] tcp_v4_rcv ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/ipv6.h4
-rw-r--r--include/net/inet_sock.h3
-rw-r--r--include/net/ip.h9
-rw-r--r--include/net/request_sock.h12
-rw-r--r--net/core/dev.c1
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp_fastopen.c26
-rw-r--r--net/ipv4/tcp_input.c14
-rw-r--r--net/ipv6/syncookies.c2
11 files changed, 41 insertions, 36 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f1f32af6d9b96c..0ef2a97ccdb50b 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -264,9 +264,9 @@ struct tcp6_timewait_sock {
};
#if IS_ENABLED(CONFIG_IPV6)
-static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
+static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
{
- return inet_sk(__sk)->pinet6;
+ return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL;
}
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 47eb67b08abdf2..f5bf7310e33434 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,7 +245,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
- struct sock *sk_listener);
+ struct sock *sk_listener,
+ bool attach_listener);
static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
diff --git a/include/net/ip.h b/include/net/ip.h
index 91a6b2c88341b8..aa7811993276fb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -323,12 +323,15 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
{
- if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
+ struct sock *sk = skb->sk;
+
+ if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
+
return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
- } else {
- return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
}
+
+ return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
}
u32 ip_idents_reserve(u32 hash, int segs);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index dd423d84085247..95ab5d7aab96f9 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -80,7 +80,8 @@ static inline struct sock *req_to_sk(struct request_sock *req)
}
static inline struct request_sock *
-reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
+ bool attach_listener)
{
struct request_sock *req;
@@ -88,10 +89,15 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
if (req) {
req->rsk_ops = ops;
- sock_hold(sk_listener);
- req->rsk_listener = sk_listener;
+ if (attach_listener) {
+ sock_hold(sk_listener);
+ req->rsk_listener = sk_listener;
+ } else {
+ req->rsk_listener = NULL;
+ }
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
sk_node_init(&req_to_sk(req)->sk_node);
+ sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
/* Following is temporary. It is coupled with debugging
* helpers in reqsk_put() & reqsk_free()
diff --git a/net/core/dev.c b/net/core/dev.c
index 323c04edd779af..a229bf0d649dc9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2974,6 +2974,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
new_index = skb_tx_hash(dev, skb);
if (queue_index != new_index && sk &&
+ sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
sk_tx_queue_set(sk, new_index);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8910c95677194f..8e99681c8189d8 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
if (req == NULL)
goto drop;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1361a3f45df763..aed314f8c7c60b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -319,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
if (req == NULL)
goto drop;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 729ceb5f63c69d..8113c30ccf9641 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -326,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
+ req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
if (!req)
goto out;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 410ac481fda03c..93396bf7b475f3 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -168,8 +168,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
atomic_set(&req->rsk_refcnt, 2);
- /* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, child);
/* Now finish processing the fastopen child socket. */
inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +176,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
tcp_init_metrics(child);
tcp_init_buffer_space(child);
- /* Queue the data carried in the SYN packet. We need to first
- * bump skb's refcnt because the caller will attempt to free it.
- * Note that IPv6 might also have used skb_get() trick
- * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
- * So we need to eventually get a clone of the packet,
- * before inserting it in sk_receive_queue.
+ /* Queue the data carried in the SYN packet.
+ * We used to play tricky games with skb_get().
+ * With lockless listener, it is a dead end.
+ * Do not think about it.
*
* XXX (TFO) - we honor a zero-payload TFO request for now,
* (any reason not to?) but no need to queue the skb since
@@ -191,12 +187,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
*/
end_seq = TCP_SKB_CB(skb)->end_seq;
if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
- struct sk_buff *skb2;
-
- if (unlikely(skb_shared(skb)))
- skb2 = skb_clone(skb, GFP_ATOMIC);
- else
- skb2 = skb_get(skb);
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (likely(skb2)) {
skb_dst_drop(skb2);
@@ -214,12 +205,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
}
}
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
- sk->sk_data_ready(sk);
- bh_unlock_sock(child);
- /* Note: sock_put(child) will be done by tcp_conn_request()
- * after SYNACK packet is sent.
+ /* tcp_conn_request() is sending the SYNACK,
+ * and queues the child into listener accept queue.
*/
- WARN_ON(!req->sk);
return child;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 27108757c310db..ddadb318e8507f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6042,9 +6042,11 @@ static void tcp_openreq_init(struct request_sock *req,
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
- struct sock *sk_listener)
+ struct sock *sk_listener,
+ bool attach_listener)
{
- struct request_sock *req = reqsk_alloc(ops, sk_listener);
+ struct request_sock *req = reqsk_alloc(ops, sk_listener,
+ attach_listener);
if (req) {
struct inet_request_sock *ireq = inet_rsk(req);
@@ -6143,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
- req = inet_reqsk_alloc(rsk_ops, sk);
+ req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
if (!req)
goto drop;
@@ -6229,12 +6231,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
- fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
tcp_reqsk_record_syn(sk, req, skb);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
skb_get_queue_mapping(skb), &foc, false);
+ /* Add the child socket directly into the accept queue */
+ inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+ sk->sk_data_ready(sk);
+ bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7606eba83e7b36..f610b5310b1779 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
goto out;