aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/snmp_counter.rst240
-rw-r--r--drivers/isdn/capi/kcapi.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c7
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c6
-rw-r--r--drivers/net/hamradio/6pack.c16
-rw-r--r--drivers/net/tap.c3
-rw-r--r--include/linux/ptr_ring.h2
-rw-r--r--include/net/ip_tunnels.h20
-rw-r--r--include/net/sock.h38
-rw-r--r--net/compat.c15
-rw-r--r--net/core/sock.c15
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/ip_vti.c12
-rw-r--r--net/ipv6/ip6_gre.c10
-rw-r--r--net/ipv6/ip6_tunnel.c10
-rw-r--r--net/ipv6/ip6_vti.c8
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/sunrpc/svcsock.c2
22 files changed, 383 insertions, 69 deletions
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index f8eb77ddbd4403..b0dfdaaca512b3 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -571,7 +571,97 @@ duplicate packet is received.
* TcpExtTCPDSACKOfoRecv
The TCP stack receives a DSACK, which indicate an out of order
-duplciate packet is received.
+duplicate packet is received.
+
+TCP out of order
+===============
+* TcpExtTCPOFOQueue
+The TCP layer receives an out of order packet and has enough memory
+to queue it.
+
+* TcpExtTCPOFODrop
+The TCP layer receives an out of order packet but doesn't have enough
+memory, so drops it. Such packets won't be counted into
+TcpExtTCPOFOQueue.
+
+* TcpExtTCPOFOMerge
+The received out of order packet has an overlay with the previous
+packet. the overlay part will be dropped. All of TcpExtTCPOFOMerge
+packets will also be counted into TcpExtTCPOFOQueue.
+
+TCP PAWS
+=======
+PAWS (Protection Against Wrapped Sequence numbers) is an algorithm
+which is used to drop old packets. It depends on the TCP
+timestamps. For detail information, please refer the `timestamp wiki`_
+and the `RFC of PAWS`_.
+
+.. _RFC of PAWS: https://tools.ietf.org/html/rfc1323#page-17
+.. _timestamp wiki: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_timestamps
+
+* TcpExtPAWSActive
+Packets are dropped by PAWS in Syn-Sent status.
+
+* TcpExtPAWSEstab
+Packets are dropped by PAWS in any status other than Syn-Sent.
+
+TCP ACK skip
+===========
+In some scenarios, kernel would avoid sending duplicate ACKs too
+frequently. Please find more details in the tcp_invalid_ratelimit
+section of the `sysctl document`_. When kernel decides to skip an ACK
+due to tcp_invalid_ratelimit, kernel would update one of below
+counters to indicate the ACK is skipped in which scenario. The ACK
+would only be skipped if the received packet is either a SYN packet or
+it has no data.
+
+.. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt
+
+* TcpExtTCPACKSkippedSynRecv
+The ACK is skipped in Syn-Recv status. The Syn-Recv status means the
+TCP stack receives a SYN and replies SYN+ACK. Now the TCP stack is
+waiting for an ACK. Generally, the TCP stack doesn't need to send ACK
+in the Syn-Recv status. But in several scenarios, the TCP stack need
+to send an ACK. E.g., the TCP stack receives the same SYN packet
+repeately, the received packet does not pass the PAWS check, or the
+received packet sequence number is out of window. In these scenarios,
+the TCP stack needs to send ACK. If the ACk sending frequency is higher than
+tcp_invalid_ratelimit allows, the TCP stack will skip sending ACK and
+increase TcpExtTCPACKSkippedSynRecv.
+
+
+* TcpExtTCPACKSkippedPAWS
+The ACK is skipped due to PAWS (Protect Against Wrapped Sequence
+numbers) check fails. If the PAWS check fails in Syn-Recv, Fin-Wait-2
+or Time-Wait statuses, the skipped ACK would be counted to
+TcpExtTCPACKSkippedSynRecv, TcpExtTCPACKSkippedFinWait2 or
+TcpExtTCPACKSkippedTimeWait. In all other statuses, the skipped ACK
+would be counted to TcpExtTCPACKSkippedPAWS.
+
+* TcpExtTCPACKSkippedSeq
+The sequence number is out of window and the timestamp passes the PAWS
+check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait.
+
+* TcpExtTCPACKSkippedFinWait2
+The ACK is skipped in Fin-Wait-2 status, the reason would be either
+PAWS check fails or the received sequence number is out of window.
+
+* TcpExtTCPACKSkippedTimeWait
+Tha ACK is skipped in Time-Wait status, the reason would be either
+PAWS check failed or the received sequence number is out of window.
+
+* TcpExtTCPACKSkippedChallenge
+The ACK is skipped if the ACK is a challenge ACK. The RFC 5961 defines
+3 kind of challenge ACK, please refer `RFC 5961 section 3.2`_,
+`RFC 5961 section 4.2`_ and `RFC 5961 section 5.2`_. Besides these
+three scenarios, In some TCP status, the linux TCP stack would also
+send challenge ACKs if the ACK number is before the first
+unacknowledged number (more strict than `RFC 5961 section 5.2`_).
+
+.. _RFC 5961 section 3.2: https://tools.ietf.org/html/rfc5961#page-7
+.. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9
+.. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11
+
examples
=======
@@ -1188,3 +1278,151 @@ Run nstat on server B::
We have deleted the default route on server B. Server B couldn't find
a route for the 8.8.8.8 IP address, so server B increased
IpOutNoRoutes.
+
+TcpExtTCPACKSkippedSynRecv
+------------------------
+In this test, we send 3 same SYN packets from client to server. The
+first SYN will let server create a socket, set it to Syn-Recv status,
+and reply a SYN/ACK. The second SYN will let server reply the SYN/ACK
+again, and record the reply time (the duplicate ACK reply time). The
+third SYN will let server check the previous duplicate ACK reply time,
+and decide to skip the duplicate ACK, then increase the
+TcpExtTCPACKSkippedSynRecv counter.
+
+Run tcpdump to capture a SYN packet::
+
+ nstatuser@nstat-a:~$ sudo tcpdump -c 1 -w /tmp/syn.pcap port 9000
+ tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes
+
+Open another terminal, run nc command::
+
+ nstatuser@nstat-a:~$ nc nstat-b 9000
+
+As the nstat-b didn't listen on port 9000, it should reply a RST, and
+the nc command exited immediately. It was enough for the tcpdump
+command to capture a SYN packet. A linux server might use hardware
+offload for the TCP checksum, so the checksum in the /tmp/syn.pcap
+might be not correct. We call tcprewrite to fix it::
+
+ nstatuser@nstat-a:~$ tcprewrite --infile=/tmp/syn.pcap --outfile=/tmp/syn_fixcsum.pcap --fixcsum
+
+On nstat-b, we run nc to listen on port 9000::
+
+ nstatuser@nstat-b:~$ nc -lkv 9000
+ Listening on [0.0.0.0] (family 0, port 9000)
+
+On nstat-a, we blocked the packet from port 9000, or nstat-a would send
+RST to nstat-b::
+
+ nstatuser@nstat-a:~$ sudo iptables -A INPUT -p tcp --sport 9000 -j DROP
+
+Send 3 SYN repeatly to nstat-b::
+
+ nstatuser@nstat-a:~$ for i in {1..3}; do sudo tcpreplay -i ens3 /tmp/syn_fixcsum.pcap; done
+
+Check snmp cunter on nstat-b::
+
+ nstatuser@nstat-b:~$ nstat | grep -i skip
+ TcpExtTCPACKSkippedSynRecv 1 0.0
+
+As we expected, TcpExtTCPACKSkippedSynRecv is 1.
+
+TcpExtTCPACKSkippedPAWS
+----------------------
+To trigger PAWS, we could send an old SYN.
+
+On nstat-b, let nc listen on port 9000::
+
+ nstatuser@nstat-b:~$ nc -lkv 9000
+ Listening on [0.0.0.0] (family 0, port 9000)
+
+On nstat-a, run tcpdump to capture a SYN::
+
+ nstatuser@nstat-a:~$ sudo tcpdump -w /tmp/paws_pre.pcap -c 1 port 9000
+ tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes
+
+On nstat-a, run nc as a client to connect nstat-b::
+
+ nstatuser@nstat-a:~$ nc -v nstat-b 9000
+ Connection to nstat-b 9000 port [tcp/*] succeeded!
+
+Now the tcpdump has captured the SYN and exit. We should fix the
+checksum::
+
+ nstatuser@nstat-a:~$ tcprewrite --infile /tmp/paws_pre.pcap --outfile /tmp/paws.pcap --fixcsum
+
+Send the SYN packet twice::
+
+ nstatuser@nstat-a:~$ for i in {1..2}; do sudo tcpreplay -i ens3 /tmp/paws.pcap; done
+
+On nstat-b, check the snmp counter::
+
+ nstatuser@nstat-b:~$ nstat | grep -i skip
+ TcpExtTCPACKSkippedPAWS 1 0.0
+
+We sent two SYN via tcpreplay, both of them would let PAWS check
+failed, the nstat-b replied an ACK for the first SYN, skipped the ACK
+for the second SYN, and updated TcpExtTCPACKSkippedPAWS.
+
+TcpExtTCPACKSkippedSeq
+--------------------
+To trigger TcpExtTCPACKSkippedSeq, we send packets which have valid
+timestamp (to pass PAWS check) but the sequence number is out of
+window. The linux TCP stack would avoid to skip if the packet has
+data, so we need a pure ACK packet. To generate such a packet, we
+could create two sockets: one on port 9000, another on port 9001. Then
+we capture an ACK on port 9001, change the source/destination port
+numbers to match the port 9000 socket. Then we could trigger
+TcpExtTCPACKSkippedSeq via this packet.
+
+On nstat-b, open two terminals, run two nc commands to listen on both
+port 9000 and port 9001::
+
+ nstatuser@nstat-b:~$ nc -lkv 9000
+ Listening on [0.0.0.0] (family 0, port 9000)
+
+ nstatuser@nstat-b:~$ nc -lkv 9001
+ Listening on [0.0.0.0] (family 0, port 9001)
+
+On nstat-a, run two nc clients::
+
+ nstatuser@nstat-a:~$ nc -v nstat-b 9000
+ Connection to nstat-b 9000 port [tcp/*] succeeded!
+
+ nstatuser@nstat-a:~$ nc -v nstat-b 9001
+ Connection to nstat-b 9001 port [tcp/*] succeeded!
+
+On nstat-a, run tcpdump to capture an ACK::
+
+ nstatuser@nstat-a:~$ sudo tcpdump -w /tmp/seq_pre.pcap -c 1 dst port 9001
+ tcpdump: listening on ens3, link-type EN10MB (Ethernet), capture size 262144 bytes
+
+On nstat-b, send a packet via the port 9001 socket. E.g. we sent a
+string 'foo' in our example::
+
+ nstatuser@nstat-b:~$ nc -lkv 9001
+ Listening on [0.0.0.0] (family 0, port 9001)
+ Connection from nstat-a 42132 received!
+ foo
+
+On nstat-a, the tcpdump should have caputred the ACK. We should check
+the source port numbers of the two nc clients::
+
+ nstatuser@nstat-a:~$ ss -ta '( dport = :9000 || dport = :9001 )' | tee
+ State Recv-Q Send-Q Local Address:Port Peer Address:Port
+ ESTAB 0 0 192.168.122.250:50208 192.168.122.251:9000
+ ESTAB 0 0 192.168.122.250:42132 192.168.122.251:9001
+
+Run tcprewrite, change port 9001 to port 9000, chagne port 42132 to
+port 50208::
+
+ nstatuser@nstat-a:~$ tcprewrite --infile /tmp/seq_pre.pcap --outfile /tmp/seq.pcap -r 9001:9000 -r 42132:50208 --fixcsum
+
+Now the /tmp/seq.pcap is the packet we need. Send it to nstat-b::
+
+ nstatuser@nstat-a:~$ for i in {1..2}; do sudo tcpreplay -i ens3 /tmp/seq.pcap; done
+
+Check TcpExtTCPACKSkippedSeq on nstat-b::
+
+ nstatuser@nstat-b:~$ nstat | grep -i skip
+ TcpExtTCPACKSkippedSeq 1 0.0
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 0ff517d3c98f98..a4ceb61c5b6035 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -852,7 +852,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf)
u16 ret;
if (contr == 0) {
- strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
+ strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
return CAPI_NOERROR;
}
@@ -860,7 +860,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf)
ctr = get_capi_ctr_by_nr(contr);
if (ctr && ctr->state == CAPI_CTR_RUNNING) {
- strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
+ strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
ret = CAPI_NOERROR;
} else
ret = CAPI_REGNOTINSTALLED;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index d3b9aaf96c1c30..07cd58798083cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3995,17 +3995,18 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
+ clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
+
if (netif_running(kinfo->netdev)) {
- ret = hns3_nic_net_up(kinfo->netdev);
+ ret = hns3_nic_net_open(kinfo->netdev);
if (ret) {
+ set_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
netdev_err(kinfo->netdev,
"hns net up fail, ret=%d!\n", ret);
return ret;
}
}
- clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
-
return ret;
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 6d48dc62a44b5d..da323b9e1f62fb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -1106,6 +1106,11 @@ static void hinic_remove(struct pci_dev *pdev)
dev_info(&pdev->dev, "HiNIC driver - removed\n");
}
+static void hinic_shutdown(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
+
static const struct pci_device_id hinic_pci_table[] = {
{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE), 0},
{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_25GE), 0},
@@ -1119,6 +1124,7 @@ static struct pci_driver hinic_driver = {
.id_table = hinic_pci_table,
.probe = hinic_probe,
.remove = hinic_remove,
+ .shutdown = hinic_shutdown,
};
module_pci_driver(hinic_driver);
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 28c74998035946..a19868cba48cb0 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -523,10 +523,7 @@ static void resync_tnc(struct timer_list *t)
/* Start resync timer again -- the TNC might be still absent */
-
- del_timer(&sp->resync_t);
- sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT;
- add_timer(&sp->resync_t);
+ mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT);
}
static inline int tnc_init(struct sixpack *sp)
@@ -537,9 +534,7 @@ static inline int tnc_init(struct sixpack *sp)
sp->tty->ops->write(sp->tty, &inbyte, 1);
- del_timer(&sp->resync_t);
- sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT;
- add_timer(&sp->resync_t);
+ mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT);
return 0;
}
@@ -897,11 +892,8 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd)
/* if the state byte has been received, the TNC is present,
so the resync timer can be reset. */
- if (sp->tnc_state == TNC_IN_SYNC) {
- del_timer(&sp->resync_t);
- sp->resync_t.expires = jiffies + SIXP_INIT_RESYNC_TIMEOUT;
- add_timer(&sp->resync_t);
- }
+ if (sp->tnc_state == TNC_IN_SYNC)
+ mod_timer(&sp->resync_t, jiffies + SIXP_INIT_RESYNC_TIMEOUT);
sp->status1 = cmd & SIXP_PRIO_DATA_MASK;
}
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 443b2694130cda..c0b52e48f0e63c 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1177,8 +1177,6 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
goto err_kfree;
}
- skb_probe_transport_header(skb, ETH_HLEN);
-
/* Move network header to the right position for VLAN tagged packets */
if ((skb->protocol == htons(ETH_P_8021Q) ||
skb->protocol == htons(ETH_P_8021AD)) &&
@@ -1189,6 +1187,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
tap = rcu_dereference(q->tap);
if (tap) {
skb->dev = tap->dev;
+ skb_probe_transport_header(skb, ETH_HLEN);
dev_queue_xmit(skb);
} else {
kfree_skb(skb);
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6894976b54e376..186cd8e970c70f 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -573,6 +573,8 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
else if (destroy)
destroy(ptr);
+ if (producer >= size)
+ producer = 0;
__ptr_ring_set_size(r, size);
r->producer = producer;
r->consumer_head = 0;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index cbcf35ce1b1496..34f019650941bd 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -308,6 +308,26 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
int ip_tunnel_encap_setup(struct ip_tunnel *t,
struct ip_tunnel_encap *ipencap);
+static inline bool pskb_inet_may_pull(struct sk_buff *skb)
+{
+ int nhlen;
+
+ switch (skb->protocol) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ default:
+ nhlen = 0;
+ }
+
+ return pskb_network_may_pull(skb, nhlen);
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
diff --git a/include/net/sock.h b/include/net/sock.h
index a6235c286ef996..2b229f7be8ebbc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -298,6 +298,7 @@ struct sock_common {
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
+ * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
* @sk_tsflags: SO_TIMESTAMPING socket options
* @sk_tskey: counter to disambiguate concurrent tstamp requests
* @sk_zckey: counter to order MSG_ZEROCOPY notifications
@@ -474,6 +475,9 @@ struct sock {
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
ktime_t sk_stamp;
+#if BITS_PER_LONG==32
+ seqlock_t sk_stamp_seq;
+#endif
u16 sk_tsflags;
u8 sk_shutdown;
u32 sk_tskey;
@@ -2297,6 +2301,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
atomic_add(segs, &sk->sk_drops);
}
+static inline ktime_t sock_read_timestamp(struct sock *sk)
+{
+#if BITS_PER_LONG==32
+ unsigned int seq;
+ ktime_t kt;
+
+ do {
+ seq = read_seqbegin(&sk->sk_stamp_seq);
+ kt = sk->sk_stamp;
+ } while (read_seqretry(&sk->sk_stamp_seq, seq));
+
+ return kt;
+#else
+ return sk->sk_stamp;
+#endif
+}
+
+static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
+{
+#if BITS_PER_LONG==32
+ write_seqlock(&sk->sk_stamp_seq);
+ sk->sk_stamp = kt;
+ write_sequnlock(&sk->sk_stamp_seq);
+#else
+ sk->sk_stamp = kt;
+#endif
+}
+
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
@@ -2321,7 +2353,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
- sk->sk_stamp = kt;
+ sock_write_timestamp(sk, kt);
if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
__sock_recv_wifi_status(msg, sk, skb);
@@ -2342,9 +2374,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
__sock_recv_ts_and_drops(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
- sk->sk_stamp = skb->tstamp;
+ sock_write_timestamp(sk, skb->tstamp);
else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
- sk->sk_stamp = 0;
+ sock_write_timestamp(sk, 0);
}
void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
diff --git a/net/compat.c b/net/compat.c
index 47a614b370cd3e..d1f3a8a0b3efe7 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
ctv = (struct compat_timeval __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
+
if (tv.tv_sec == -1)
return err;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
err = 0;
if (put_user(tv.tv_sec, &ctv->tv_sec) ||
@@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
ctv = (struct compat_timespec __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return err;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- ts = ktime_to_timespec(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ ts = ktime_to_timespec(kt);
}
err = 0;
if (put_user(ts.tv_sec, &ctv->tv_sec) ||
diff --git a/net/core/sock.c b/net/core/sock.c
index f00902c532cc77..6aa2e7e0b4fbdb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+#if BITS_PER_LONG==32
+ seqlock_init(&sk->sk_stamp_seq);
+#endif
atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -2850,12 +2853,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
struct timeval tv;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
if (tv.tv_sec == -1)
return -ENOENT;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
@@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
struct timespec ts;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return -ENOENT;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
ts = ktime_to_timespec(sk->sk_stamp);
}
return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c7a7bd58a23c58..d1d09f3e5f9ec9 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -676,6 +676,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
@@ -719,6 +722,9 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
erspan_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
@@ -762,6 +768,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 284a22154b4e6c..c4f5602308edca 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- unsigned int inner_nhdr_len = 0;
const struct iphdr *inner_iph;
struct flowi4 fl4;
u8 tos, ttl;
@@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
__be32 dst;
bool connected;
- /* ensure we can access the inner net header, for several users below */
- if (skb->protocol == htons(ETH_P_IP))
- inner_nhdr_len = sizeof(struct iphdr);
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner_nhdr_len = sizeof(struct ipv6hdr);
- if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
- goto tx_error;
-
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b302d69c6b..d7b43e700023a0 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -241,6 +241,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct flowi fl;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
@@ -253,15 +256,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
break;
default:
- dev->stats.tx_errors++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
+ goto tx_err;
}
/* override mark with tunnel output key */
fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
return vti_xmit(skb, dev, &fl);
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int vti4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 229e55c99021a8..09d0826742f89f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -881,6 +881,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -923,6 +926,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
int nhoff;
int thoff;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -995,8 +1001,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1004,7 +1008,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
&dsfield, &encap_limit);
break;
case htons(ETH_P_IPV6):
- if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr))
goto tx_err;
if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
&dsfield, &encap_limit))
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 99179b9c83840b..0c6403cf8b5226 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1243,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- /* ensure we can access the full inner ip header */
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return -1;
-
iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1321,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- return -1;
-
ipv6h = ipv6_hdr(skb);
tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
@@ -1405,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 706fe42e492899..8b6eefff2f7eaf 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -522,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct ipv6hdr *ipv6h;
struct flowi fl;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ipv6h = ipv6_hdr(skb);
-
if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- vti6_addr_conflict(t, ipv6h))
+ vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
xfrm_decode_session(skb, &fl, AF_INET6);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8276f1224f168d..30337b38274b29 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -51,6 +51,7 @@
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
+#include <net/ip_tunnels.h>
#include <linux/nospec.h>
@@ -599,13 +600,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
- int err;
- err = ip6mr_fib_lookup(net, &fl6, &mrt);
- if (err < 0) {
- kfree_skb(skb);
- return err;
- }
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+ goto tx_err;
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -614,6 +614,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
read_unlock(&mrt_lock);
kfree_skb(skb);
return NETDEV_TX_OK;
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int reg_vif_get_iflink(const struct net_device *dev)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a94e0b02a8ac9d..40b225f87d5e1d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
n = __ipv6_neigh_lookup(dev, daddr);
if (n)
return n;
- return neigh_create(&nd_tbl, daddr, dev);
+
+ n = neigh_create(&nd_tbl, daddr, dev);
+ return IS_ERR(n) ? NULL : n;
}
static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 51c9f75f34b9bb..1e03305c054922 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1021,6 +1021,9 @@ tx_error:
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index b9bbcf3d6c6397..c16f0a362c32c3 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -623,7 +623,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
if (rtn->rds_tcp_sysctl)
unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
- if (net != &init_net && rtn->ctl_table)
+ if (net != &init_net)
kfree(rtn->ctl_table);
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 986f3ed7d1a248..b7e67310ec379f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -549,7 +549,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- svsk->sk_sk->sk_stamp = skb->tstamp;
+ sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
len = skb->len;