author     Paul Gortmaker <paul.gortmaker@windriver.com>   2017-03-20 15:11:20 -0400
committer  Paul Gortmaker <paul.gortmaker@windriver.com>   2017-03-20 15:43:12 -0400
commit     06633a39c84dbc9a8373293477ef5eb327b62d02
tree       e25a326dfd3ca6ca1863f055d7be1d0e80dce997
parent     699543fe55f503431d79c4746f314ebf213dc059
download   longterm-queue-4.8-06633a39c84dbc9a8373293477ef5eb327b62d02.tar.gz
raw import of mainline commits used in 4.9.11
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
35 files changed, 3076 insertions(+), 0 deletions(-)
diff --git a/queue/can-Fix-kernel-panic-at-security_sock_rcv_skb.patch b/queue/can-Fix-kernel-panic-at-security_sock_rcv_skb.patch new file mode 100644 index 0000000..3ee31b8 --- /dev/null +++ b/queue/can-Fix-kernel-panic-at-security_sock_rcv_skb.patch @@ -0,0 +1,207 @@ +From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 27 Jan 2017 08:11:44 -0800 +Subject: [PATCH] can: Fix kernel panic at security_sock_rcv_skb + +commit f1712c73714088a7252d276a57126d56c7d37e64 upstream. + +Zhang Yanmin reported crashes [1] and provided a patch adding a +synchronize_rcu() call in can_rx_unregister() + +The main problem seems that the sockets themselves are not RCU +protected. + +If CAN uses RCU for delivery, then sockets should be freed only after +one RCU grace period. + +Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's +ease stable backports with the following fix instead. + +[1] +BUG: unable to handle kernel NULL pointer dereference at (null) +IP: [<ffffffff81495e25>] selinux_socket_sock_rcv_skb+0x65/0x2a0 + +Call Trace: + <IRQ> + [<ffffffff81485d8c>] security_sock_rcv_skb+0x4c/0x60 + [<ffffffff81d55771>] sk_filter+0x41/0x210 + [<ffffffff81d12913>] sock_queue_rcv_skb+0x53/0x3a0 + [<ffffffff81f0a2b3>] raw_rcv+0x2a3/0x3c0 + [<ffffffff81f06eab>] can_rcv_filter+0x12b/0x370 + [<ffffffff81f07af9>] can_receive+0xd9/0x120 + [<ffffffff81f07beb>] can_rcv+0xab/0x100 + [<ffffffff81d362ac>] __netif_receive_skb_core+0xd8c/0x11f0 + [<ffffffff81d36734>] __netif_receive_skb+0x24/0xb0 + [<ffffffff81d37f67>] process_backlog+0x127/0x280 + [<ffffffff81d36f7b>] net_rx_action+0x33b/0x4f0 + [<ffffffff810c88d4>] __do_softirq+0x184/0x440 + [<ffffffff81f9e86c>] do_softirq_own_stack+0x1c/0x30 + <EOI> + [<ffffffff810c76fb>] do_softirq.part.18+0x3b/0x40 + [<ffffffff810c8bed>] do_softirq+0x1d/0x20 + [<ffffffff81d30085>] netif_rx_ni+0xe5/0x110 + [<ffffffff8199cc87>] slcan_receive_buf+0x507/0x520 + [<ffffffff8167ef7c>] flush_to_ldisc+0x21c/0x230 + [<ffffffff810e3baf>] process_one_work+0x24f/0x670 + [<ffffffff810e44ed>] worker_thread+0x9d/0x6f0 + [<ffffffff810e4450>] ? rescuer_thread+0x480/0x480 + [<ffffffff810ebafc>] kthread+0x12c/0x150 + [<ffffffff81f9ccef>] ret_from_fork+0x3f/0x70 + +Reported-by: Zhang Yanmin <yanmin.zhang@intel.com> +Signed-off-by: Eric Dumazet <edumazet@google.com> +Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/include/linux/can/core.h b/include/linux/can/core.h +index a0875001b13c..df08a41d5be5 100644 +--- a/include/linux/can/core.h ++++ b/include/linux/can/core.h +@@ -45,10 +45,9 @@ struct can_proto { + extern int can_proto_register(const struct can_proto *cp); + extern void can_proto_unregister(const struct can_proto *cp); + +-extern int can_rx_register(struct net_device *dev, canid_t can_id, +- canid_t mask, +- void (*func)(struct sk_buff *, void *), +- void *data, char *ident); ++int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, ++ void (*func)(struct sk_buff *, void *), ++ void *data, char *ident, struct sock *sk); + + extern void can_rx_unregister(struct net_device *dev, canid_t can_id, + canid_t mask, +diff --git a/net/can/af_can.c b/net/can/af_can.c +index 1108079d934f..5488e4a6ccd0 100644 +--- a/net/can/af_can.c ++++ b/net/can/af_can.c +@@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, + * @func: callback function on filter match + * @data: returned parameter for callback function + * @ident: string for calling module identification ++ * @sk: socket pointer (might be NULL) + * + * Description: + * Invokes the callback function with the received sk_buff and the given +@@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, + */ + int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + void (*func)(struct sk_buff *, void *), void *data, +- char *ident) ++ char *ident, struct sock *sk) + { + struct receiver *r; + struct hlist_head *rl; +@@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + r->func = func; + r->data = data; + r->ident = ident; ++ r->sk = sk; + + hlist_add_head_rcu(&r->list, rl); + d->entries++; +@@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register); + static void can_rx_delete_receiver(struct rcu_head *rp) + { + struct receiver *r = container_of(rp, struct receiver, rcu); ++ struct sock *sk = r->sk; + + kmem_cache_free(rcv_cache, r); ++ if (sk) ++ sock_put(sk); + } + + /** +@@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, + spin_unlock(&can_rcvlists_lock); + + /* schedule the receiver item for deletion */ +- if (r) ++ if (r) { ++ if (r->sk) ++ sock_hold(r->sk); + call_rcu(&r->rcu, can_rx_delete_receiver); ++ } + } + EXPORT_SYMBOL(can_rx_unregister); + +diff --git a/net/can/af_can.h b/net/can/af_can.h +index fca0fe9fc45a..b86f5129e838 100644 +--- a/net/can/af_can.h ++++ b/net/can/af_can.h +@@ -50,13 +50,14 @@ + + struct receiver { + struct hlist_node list; +- struct rcu_head rcu; + canid_t can_id; + canid_t mask; + unsigned long matches; + void (*func)(struct sk_buff *, void *); + void *data; + char *ident; ++ struct sock *sk; ++ struct rcu_head rcu; + }; + + #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) +diff --git a/net/can/bcm.c b/net/can/bcm.c +index 21ac75390e3d..5c9407181918 100644 +--- a/net/can/bcm.c ++++ b/net/can/bcm.c +@@ -1216,7 +1216,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, + err = can_rx_register(dev, op->can_id, + REGMASK(op->can_id), + bcm_rx_handler, op, +- "bcm"); ++ "bcm", sk); + + op->rx_reg_dev = dev; + dev_put(dev); +@@ -1225,7 +1225,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, + } else + err = can_rx_register(NULL, op->can_id, + REGMASK(op->can_id), +- bcm_rx_handler, op, "bcm"); ++ bcm_rx_handler, op, "bcm", sk); + if 
(err) { + /* this bcm rx op is broken -> remove it */ + list_del(&op->list); +diff --git a/net/can/gw.c b/net/can/gw.c +index a54ab0c82104..7056a1a2bb70 100644 +--- a/net/can/gw.c ++++ b/net/can/gw.c +@@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj) + { + return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, +- gwj, "gw"); ++ gwj, "gw", NULL); + } + + static inline void cgw_unregister_filter(struct cgw_job *gwj) +diff --git a/net/can/raw.c b/net/can/raw.c +index b075f028d7e2..6dc546a06673 100644 +--- a/net/can/raw.c ++++ b/net/can/raw.c +@@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, + for (i = 0; i < count; i++) { + err = can_rx_register(dev, filter[i].can_id, + filter[i].can_mask, +- raw_rcv, sk, "raw"); ++ raw_rcv, sk, "raw", sk); + if (err) { + /* clean up successfully registered filters */ + while (--i >= 0) +@@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, + + if (err_mask) + err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, +- raw_rcv, sk, "raw"); ++ raw_rcv, sk, "raw", sk); + + return err; + } +-- +2.12.0 + diff --git a/queue/catc-Combine-failure-cleanup-code-in-catc_probe.patch b/queue/catc-Combine-failure-cleanup-code-in-catc_probe.patch new file mode 100644 index 0000000..2bc8a2c --- /dev/null +++ b/queue/catc-Combine-failure-cleanup-code-in-catc_probe.patch @@ -0,0 +1,72 @@ +From d41149145f98fe26dcd0bfd1d6cc095e6e041418 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sat, 4 Feb 2017 16:56:56 +0000 +Subject: [PATCH] catc: Combine failure cleanup code in catc_probe() + +commit d41149145f98fe26dcd0bfd1d6cc095e6e041418 upstream. + +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c +index 3daa41bdd4ea..985909eab72c 100644 +--- a/drivers/net/usb/catc.c ++++ b/drivers/net/usb/catc.c +@@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + struct net_device *netdev; + struct catc *catc; + u8 broadcast[ETH_ALEN]; +- int i, pktsz; ++ int i, pktsz, ret; + + if (usb_set_interface(usbdev, + intf->altsetting->desc.bInterfaceNumber, 1)) { +@@ -811,12 +811,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + if ((!catc->ctrl_urb) || (!catc->tx_urb) || + (!catc->rx_urb) || (!catc->irq_urb)) { + dev_err(&intf->dev, "No free urbs available.\n"); +- usb_free_urb(catc->ctrl_urb); +- usb_free_urb(catc->tx_urb); +- usb_free_urb(catc->rx_urb); +- usb_free_urb(catc->irq_urb); +- free_netdev(netdev); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto fail_free; + } + + /* The F5U011 has the same vendor/product as the netmate but a device version of 0x130 */ +@@ -913,16 +909,21 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + usb_set_intfdata(intf, catc); + + SET_NETDEV_DEV(netdev, &intf->dev); +- if (register_netdev(netdev) != 0) { +- usb_set_intfdata(intf, NULL); +- usb_free_urb(catc->ctrl_urb); +- usb_free_urb(catc->tx_urb); +- usb_free_urb(catc->rx_urb); +- usb_free_urb(catc->irq_urb); +- free_netdev(netdev); +- return -EIO; +- } ++ ret = register_netdev(netdev); ++ if (ret) ++ goto fail_clear_intfdata; ++ + return 0; ++ ++fail_clear_intfdata: ++ usb_set_intfdata(intf, NULL); ++fail_free: ++ usb_free_urb(catc->ctrl_urb); ++ usb_free_urb(catc->tx_urb); ++ usb_free_urb(catc->rx_urb); ++ usb_free_urb(catc->irq_urb); ++ free_netdev(netdev); ++ return ret; + } + + static void catc_disconnect(struct usb_interface *intf) +-- +2.12.0 + diff --git a/queue/catc-Use-heap-buffer-for-memory-size-test.patch b/queue/catc-Use-heap-buffer-for-memory-size-test.patch new file mode 100644 index 0000000..11d79a8 --- /dev/null +++ b/queue/catc-Use-heap-buffer-for-memory-size-test.patch @@ -0,0 +1,70 @@ +From 2d6a0e9de03ee658a9adc3bfb2f0ca55dff1e478 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sat, 4 Feb 2017 16:57:04 +0000 +Subject: [PATCH] catc: Use heap buffer for memory size test + +commit 2d6a0e9de03ee658a9adc3bfb2f0ca55dff1e478 upstream. + +Allocating USB buffers on the stack is not portable, and no longer +works on x86_64 (with VMAP_STACK enabled as per default). + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c +index 985909eab72c..0acc9b640419 100644 +--- a/drivers/net/usb/catc.c ++++ b/drivers/net/usb/catc.c +@@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + struct net_device *netdev; + struct catc *catc; + u8 broadcast[ETH_ALEN]; +- int i, pktsz, ret; ++ int pktsz, ret; + + if (usb_set_interface(usbdev, + intf->altsetting->desc.bInterfaceNumber, 1)) { +@@ -840,15 +840,24 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + catc->irq_buf, 2, catc_irq_done, catc, 1); + + if (!catc->is_f5u011) { ++ u32 *buf; ++ int i; ++ + dev_dbg(dev, "Checking memory size\n"); + +- i = 0x12345678; +- catc_write_mem(catc, 0x7a80, &i, 4); +- i = 0x87654321; +- catc_write_mem(catc, 0xfa80, &i, 4); +- catc_read_mem(catc, 0x7a80, &i, 4); ++ buf = kmalloc(4, GFP_KERNEL); ++ if (!buf) { ++ ret = -ENOMEM; ++ goto fail_free; ++ } ++ ++ *buf = 0x12345678; ++ catc_write_mem(catc, 0x7a80, buf, 4); ++ *buf = 0x87654321; ++ catc_write_mem(catc, 0xfa80, buf, 4); ++ catc_read_mem(catc, 0x7a80, buf, 4); + +- switch (i) { ++ switch (*buf) { + case 0x12345678: + catc_set_reg(catc, TxBufCount, 8); + catc_set_reg(catc, RxBufCount, 32); +@@ -863,6 +872,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id + dev_dbg(dev, "32k Memory\n"); + break; + } ++ ++ kfree(buf); + + dev_dbg(dev, "Getting MAC from SEEROM.\n"); + +-- +2.12.0 + diff --git a/queue/igmp-mld-Fix-memory-leak-in-igmpv3-mld_del_delrec.patch b/queue/igmp-mld-Fix-memory-leak-in-igmpv3-mld_del_delrec.patch new file mode 100644 index 0000000..14737aa --- /dev/null +++ b/queue/igmp-mld-Fix-memory-leak-in-igmpv3-mld_del_delrec.patch @@ -0,0 +1,44 @@ +From 9c8bb163ae784be4f79ae504e78c862806087c54 Mon Sep 17 00:00:00 2001 +From: Hangbin Liu <liuhangbin@gmail.com> +Date: Wed, 8 Feb 2017 21:16:45 +0800 +Subject: [PATCH] igmp, mld: Fix memory leak in igmpv3/mld_del_delrec() + +commit 9c8bb163ae784be4f79ae504e78c862806087c54 upstream. + +In function igmpv3/mld_add_delrec() we allocate pmc and put it in +idev->mc_tomb, so we should free it when we don't need it in del_delrec(). +But I removed kfree(pmc) incorrectly in latest two patches. Now fix it. + +Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info when ...") +Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when ...") +Reported-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index 5b15459955f8..44fd86de2823 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) + psf->sf_crcount = im->crcount; + } + in_dev_put(pmc->interface); ++ kfree(pmc); + } + spin_unlock_bh(&im->lock); + } +diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c +index 7139fffd61b6..1bdc703cb966 100644 +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -779,6 +779,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) + psf->sf_crcount = im->mca_crcount; + } + in6_dev_put(pmc->idev); ++ kfree(pmc); + } + spin_unlock_bh(&im->mca_lock); + } +-- +2.12.0 + diff --git a/queue/ip6_gre-fix-ip6gre_err-invalid-reads.patch b/queue/ip6_gre-fix-ip6gre_err-invalid-reads.patch new file mode 100644 index 0000000..52793b2 --- /dev/null +++ b/queue/ip6_gre-fix-ip6gre_err-invalid-reads.patch @@ -0,0 +1,90 @@ +From 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Sat, 4 Feb 2017 23:18:55 -0800 +Subject: [PATCH] ip6_gre: fix ip6gre_err() invalid reads + +commit 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 upstream. + +Andrey Konovalov reported out of bound accesses in ip6gre_err() + +If GRE flags contains GRE_KEY, the following expression +*(((__be32 *)p) + (grehlen / 4) - 1) + +accesses data ~40 bytes after the expected point, since +grehlen includes the size of IPv6 headers. + +Let's use a "struct gre_base_hdr *greh" pointer to make this +code more readable. + +p[1] becomes greh->protocol. +grhlen is the GRE header length. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Andrey Konovalov <andreyknvl@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 558631860d91..630b73be5999 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -367,35 +367,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) + + + static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +- u8 type, u8 code, int offset, __be32 info) ++ u8 type, u8 code, int offset, __be32 info) + { +- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; +- __be16 *p = (__be16 *)(skb->data + offset); +- int grehlen = offset + 4; ++ const struct gre_base_hdr *greh; ++ const struct ipv6hdr *ipv6h; ++ int grehlen = sizeof(*greh); + struct ip6_tnl *t; ++ int key_off = 0; + __be16 flags; ++ __be32 key; + +- flags = p[0]; +- if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { +- if (flags&(GRE_VERSION|GRE_ROUTING)) +- return; +- if (flags&GRE_KEY) { +- grehlen += 4; +- if (flags&GRE_CSUM) +- grehlen += 4; +- } ++ if (!pskb_may_pull(skb, offset + grehlen)) ++ return; ++ greh = (const struct gre_base_hdr *)(skb->data + offset); ++ flags = greh->flags; ++ if (flags & (GRE_VERSION | GRE_ROUTING)) ++ return; ++ if (flags & GRE_CSUM) ++ grehlen += 4; ++ if (flags & GRE_KEY) { ++ key_off = grehlen + offset; ++ grehlen += 4; + } + +- /* If only 8 bytes returned, keyed message will be dropped here */ +- if (!pskb_may_pull(skb, grehlen)) ++ if (!pskb_may_pull(skb, offset + grehlen)) + return; + ipv6h = (const struct ipv6hdr *)skb->data; +- p = (__be16 *)(skb->data + offset); ++ greh = (const struct gre_base_hdr *)(skb->data + offset); ++ key = key_off ? 
*(__be32 *)(skb->data + key_off) : 0; + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, +- flags & GRE_KEY ? +- *(((__be32 *)p) + (grehlen / 4) - 1) : 0, +- p[1]); ++ key, greh->protocol); + if (!t) + return; + +-- +2.12.0 + diff --git a/queue/ipv4-keep-skb-dst-around-in-presence-of-IP-options.patch b/queue/ipv4-keep-skb-dst-around-in-presence-of-IP-options.patch new file mode 100644 index 0000000..4bde2bc --- /dev/null +++ b/queue/ipv4-keep-skb-dst-around-in-presence-of-IP-options.patch @@ -0,0 +1,46 @@ +From 34b2cef20f19c87999fff3da4071e66937db9644 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Sat, 4 Feb 2017 11:16:52 -0800 +Subject: [PATCH] ipv4: keep skb->dst around in presence of IP options + +commit 34b2cef20f19c87999fff3da4071e66937db9644 upstream. + +Andrey Konovalov got crashes in __ip_options_echo() when a NULL skb->dst +is accessed. + +ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options +are present. + +We could refine the test to the presence of ts_needtime or srr, +but IP options are not often used, so let's be conservative. + +Thanks to syzkaller team for finding this bug. + +Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Andrey Konovalov <andreyknvl@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c +index 53ae0c6315ad..900011709e3b 100644 +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -1238,7 +1238,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) + pktinfo->ipi_ifindex = 0; + pktinfo->ipi_spec_dst.s_addr = 0; + } +- skb_dst_drop(skb); ++ /* We need to keep the dst for __ip_options_echo() ++ * We could restrict the test to opt.ts_needtime || opt.srr, ++ * but the following is good enough as IP options are not often used. ++ */ ++ if (unlikely(IPCB(skb)->opt.optlen)) ++ skb_dst_force(skb); ++ else ++ skb_dst_drop(skb); + } + + int ip_setsockopt(struct sock *sk, int level, +-- +2.12.0 + diff --git a/queue/ipv6-fix-ip6_tnl_parse_tlv_enc_lim.patch b/queue/ipv6-fix-ip6_tnl_parse_tlv_enc_lim.patch new file mode 100644 index 0000000..b350d15 --- /dev/null +++ b/queue/ipv6-fix-ip6_tnl_parse_tlv_enc_lim.patch @@ -0,0 +1,100 @@ +From fbfa743a9d2a0ffa24251764f10afc13eb21e739 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Mon, 23 Jan 2017 16:43:06 -0800 +Subject: [PATCH] ipv6: fix ip6_tnl_parse_tlv_enc_lim() + +commit fbfa743a9d2a0ffa24251764f10afc13eb21e739 upstream. + +This function suffers from multiple issues. + +First one is that pskb_may_pull() may reallocate skb->head, +so the 'raw' pointer needs either to be reloaded or not used at all. + +Second issue is that NEXTHDR_DEST handling does not validate +that the options are present in skb->data, so we might read +garbage or access non existent memory. + +With help from Willem de Bruijn. + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Cc: Willem de Bruijn <willemb@google.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index 02923f956ac8..ff8ee06491c3 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -400,18 +400,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) + + __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) + { +- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; +- __u8 nexthdr = ipv6h->nexthdr; +- __u16 off = sizeof(*ipv6h); ++ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; ++ unsigned int nhoff = raw - skb->data; ++ unsigned int off = nhoff + sizeof(*ipv6h); ++ u8 next, nexthdr = ipv6h->nexthdr; + + while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { +- __u16 optlen = 0; + struct ipv6_opt_hdr *hdr; +- if (raw + off + sizeof(*hdr) > skb->data && +- !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) ++ u16 optlen; ++ ++ if (!pskb_may_pull(skb, off + sizeof(*hdr))) + break; + +- hdr = (struct ipv6_opt_hdr *) (raw + off); ++ hdr = (struct ipv6_opt_hdr *)(skb->data + off); + if (nexthdr == NEXTHDR_FRAGMENT) { + struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; + if (frag_hdr->frag_off) +@@ -422,20 +423,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) + } else { + optlen = ipv6_optlen(hdr); + } ++ /* cache hdr->nexthdr, since pskb_may_pull() might ++ * invalidate hdr ++ */ ++ next = hdr->nexthdr; + if (nexthdr == NEXTHDR_DEST) { +- __u16 i = off + 2; ++ u16 i = 2; ++ ++ /* Remember : hdr is no longer valid at this point. */ ++ if (!pskb_may_pull(skb, off + optlen)) ++ break; ++ + while (1) { + struct ipv6_tlv_tnl_enc_lim *tel; + + /* No more room for encapsulation limit */ +- if (i + sizeof (*tel) > off + optlen) ++ if (i + sizeof(*tel) > optlen) + break; + +- tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; ++ tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; + /* return index of option if found and valid */ + if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && + tel->length == 1) +- return i; ++ return i + off - nhoff; + /* else jump to next option */ + if (tel->type) + i += tel->length + 2; +@@ -443,7 +453,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) + i++; + } + } +- nexthdr = hdr->nexthdr; ++ nexthdr = next; + off += optlen; + } + return 0; +-- +2.12.0 + diff --git a/queue/ipv6-pointer-math-error-in-ip6_tnl_parse_tlv_enc_lim.patch b/queue/ipv6-pointer-math-error-in-ip6_tnl_parse_tlv_enc_lim.patch new file mode 100644 index 0000000..cbfba42 --- /dev/null +++ b/queue/ipv6-pointer-math-error-in-ip6_tnl_parse_tlv_enc_lim.patch @@ -0,0 +1,31 @@ +From 63117f09c768be05a0bf465911297dc76394f686 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter <dan.carpenter@oracle.com> +Date: Wed, 1 Feb 2017 11:46:32 +0300 +Subject: [PATCH] ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim() + +commit 63117f09c768be05a0bf465911297dc76394f686 upstream. + +Casting is a high precedence operation but "off" and "i" are in terms of +bytes so we need to have some parenthesis here. + +Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()") +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index ff8ee06491c3..75fac933c209 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -441,7 +441,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) + if (i + sizeof(*tel) > optlen) + break; + +- tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; ++ tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); + /* return index of option if found and valid */ + if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && + tel->length == 1) +-- +2.12.0 + diff --git a/queue/ipv6-tcp-add-a-missing-tcp_v6_restore_cb.patch b/queue/ipv6-tcp-add-a-missing-tcp_v6_restore_cb.patch new file mode 100644 index 0000000..3d4a20b --- /dev/null +++ b/queue/ipv6-tcp-add-a-missing-tcp_v6_restore_cb.patch @@ -0,0 +1,72 @@ +From ebf6c9cb23d7e56eec8575a88071dec97ad5c6e2 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Sun, 5 Feb 2017 20:23:22 -0800 +Subject: [PATCH] ipv6: tcp: add a missing tcp_v6_restore_cb() + +commit ebf6c9cb23d7e56eec8575a88071dec97ad5c6e2 upstream. + +Dmitry reported use-after-free in ip6_datagram_recv_specific_ctl() + +A similar bug was fixed in commit 8ce48623f0cf ("ipv6: tcp: restore +IP6CB for pktoptions skbs"), but I missed another spot. + +tcp_v6_syn_recv_sock() can indeed set np->pktoptions from ireq->pktopts + +Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index cb8929681dc7..eaad72c3d746 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -991,6 +991,16 @@ drop: + return 0; /* don't send reset */ + } + ++static void tcp_v6_restore_cb(struct sk_buff *skb) ++{ ++ /* We need to move header back to the beginning if xfrm6_policy_check() ++ * and tcp_v6_fill_cb() are going to be called again. ++ * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. ++ */ ++ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, ++ sizeof(struct inet6_skb_parm)); ++} ++ + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst, +@@ -1182,8 +1192,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + sk_gfp_mask(sk, GFP_ATOMIC)); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; +- if (newnp->pktoptions) ++ if (newnp->pktoptions) { ++ tcp_v6_restore_cb(newnp->pktoptions); + skb_set_owner_r(newnp->pktoptions, newsk); ++ } + } + } + +@@ -1198,16 +1210,6 @@ out: + return NULL; + } + +-static void tcp_v6_restore_cb(struct sk_buff *skb) +-{ +- /* We need to move header back to the beginning if xfrm6_policy_check() +- * and tcp_v6_fill_cb() are going to be called again. +- * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. +- */ +- memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, +- sizeof(struct inet6_skb_parm)); +-} +- + /* The socket must have it's spinlock held when we get + * here, unless it is a TCP_LISTEN socket. 
+ * +-- +2.12.0 + diff --git a/queue/l2tp-do-not-use-udp_ioctl.patch b/queue/l2tp-do-not-use-udp_ioctl.patch new file mode 100644 index 0000000..a1940f0 --- /dev/null +++ b/queue/l2tp-do-not-use-udp_ioctl.patch @@ -0,0 +1,109 @@ +From 72fb96e7bdbbdd4421b0726992496531060f3636 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Thu, 9 Feb 2017 16:15:52 -0800 +Subject: [PATCH] l2tp: do not use udp_ioctl() + +commit 72fb96e7bdbbdd4421b0726992496531060f3636 upstream. + +udp_ioctl(), as its name suggests, is used by UDP protocols, +but is also used by L2TP :( + +L2TP should use its own handler, because it really does not +look the same. + +SIOCINQ for instance should not assume UDP checksum or headers. + +Thanks to Andrey and syzkaller team for providing the report +and a nice reproducer. + +While crashes only happen on recent kernels (after commit +7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")), this +probably needs to be backported to older kernels. + +Fixes: 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue") +Fixes: 85584672012e ("udp: Fix udp_poll() and ioctl()") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Andrey Konovalov <andreyknvl@google.com> +Acked-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h +index 8f560f7140a0..aebf281d09ee 100644 +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -263,6 +263,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, + int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, + const struct l2tp_nl_cmd_ops *ops); + void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); ++int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); + + /* Session reference counts. Incremented when code obtains a reference + * to a session. +diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c +index 3d73278b86ca..28c21546d5b6 100644 +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -11,6 +11,7 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include <asm/ioctls.h> + #include <linux/icmp.h> + #include <linux/module.h> + #include <linux/skbuff.h> +@@ -553,6 +554,30 @@ out: + return err ? err : copied; + } + ++int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) ++{ ++ struct sk_buff *skb; ++ int amount; ++ ++ switch (cmd) { ++ case SIOCOUTQ: ++ amount = sk_wmem_alloc_get(sk); ++ break; ++ case SIOCINQ: ++ spin_lock_bh(&sk->sk_receive_queue.lock); ++ skb = skb_peek(&sk->sk_receive_queue); ++ amount = skb ? 
skb->len : 0; ++ spin_unlock_bh(&sk->sk_receive_queue.lock); ++ break; ++ ++ default: ++ return -ENOIOCTLCMD; ++ } ++ ++ return put_user(amount, (int __user *)arg); ++} ++EXPORT_SYMBOL(l2tp_ioctl); ++ + static struct proto l2tp_ip_prot = { + .name = "L2TP/IP", + .owner = THIS_MODULE, +@@ -561,7 +586,7 @@ static struct proto l2tp_ip_prot = { + .bind = l2tp_ip_bind, + .connect = l2tp_ip_connect, + .disconnect = l2tp_ip_disconnect, +- .ioctl = udp_ioctl, ++ .ioctl = l2tp_ioctl, + .destroy = l2tp_ip_destroy_sock, + .setsockopt = ip_setsockopt, + .getsockopt = ip_getsockopt, +diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c +index 331ccf5a7bad..f47c45250f86 100644 +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -722,7 +722,7 @@ static struct proto l2tp_ip6_prot = { + .bind = l2tp_ip6_bind, + .connect = l2tp_ip6_connect, + .disconnect = l2tp_ip6_disconnect, +- .ioctl = udp_ioctl, ++ .ioctl = l2tp_ioctl, + .destroy = l2tp_ip6_destroy_sock, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, +-- +2.12.0 + diff --git a/queue/lwtunnel-valid-encap-attr-check-should-return-0-when.patch b/queue/lwtunnel-valid-encap-attr-check-should-return-0-when.patch new file mode 100644 index 0000000..06bf302 --- /dev/null +++ b/queue/lwtunnel-valid-encap-attr-check-should-return-0-when.patch @@ -0,0 +1,45 @@ +From 2bd137de531367fb573d90150d1872cb2a2095f7 Mon Sep 17 00:00:00 2001 +From: David Ahern <dsa@cumulusnetworks.com> +Date: Wed, 8 Feb 2017 09:29:00 -0800 +Subject: [PATCH] lwtunnel: valid encap attr check should return 0 when + lwtunnel is disabled + +commit 2bd137de531367fb573d90150d1872cb2a2095f7 upstream. + +An error was reported upgrading to 4.9.8: + root@Typhoon:~# ip route add default table 210 nexthop dev eth0 via 10.68.64.1 + weight 1 nexthop dev eth0 via 10.68.64.2 weight 1 + RTNETLINK answers: Operation not supported + +The problem occurs when CONFIG_LWTUNNEL is not enabled and a multipath +route is submitted. + +The point of lwtunnel_valid_encap_type_attr is catch modules that +need to be loaded before any references are taken with rntl held. With +CONFIG_LWTUNNEL disabled, there will be no modules to load so the +lwtunnel_valid_encap_type_attr stub should just return 0. + +Fixes: 9ed59592e3e3 ("lwtunnel: fix autoload of lwt modules") +Reported-by: pupilla@libero.it +Signed-off-by: David Ahern <dsa@cumulusnetworks.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h +index 73dd87647460..0388b9c5f5e2 100644 +--- a/include/net/lwtunnel.h ++++ b/include/net/lwtunnel.h +@@ -178,7 +178,10 @@ static inline int lwtunnel_valid_encap_type(u16 encap_type) + } + static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) + { +- return -EOPNOTSUPP; ++ /* return 0 since we are not walking attr looking for ++ * RTA_ENCAP_TYPE attribute on nexthops. ++ */ ++ return 0; + } + + static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +-- +2.12.0 + diff --git a/queue/macvtap-read-vnet_hdr_size-once.patch b/queue/macvtap-read-vnet_hdr_size-once.patch new file mode 100644 index 0000000..17f92bd --- /dev/null +++ b/queue/macvtap-read-vnet_hdr_size-once.patch @@ -0,0 +1,45 @@ +From 837585a5375c38d40361cfe64e6fd11e1addb936 Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn <willemb@google.com> +Date: Fri, 3 Feb 2017 18:20:49 -0500 +Subject: [PATCH] macvtap: read vnet_hdr_size once + +commit 837585a5375c38d40361cfe64e6fd11e1addb936 upstream. 
+ +When IFF_VNET_HDR is enabled, a virtio_net header must precede data. +Data length is verified to be greater than or equal to expected header +length tun->vnet_hdr_sz before copying. + +Macvtap functions read the value once, but unless READ_ONCE is used, +the compiler may ignore this and read multiple times. Enforce a single +read and locally cached value to avoid updates between test and use. + +Signed-off-by: Willem de Bruijn <willemb@google.com> +Suggested-by: Eric Dumazet <edumazet@google.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c +index 402618565838..c27011bbe30c 100644 +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -681,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + size_t linear; + + if (q->flags & IFF_VNET_HDR) { +- vnet_hdr_len = q->vnet_hdr_sz; ++ vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); + + err = -EINVAL; + if (len < vnet_hdr_len) +@@ -820,7 +820,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, + + if (q->flags & IFF_VNET_HDR) { + struct virtio_net_hdr vnet_hdr; +- vnet_hdr_len = q->vnet_hdr_sz; ++ vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); + if (iov_iter_count(iter) < vnet_hdr_len) + return -EINVAL; + +-- +2.12.0 + diff --git a/queue/mld-do-not-remove-mld-souce-list-info-when-set-link-.patch b/queue/mld-do-not-remove-mld-souce-list-info-when-set-link-.patch new file mode 100644 index 0000000..8860e0d --- /dev/null +++ b/queue/mld-do-not-remove-mld-souce-list-info-when-set-link-.patch @@ -0,0 +1,171 @@ +From 1666d49e1d416fcc2cce708242a52fe3317ea8ba Mon Sep 17 00:00:00 2001 +From: Hangbin Liu <liuhangbin@gmail.com> +Date: Thu, 12 Jan 2017 21:19:37 +0800 +Subject: [PATCH] mld: do not remove mld souce list info when set link down + +commit 1666d49e1d416fcc2cce708242a52fe3317ea8ba upstream. + +This is an IPv6 version of commit 24803f38a5c0 ("igmp: do not remove igmp +souce list..."). In mld_del_delrec(), we will restore back all source filter +info instead of flush them. + +Move mld_clear_delrec() from ipv6_mc_down() to ipv6_mc_destroy_dev() since +we should not remove source list info when set link down. Remove +igmp6_group_dropped() in ipv6_mc_destroy_dev() since we have called it in +ipv6_mc_down(). + +Also clear all source info after igmp6_group_dropped() instead of in it +because ipv6_mc_down() will call igmp6_group_dropped(). + +Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c +index 14a3903f1c82..7139fffd61b6 100644 +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -81,7 +81,7 @@ static void mld_gq_timer_expire(unsigned long data); + static void mld_ifc_timer_expire(unsigned long data); + static void mld_ifc_event(struct inet6_dev *idev); + static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); +-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr); ++static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); + static void mld_clear_delrec(struct inet6_dev *idev); + static bool mld_in_v1_mode(const struct inet6_dev *idev); + static int sf_setstate(struct ifmcaddr6 *pmc); +@@ -692,9 +692,9 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) + dev_mc_del(dev, buf); + } + +- if (mc->mca_flags & MAF_NOREPORT) +- goto done; + spin_unlock_bh(&mc->mca_lock); ++ if (mc->mca_flags & MAF_NOREPORT) ++ return; + + if (!mc->idev->dead) + igmp6_leave_group(mc); +@@ -702,8 +702,6 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) + spin_lock_bh(&mc->mca_lock); + if (del_timer(&mc->mca_timer)) + atomic_dec(&mc->mca_refcnt); +-done: +- ip6_mc_clear_src(mc); + spin_unlock_bh(&mc->mca_lock); + } + +@@ -748,10 +746,11 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) + spin_unlock_bh(&idev->mc_lock); + } + +-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) ++static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) + { + struct ifmcaddr6 *pmc, *pmc_prev; +- struct ip6_sf_list *psf, *psf_next; ++ struct ip6_sf_list *psf; ++ struct in6_addr *pmca = &im->mca_addr; + + spin_lock_bh(&idev->mc_lock); + pmc_prev = NULL; +@@ -768,14 +767,20 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) + } + spin_unlock_bh(&idev->mc_lock); + ++ spin_lock_bh(&im->mca_lock); + if (pmc) { +- for (psf = pmc->mca_tomb; psf; psf = psf_next) { +- psf_next = psf->sf_next; +- kfree(psf); ++ im->idev = pmc->idev; ++ im->mca_crcount = idev->mc_qrv; ++ im->mca_sfmode = pmc->mca_sfmode; ++ if (pmc->mca_sfmode == MCAST_INCLUDE) { ++ im->mca_tomb = pmc->mca_tomb; ++ im->mca_sources = pmc->mca_sources; ++ for (psf = im->mca_sources; psf; psf = psf->sf_next) ++ psf->sf_crcount = im->mca_crcount; + } + in6_dev_put(pmc->idev); +- kfree(pmc); + } ++ spin_unlock_bh(&im->mca_lock); + } + + static void mld_clear_delrec(struct inet6_dev *idev) +@@ -904,7 +909,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) + mca_get(mc); + write_unlock_bh(&idev->lock); + +- mld_del_delrec(idev, &mc->mca_addr); ++ mld_del_delrec(idev, mc); + igmp6_group_added(mc); + ma_put(mc); + return 0; +@@ -927,6 +932,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) + write_unlock_bh(&idev->lock); + + igmp6_group_dropped(ma); ++ ip6_mc_clear_src(ma); + + ma_put(ma); + return 0; +@@ -2501,15 +2507,17 @@ void ipv6_mc_down(struct inet6_dev *idev) + /* Withdraw multicast list */ + + read_lock_bh(&idev->lock); +- mld_ifc_stop_timer(idev); +- mld_gq_stop_timer(idev); +- mld_dad_stop_timer(idev); + + for (i = idev->mc_list; i; i = i->next) + igmp6_group_dropped(i); +- read_unlock_bh(&idev->lock); + +- mld_clear_delrec(idev); ++ /* Should stop timer after group drop. 
or we will ++ * start timer again in mld_ifc_event() ++ */ ++ mld_ifc_stop_timer(idev); ++ mld_gq_stop_timer(idev); ++ mld_dad_stop_timer(idev); ++ read_unlock_bh(&idev->lock); + } + + static void ipv6_mc_reset(struct inet6_dev *idev) +@@ -2531,8 +2539,10 @@ void ipv6_mc_up(struct inet6_dev *idev) + + read_lock_bh(&idev->lock); + ipv6_mc_reset(idev); +- for (i = idev->mc_list; i; i = i->next) ++ for (i = idev->mc_list; i; i = i->next) { ++ mld_del_delrec(idev, i); + igmp6_group_added(i); ++ } + read_unlock_bh(&idev->lock); + } + +@@ -2565,6 +2575,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) + + /* Deactivate timers */ + ipv6_mc_down(idev); ++ mld_clear_delrec(idev); + + /* Delete all-nodes address. */ + /* We cannot call ipv6_dev_mc_dec() directly, our caller in +@@ -2579,11 +2590,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) + write_lock_bh(&idev->lock); + while ((i = idev->mc_list) != NULL) { + idev->mc_list = i->next; +- write_unlock_bh(&idev->lock); + +- igmp6_group_dropped(i); ++ write_unlock_bh(&idev->lock); + ma_put(i); +- + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +-- +2.12.0 + diff --git a/queue/mlx4-Invoke-softirqs-after-napi_reschedule.patch b/queue/mlx4-Invoke-softirqs-after-napi_reschedule.patch new file mode 100644 index 0000000..a4db623 --- /dev/null +++ b/queue/mlx4-Invoke-softirqs-after-napi_reschedule.patch @@ -0,0 +1,42 @@ +From bd4ce941c8d5b862b2f83364be5dbe8fc8ab48f8 Mon Sep 17 00:00:00 2001 +From: Benjamin Poirier <bpoirier@suse.com> +Date: Mon, 6 Feb 2017 10:14:31 -0800 +Subject: [PATCH] mlx4: Invoke softirqs after napi_reschedule + +commit bd4ce941c8d5b862b2f83364be5dbe8fc8ab48f8 upstream. + +mlx4 may schedule napi from a workqueue. Afterwards, softirqs are not run +in a deterministic time frame and the following message may be logged: +NOHZ: local_softirq_pending 08 + +The problem is the same as what was described in commit ec13ee80145c +("virtio_net: invoke softirqs after __napi_schedule") and this patch +applies the same fix to mlx4. + +Fixes: 07841f9d94c1 ("net/mlx4_en: Schedule napi when RX buffers allocation fails") +Cc: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: Benjamin Poirier <bpoirier@suse.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Tariq Toukan <tariqt@mellanox.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +index eac527e25ec9..cc003fdf0ed9 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -514,8 +514,11 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) + return; + + for (ring = 0; ring < priv->rx_ring_num; ring++) { +- if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) ++ if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { ++ local_bh_disable(); + napi_reschedule(&priv->rx_cq[ring]->napi); ++ local_bh_enable(); ++ } + } + } + +-- +2.12.0 + diff --git a/queue/net-dsa-Do-not-destroy-invalid-network-devices.patch b/queue/net-dsa-Do-not-destroy-invalid-network-devices.patch new file mode 100644 index 0000000..0bab519 --- /dev/null +++ b/queue/net-dsa-Do-not-destroy-invalid-network-devices.patch @@ -0,0 +1,32 @@ +From 382e1eea2d983cd2343482c6a638f497bb44a636 Mon Sep 17 00:00:00 2001 +From: Florian Fainelli <f.fainelli@gmail.com> +Date: Tue, 7 Feb 2017 23:10:13 -0800 +Subject: [PATCH] net: dsa: Do not destroy invalid network devices + +commit 382e1eea2d983cd2343482c6a638f497bb44a636 upstream. 
+ +dsa_slave_create() can fail, and dsa_user_port_unapply() will properly check +for the network device not being NULL before attempting to destroy it. We were +not setting the slave network device as NULL if dsa_slave_create() failed, so +we would later on be calling dsa_slave_destroy() on a now free'd and +unitialized network device, causing crashes in dsa_slave_destroy(). + +Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") +Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c +index da3862124545..0f99297b2fb3 100644 +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -273,6 +273,7 @@ static int dsa_user_port_apply(struct device_node *port, u32 index, + if (err) { + dev_warn(ds->dev, "Failed to create slave %d: %d\n", + index, err); ++ ds->ports[index].netdev = NULL; + return err; + } + +-- +2.12.0 + diff --git a/queue/net-introduce-device-min_header_len.patch b/queue/net-introduce-device-min_header_len.patch new file mode 100644 index 0000000..0b340e5 --- /dev/null +++ b/queue/net-introduce-device-min_header_len.patch @@ -0,0 +1,84 @@ +From 217e6fa24ce28ec87fca8da93c9016cb78028612 Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn <willemb@google.com> +Date: Tue, 7 Feb 2017 15:57:20 -0500 +Subject: [PATCH] net: introduce device min_header_len + +commit 217e6fa24ce28ec87fca8da93c9016cb78028612 upstream. + +The stack must not pass packets to device drivers that are shorter +than the minimum link layer header length. + +Previously, packet sockets would drop packets smaller than or equal +to dev->hard_header_len, but this has false positives. Zero length +payload is used over Ethernet. Other link layer protocols support +variable length headers. Support for validation of these protocols +removed the min length check for all protocols. + +Introduce an explicit dev->min_header_len parameter and drop all +packets below this value. Initially, set it to non-zero only for +Ethernet and loopback. Other protocols can follow in a patch to +net-next. + +Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") +Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> +Signed-off-by: Willem de Bruijn <willemb@google.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c +index 1e05b7c2d157..0844f8496413 100644 +--- a/drivers/net/loopback.c ++++ b/drivers/net/loopback.c +@@ -164,6 +164,7 @@ static void loopback_setup(struct net_device *dev) + { + dev->mtu = 64 * 1024; + dev->hard_header_len = ETH_HLEN; /* 14 */ ++ dev->min_header_len = ETH_HLEN; /* 14 */ + dev->addr_len = ETH_ALEN; /* 6 */ + dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ + dev->flags = IFF_LOOPBACK; +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 70ad0291d517..27914672602d 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1511,6 +1511,7 @@ enum netdev_priv_flags { + * @max_mtu: Interface Maximum MTU value + * @type: Interface hardware type + * @hard_header_len: Maximum hardware header length. 
++ * @min_header_len: Minimum hardware header length + * + * @needed_headroom: Extra headroom the hardware may need, but not in all + * cases can this be guaranteed +@@ -1728,6 +1729,7 @@ struct net_device { + unsigned int max_mtu; + unsigned short type; + unsigned short hard_header_len; ++ unsigned short min_header_len; + + unsigned short needed_headroom; + unsigned short needed_tailroom; +@@ -2694,6 +2696,8 @@ static inline bool dev_validate_header(const struct net_device *dev, + { + if (likely(len >= dev->hard_header_len)) + return true; ++ if (len < dev->min_header_len) ++ return false; + + if (capable(CAP_SYS_RAWIO)) { + memset(ll_header + len, 0, dev->hard_header_len - len); +diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c +index 8c5a479681ca..516c87e75de7 100644 +--- a/net/ethernet/eth.c ++++ b/net/ethernet/eth.c +@@ -356,6 +356,7 @@ void ether_setup(struct net_device *dev) + dev->header_ops = ð_header_ops; + dev->type = ARPHRD_ETHER; + dev->hard_header_len = ETH_HLEN; ++ dev->min_header_len = ETH_HLEN; + dev->mtu = ETH_DATA_LEN; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_DATA_LEN; +-- +2.12.0 + diff --git a/queue/net-mlx5-Don-t-unlock-fte-while-still-using-it.patch b/queue/net-mlx5-Don-t-unlock-fte-while-still-using-it.patch new file mode 100644 index 0000000..1e1b9e7 --- /dev/null +++ b/queue/net-mlx5-Don-t-unlock-fte-while-still-using-it.patch @@ -0,0 +1,50 @@ +From 0fd758d6112f867b2cc6df0f6a856048ff99b211 Mon Sep 17 00:00:00 2001 +From: Mark Bloch <markb@mellanox.com> +Date: Mon, 5 Sep 2016 10:58:04 +0000 +Subject: [PATCH] net/mlx5: Don't unlock fte while still using it + +commit 0fd758d6112f867b2cc6df0f6a856048ff99b211 upstream. + +When adding a new rule to an fte, we need to hold the fte lock +until we add that rule to the fte and increase the fte ref count. 
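(Illustrative aside, not taken from the patch or the mlx5 sources: a minimal pthreads sketch of the locking rule this commit describes — the entry's lock must stay held across both attaching the new rule and taking the reference, so a concurrent delete cannot free the entry in between. All names below are invented for the example.)

/*
 * Hypothetical illustration only -- not mlx5 code. The invariant:
 * attach the rule and take the reference inside one critical section.
 */
#include <pthread.h>

struct rule {
	struct rule *next;
};

struct entry {
	pthread_mutex_t lock;
	int refcount;        /* protected by lock in this sketch */
	struct rule *rules;  /* singly linked list of attached rules */
};

/* Correct pattern: do not drop the lock between the two steps. */
static void entry_add_rule(struct entry *e, struct rule *r)
{
	pthread_mutex_lock(&e->lock);
	r->next = e->rules;
	e->rules = r;    /* attach the new rule ... */
	e->refcount++;   /* ... and pin the entry before unlocking */
	pthread_mutex_unlock(&e->lock);
}

int main(void)
{
	struct entry e = { .lock = PTHREAD_MUTEX_INITIALIZER, .refcount = 1, .rules = NULL };
	struct rule r = { .next = NULL };

	entry_add_rule(&e, &r);
	return e.refcount == 2 ? 0 : 1;
}

The buggy ordering released the lock after attaching the rule but before the caller took its reference; that window is what the patch below closes by keeping the fte node locked until the refcount has been taken.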
+ +Fixes: 0c56b97503fd ("net/mlx5_core: Introduce flow steering API") +Signed-off-by: Mark Bloch <markb@mellanox.com> +Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> +Signed-off-by: Leon Romanovsky <leon@kernel.org> + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +index 5da2cc878582..a07ff305b44f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -1107,9 +1107,8 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, + return rule; + } + rule = add_rule_fte(fte, fg, dest); +- unlock_ref_node(&fte->node); + if (IS_ERR(rule)) +- goto unlock_fg; ++ goto unlock_fte; + else + goto add_rule; + } +@@ -1127,6 +1126,7 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, + goto unlock_fg; + } + tree_init_node(&fte->node, 0, del_fte); ++ nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + rule = add_rule_fte(fte, fg, dest); + if (IS_ERR(rule)) { + kfree(fte); +@@ -1139,6 +1139,8 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, + list_add(&fte->node.list, prev); + add_rule: + tree_add_node(&rule->node, &fte->node); ++unlock_fte: ++ unlock_ref_node(&fte->node); + unlock_fg: + unlock_ref_node(&fg->node); + return rule; +-- +2.12.0 + diff --git a/queue/net-mlx5e-Fix-update-of-hash-function-key-via-ethtoo.patch b/queue/net-mlx5e-Fix-update-of-hash-function-key-via-ethtoo.patch new file mode 100644 index 0000000..e33b8e6 --- /dev/null +++ b/queue/net-mlx5e-Fix-update-of-hash-function-key-via-ethtoo.patch @@ -0,0 +1,304 @@ +From a100ff3eef193d2d79daf98dcd97a54776ffeb78 Mon Sep 17 00:00:00 2001 +From: Gal Pressman <galp@mellanox.com> +Date: Thu, 12 Jan 2017 16:25:46 +0200 +Subject: [PATCH] net/mlx5e: Fix update of hash function/key via ethtool + +commit a100ff3eef193d2d79daf98dcd97a54776ffeb78 upstream. + +Modifying TIR hash should change selected fields bitmask in addition to +the function and key. + +Formerly, Only on ethool mlx5e_set_rxfh "ethtoo -X" we would not set this +field resulting in zeroing of its value, which means no packet fields are +used for RX RSS hash calculation thus causing all traffic to arrive in +RQ[0]. + +On driver load out of the box we don't have this issue, since the TIR +hash is fully created from scratch. + +Tested: +ethtool -X ethX hkey <new key> +ethtool -X ethX hfunc <new func> +ethtool -X ethX equal <new indirection table> + +All cases are verified with TCP Multi-Stream traffic over IPv4 & IPv6. 
+ +Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change") +Signed-off-by: Gal Pressman <galp@mellanox.com> +Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h +index 1619147a63e8..d5ecb8f53fd4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -791,7 +791,8 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); + int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); + + int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); +-void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); ++void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, ++ enum mlx5e_traffic_types tt); + + int mlx5e_open_locked(struct net_device *netdev); + int mlx5e_close_locked(struct net_device *netdev); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +index 6f4eb34259f0..bb67863aa361 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -980,15 +980,18 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + + static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) + { +- struct mlx5_core_dev *mdev = priv->mdev; + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); +- int i; ++ struct mlx5_core_dev *mdev = priv->mdev; ++ int ctxlen = MLX5_ST_SZ_BYTES(tirc); ++ int tt; + + MLX5_SET(modify_tir_in, in, bitmask.hash, 1); +- mlx5e_build_tir_ctx_hash(tirc, priv); + +- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) +- mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); ++ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { ++ memset(tirc, 0, ctxlen); ++ mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); ++ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); ++ } + } + + static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 948351ae5bd2..f14ca3385fdd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -2022,8 +2022,23 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); + } + +-void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) ++void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, ++ enum mlx5e_traffic_types tt) + { ++ void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); ++ ++#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ ++ MLX5_HASH_FIELD_SEL_DST_IP) ++ ++#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ ++ MLX5_HASH_FIELD_SEL_DST_IP |\ ++ MLX5_HASH_FIELD_SEL_L4_SPORT |\ ++ MLX5_HASH_FIELD_SEL_L4_DPORT) ++ ++#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ ++ MLX5_HASH_FIELD_SEL_DST_IP |\ ++ MLX5_HASH_FIELD_SEL_IPSEC_SPI) ++ + MLX5_SET(tirc, tirc, rx_hash_fn, + mlx5e_rx_hash_fn(priv->params.rss_hfunc)); + if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { +@@ -2035,6 +2050,88 @@ void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, priv->params.toeplitz_hash_key, len); + } ++ ++ switch (tt) { ++ case 
MLX5E_TT_IPV4_TCP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV4); ++ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, ++ MLX5_L4_PROT_TYPE_TCP); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_L4PORTS); ++ break; ++ ++ case MLX5E_TT_IPV6_TCP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV6); ++ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, ++ MLX5_L4_PROT_TYPE_TCP); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_L4PORTS); ++ break; ++ ++ case MLX5E_TT_IPV4_UDP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV4); ++ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, ++ MLX5_L4_PROT_TYPE_UDP); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_L4PORTS); ++ break; ++ ++ case MLX5E_TT_IPV6_UDP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV6); ++ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, ++ MLX5_L4_PROT_TYPE_UDP); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_L4PORTS); ++ break; ++ ++ case MLX5E_TT_IPV4_IPSEC_AH: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV4); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_IPSEC_SPI); ++ break; ++ ++ case MLX5E_TT_IPV6_IPSEC_AH: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV6); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_IPSEC_SPI); ++ break; ++ ++ case MLX5E_TT_IPV4_IPSEC_ESP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV4); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_IPSEC_SPI); ++ break; ++ ++ case MLX5E_TT_IPV6_IPSEC_ESP: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV6); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP_IPSEC_SPI); ++ break; ++ ++ case MLX5E_TT_IPV4: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV4); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP); ++ break; ++ ++ case MLX5E_TT_IPV6: ++ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, ++ MLX5_L3_PROT_TYPE_IPV6); ++ MLX5_SET(rx_hash_field_select, hfso, selected_fields, ++ MLX5_HASH_IP); ++ break; ++ default: ++ WARN_ONCE(true, "%s: bad traffic type!\n", __func__); ++ } + } + + static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) +@@ -2404,110 +2501,13 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) + static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + enum mlx5e_traffic_types tt) + { +- void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); +- + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + +-#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ +- MLX5_HASH_FIELD_SEL_DST_IP) +- +-#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ +- MLX5_HASH_FIELD_SEL_DST_IP |\ +- MLX5_HASH_FIELD_SEL_L4_SPORT |\ +- MLX5_HASH_FIELD_SEL_L4_DPORT) +- +-#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ +- MLX5_HASH_FIELD_SEL_DST_IP |\ +- MLX5_HASH_FIELD_SEL_IPSEC_SPI) +- + mlx5e_build_tir_ctx_lro(tirc, priv); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); +- mlx5e_build_tir_ctx_hash(tirc, priv); +- +- switch (tt) { +- case MLX5E_TT_IPV4_TCP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- 
MLX5_L3_PROT_TYPE_IPV4); +- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, +- MLX5_L4_PROT_TYPE_TCP); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_L4PORTS); +- break; +- +- case MLX5E_TT_IPV6_TCP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV6); +- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, +- MLX5_L4_PROT_TYPE_TCP); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_L4PORTS); +- break; +- +- case MLX5E_TT_IPV4_UDP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV4); +- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, +- MLX5_L4_PROT_TYPE_UDP); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_L4PORTS); +- break; +- +- case MLX5E_TT_IPV6_UDP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV6); +- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, +- MLX5_L4_PROT_TYPE_UDP); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_L4PORTS); +- break; +- +- case MLX5E_TT_IPV4_IPSEC_AH: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV4); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_IPSEC_SPI); +- break; +- +- case MLX5E_TT_IPV6_IPSEC_AH: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV6); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_IPSEC_SPI); +- break; +- +- case MLX5E_TT_IPV4_IPSEC_ESP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV4); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_IPSEC_SPI); +- break; +- +- case MLX5E_TT_IPV6_IPSEC_ESP: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV6); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP_IPSEC_SPI); +- break; +- +- case MLX5E_TT_IPV4: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV4); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP); +- break; +- +- case MLX5E_TT_IPV6: +- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, +- MLX5_L3_PROT_TYPE_IPV6); +- MLX5_SET(rx_hash_field_select, hfso, selected_fields, +- MLX5_HASH_IP); +- break; +- default: +- WARN_ONCE(true, +- "mlx5e_build_indir_tir_ctx: bad traffic type!\n"); +- } ++ mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + } + + static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, +-- +2.12.0 + diff --git a/queue/net-sched-matchall-Fix-configuration-race.patch b/queue/net-sched-matchall-Fix-configuration-race.patch new file mode 100644 index 0000000..d150a49 --- /dev/null +++ b/queue/net-sched-matchall-Fix-configuration-race.patch @@ -0,0 +1,313 @@ +From fd62d9f5c575f0792f150109f1fd24a0d4b3f854 Mon Sep 17 00:00:00 2001 +From: Yotam Gigi <yotamg@mellanox.com> +Date: Tue, 31 Jan 2017 15:14:29 +0200 +Subject: [PATCH] net/sched: matchall: Fix configuration race + +commit fd62d9f5c575f0792f150109f1fd24a0d4b3f854 upstream. + +In the current version, the matchall internal state is split into two +structs: cls_matchall_head and cls_matchall_filter. This makes little +sense, as matchall instance supports only one filter, and there is no +situation where one exists and the other does not. In addition, that led +to some races when filter was deleted while packet was processed. + +Unify that two structs into one, thus simplifying the process of matchall +creation and deletion. 
As a result, the new, delete and get callbacks have +a dummy implementation where all the work is done in destroy and change +callbacks, as was done in cls_cgroup. + +Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier") +Reported-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Yotam Gigi <yotamg@mellanox.com> +Acked-by: Jiri Pirko <jiri@mellanox.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c +index f935429bd5ef..b12bc2abea93 100644 +--- a/net/sched/cls_matchall.c ++++ b/net/sched/cls_matchall.c +@@ -16,16 +16,11 @@ + #include <net/sch_generic.h> + #include <net/pkt_cls.h> + +-struct cls_mall_filter { ++struct cls_mall_head { + struct tcf_exts exts; + struct tcf_result res; + u32 handle; +- struct rcu_head rcu; + u32 flags; +-}; +- +-struct cls_mall_head { +- struct cls_mall_filter *filter; + struct rcu_head rcu; + }; + +@@ -33,38 +28,29 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) + { + struct cls_mall_head *head = rcu_dereference_bh(tp->root); +- struct cls_mall_filter *f = head->filter; + +- if (tc_skip_sw(f->flags)) ++ if (tc_skip_sw(head->flags)) + return -1; + +- return tcf_exts_exec(skb, &f->exts, res); ++ return tcf_exts_exec(skb, &head->exts, res); + } + + static int mall_init(struct tcf_proto *tp) + { +- struct cls_mall_head *head; +- +- head = kzalloc(sizeof(*head), GFP_KERNEL); +- if (!head) +- return -ENOBUFS; +- +- rcu_assign_pointer(tp->root, head); +- + return 0; + } + +-static void mall_destroy_filter(struct rcu_head *head) ++static void mall_destroy_rcu(struct rcu_head *rcu) + { +- struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu); ++ struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, ++ rcu); + +- tcf_exts_destroy(&f->exts); +- +- kfree(f); ++ tcf_exts_destroy(&head->exts); ++ kfree(head); + } + + static int mall_replace_hw_filter(struct tcf_proto *tp, +- struct cls_mall_filter *f, ++ struct cls_mall_head *head, + unsigned long cookie) + { + struct net_device *dev = tp->q->dev_queue->dev; +@@ -74,7 +60,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, + offload.type = TC_SETUP_MATCHALL; + offload.cls_mall = &mall_offload; + offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; +- offload.cls_mall->exts = &f->exts; ++ offload.cls_mall->exts = &head->exts; + offload.cls_mall->cookie = cookie; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, +@@ -82,7 +68,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, + } + + static void mall_destroy_hw_filter(struct tcf_proto *tp, +- struct cls_mall_filter *f, ++ struct cls_mall_head *head, + unsigned long cookie) + { + struct net_device *dev = tp->q->dev_queue->dev; +@@ -103,29 +89,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force) + { + struct cls_mall_head *head = rtnl_dereference(tp->root); + struct net_device *dev = tp->q->dev_queue->dev; +- struct cls_mall_filter *f = head->filter; + +- if (!force && f) +- return false; ++ if (!head) ++ return true; + +- if (f) { +- if (tc_should_offload(dev, tp, f->flags)) +- mall_destroy_hw_filter(tp, f, (unsigned long) f); ++ if (tc_should_offload(dev, tp, head->flags)) ++ mall_destroy_hw_filter(tp, head, (unsigned long) head); + +- call_rcu(&f->rcu, mall_destroy_filter); +- } +- kfree_rcu(head, rcu); ++ call_rcu(&head->rcu, mall_destroy_rcu); + return true; + } + + static unsigned long mall_get(struct tcf_proto *tp, u32 handle) + { 
+- struct cls_mall_head *head = rtnl_dereference(tp->root); +- struct cls_mall_filter *f = head->filter; +- +- if (f && f->handle == handle) +- return (unsigned long) f; +- return 0; ++ return 0UL; + } + + static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { +@@ -134,7 +111,7 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { + }; + + static int mall_set_parms(struct net *net, struct tcf_proto *tp, +- struct cls_mall_filter *f, ++ struct cls_mall_head *head, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) + { +@@ -147,11 +124,11 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, + return err; + + if (tb[TCA_MATCHALL_CLASSID]) { +- f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); +- tcf_bind_filter(tp, &f->res, base); ++ head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); ++ tcf_bind_filter(tp, &head->res, base); + } + +- tcf_exts_change(tp, &f->exts, &e); ++ tcf_exts_change(tp, &head->exts, &e); + + return 0; + } +@@ -162,21 +139,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + unsigned long *arg, bool ovr) + { + struct cls_mall_head *head = rtnl_dereference(tp->root); +- struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg; + struct net_device *dev = tp->q->dev_queue->dev; +- struct cls_mall_filter *f; + struct nlattr *tb[TCA_MATCHALL_MAX + 1]; ++ struct cls_mall_head *new; + u32 flags = 0; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + +- if (head->filter) +- return -EBUSY; +- +- if (fold) +- return -EINVAL; ++ if (head) ++ return -EEXIST; + + err = nla_parse_nested(tb, TCA_MATCHALL_MAX, + tca[TCA_OPTIONS], mall_policy); +@@ -189,23 +162,23 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + return -EINVAL; + } + +- f = kzalloc(sizeof(*f), GFP_KERNEL); +- if (!f) ++ new = kzalloc(sizeof(*new), GFP_KERNEL); ++ if (!new) + return -ENOBUFS; + +- tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0); ++ tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0); + + if (!handle) + handle = 1; +- f->handle = handle; +- f->flags = flags; ++ new->handle = handle; ++ new->flags = flags; + +- err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); ++ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); + if (err) + goto errout; + + if (tc_should_offload(dev, tp, flags)) { +- err = mall_replace_hw_filter(tp, f, (unsigned long) f); ++ err = mall_replace_hw_filter(tp, new, (unsigned long) new); + if (err) { + if (tc_skip_sw(flags)) + goto errout; +@@ -214,39 +187,29 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + } + } + +- *arg = (unsigned long) f; +- rcu_assign_pointer(head->filter, f); +- ++ *arg = (unsigned long) head; ++ rcu_assign_pointer(tp->root, new); ++ if (head) ++ call_rcu(&head->rcu, mall_destroy_rcu); + return 0; + + errout: +- kfree(f); ++ kfree(new); + return err; + } + + static int mall_delete(struct tcf_proto *tp, unsigned long arg) + { +- struct cls_mall_head *head = rtnl_dereference(tp->root); +- struct cls_mall_filter *f = (struct cls_mall_filter *) arg; +- struct net_device *dev = tp->q->dev_queue->dev; +- +- if (tc_should_offload(dev, tp, f->flags)) +- mall_destroy_hw_filter(tp, f, (unsigned long) f); +- +- RCU_INIT_POINTER(head->filter, NULL); +- tcf_unbind_filter(tp, &f->res); +- call_rcu(&f->rcu, mall_destroy_filter); +- return 0; ++ return -EOPNOTSUPP; + } + + static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) + { + struct cls_mall_head *head = rtnl_dereference(tp->root); +- struct 
cls_mall_filter *f = head->filter; + + if (arg->count < arg->skip) + goto skip; +- if (arg->fn(tp, (unsigned long) f, arg) < 0) ++ if (arg->fn(tp, (unsigned long) head, arg) < 0) + arg->stop = 1; + skip: + arg->count++; +@@ -255,28 +218,28 @@ skip: + static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) + { +- struct cls_mall_filter *f = (struct cls_mall_filter *) fh; ++ struct cls_mall_head *head = (struct cls_mall_head *) fh; + struct nlattr *nest; + +- if (!f) ++ if (!head) + return skb->len; + +- t->tcm_handle = f->handle; ++ t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + +- if (f->res.classid && +- nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid)) ++ if (head->res.classid && ++ nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid)) + goto nla_put_failure; + +- if (tcf_exts_dump(skb, &f->exts)) ++ if (tcf_exts_dump(skb, &head->exts)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + +- if (tcf_exts_dump_stats(skb, &f->exts) < 0) ++ if (tcf_exts_dump_stats(skb, &head->exts) < 0) + goto nla_put_failure; + + return skb->len; +-- +2.12.0 + diff --git a/queue/net-use-a-work-queue-to-defer-net_disable_timestamp-.patch b/queue/net-use-a-work-queue-to-defer-net_disable_timestamp-.patch new file mode 100644 index 0000000..69619d7 --- /dev/null +++ b/queue/net-use-a-work-queue-to-defer-net_disable_timestamp-.patch @@ -0,0 +1,216 @@ +From 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Thu, 2 Feb 2017 10:31:35 -0800 +Subject: [PATCH] net: use a work queue to defer net_disable_timestamp() work + +commit 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec upstream. + +Dmitry reported a warning [1] showing that we were calling +net_disable_timestamp() -> static_key_slow_dec() from a non +process context. + +Grabbing a mutex while holding a spinlock or rcu_read_lock() +is not allowed. + +As Cong suggested, we now use a work queue. + +It is possible netstamp_clear() exits while netstamp_needed_deferred +is not zero, but it is probably not worth trying to do better than that. + +netstamp_needed_deferred atomic tracks the exact number of deferred +decrements. + +[1] +[ INFO: suspicious RCU usage. ] +4.10.0-rc5+ #192 Not tainted +------------------------------- +./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side +critical section! 
+ +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 0 +2 locks held by syz-executor14/23111: + #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock +include/net/sock.h:1454 [inline] + #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] +rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919 + #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook +include/linux/netfilter.h:201 [inline] + #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] +__ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160 + +stack backtrace: +CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs +01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:15 [inline] + dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 + lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452 + rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline] + ___might_sleep+0x560/0x650 kernel/sched/core.c:7748 + __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 + mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 + atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 + __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 + static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 + net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 + sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 + __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 + sk_destruct+0x47/0x80 net/core/sock.c:1460 + __sk_free+0x57/0x230 net/core/sock.c:1468 + sock_wfree+0xae/0x120 net/core/sock.c:1645 + skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 + skb_release_all+0x15/0x60 net/core/skbuff.c:668 + __kfree_skb+0x15/0x20 net/core/skbuff.c:684 + kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 + inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 + inet_frag_put include/net/inet_frag.h:133 [inline] + nf_ct_frag6_gather+0x1106/0x3840 +net/ipv6/netfilter/nf_conntrack_reasm.c:617 + ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 + nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] + nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 + nf_hook include/linux/netfilter.h:212 [inline] + __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 + ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 + ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 + ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 + rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] + rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 + inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 + sock_sendmsg_nosec net/socket.c:635 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:645 + sock_write_iter+0x326/0x600 net/socket.c:848 + do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 + do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 + vfs_writev+0x87/0xc0 fs/read_write.c:911 + do_writev+0x110/0x2c0 fs/read_write.c:944 + SYSC_writev fs/read_write.c:1017 [inline] + SyS_writev+0x27/0x30 fs/read_write.c:1014 + entry_SYSCALL_64_fastpath+0x1f/0xc2 +RIP: 0033:0x445559 +RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 +RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559 +RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005 +RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000 +R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400 +BUG: sleeping function called from invalid context at 
+kernel/locking/mutex.c:752 +in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14 +INFO: lockdep is turned off. +CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs +01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:15 [inline] + dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 + ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780 + __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 + mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 + atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 + __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 + static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 + net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 + sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 + __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 + sk_destruct+0x47/0x80 net/core/sock.c:1460 + __sk_free+0x57/0x230 net/core/sock.c:1468 + sock_wfree+0xae/0x120 net/core/sock.c:1645 + skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 + skb_release_all+0x15/0x60 net/core/skbuff.c:668 + __kfree_skb+0x15/0x20 net/core/skbuff.c:684 + kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 + inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 + inet_frag_put include/net/inet_frag.h:133 [inline] + nf_ct_frag6_gather+0x1106/0x3840 +net/ipv6/netfilter/nf_conntrack_reasm.c:617 + ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 + nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] + nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 + nf_hook include/linux/netfilter.h:212 [inline] + __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 + ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 + ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 + ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 + rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] + rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 + inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 + sock_sendmsg_nosec net/socket.c:635 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:645 + sock_write_iter+0x326/0x600 net/socket.c:848 + do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 + do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 + vfs_writev+0x87/0xc0 fs/read_write.c:911 + do_writev+0x110/0x2c0 fs/read_write.c:944 + SYSC_writev fs/read_write.c:1017 [inline] + SyS_writev+0x27/0x30 fs/read_write.c:1014 + entry_SYSCALL_64_fastpath+0x1f/0xc2 +RIP: 0033:0x445559 + +Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") +Suggested-by: Cong Wang <xiyou.wangcong@gmail.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/core/dev.c b/net/core/dev.c +index 7f218e095361..29101c98399f 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1695,24 +1695,19 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); + + static struct static_key netstamp_needed __read_mostly; + #ifdef HAVE_JUMP_LABEL +-/* We are not allowed to call static_key_slow_dec() from irq context +- * If net_disable_timestamp() is called from irq context, defer the +- * static_key_slow_dec() calls. 
+- */ + static atomic_t netstamp_needed_deferred; +-#endif +- +-void net_enable_timestamp(void) ++static void netstamp_clear(struct work_struct *work) + { +-#ifdef HAVE_JUMP_LABEL + int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + +- if (deferred) { +- while (--deferred) +- static_key_slow_dec(&netstamp_needed); +- return; +- } ++ while (deferred--) ++ static_key_slow_dec(&netstamp_needed); ++} ++static DECLARE_WORK(netstamp_work, netstamp_clear); + #endif ++ ++void net_enable_timestamp(void) ++{ + static_key_slow_inc(&netstamp_needed); + } + EXPORT_SYMBOL(net_enable_timestamp); +@@ -1720,12 +1715,12 @@ EXPORT_SYMBOL(net_enable_timestamp); + void net_disable_timestamp(void) + { + #ifdef HAVE_JUMP_LABEL +- if (in_interrupt()) { +- atomic_inc(&netstamp_needed_deferred); +- return; +- } +-#endif ++ /* net_disable_timestamp() can be called from non process context */ ++ atomic_inc(&netstamp_needed_deferred); ++ schedule_work(&netstamp_work); ++#else + static_key_slow_dec(&netstamp_needed); ++#endif + } + EXPORT_SYMBOL(net_disable_timestamp); + +-- +2.12.0 + diff --git a/queue/netlabel-out-of-bound-access-in-cipso_v4_validate.patch b/queue/netlabel-out-of-bound-access-in-cipso_v4_validate.patch new file mode 100644 index 0000000..d59bb3f --- /dev/null +++ b/queue/netlabel-out-of-bound-access-in-cipso_v4_validate.patch @@ -0,0 +1,51 @@ +From d71b7896886345c53ef1d84bda2bc758554f5d61 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 3 Feb 2017 00:03:26 -0800 +Subject: [PATCH] netlabel: out of bound access in cipso_v4_validate() + +commit d71b7896886345c53ef1d84bda2bc758554f5d61 upstream. + +syzkaller found another out of bound access in ip_options_compile(), +or more exactly in cipso_v4_validate() + +Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled") +Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Cc: Paul Moore <paul@paul-moore.com> +Acked-by: Paul Moore <paul@paul-moore.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h +index 3ebb168b9afc..a34b141f125f 100644 +--- a/include/net/cipso_ipv4.h ++++ b/include/net/cipso_ipv4.h +@@ -309,6 +309,10 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, + } + + for (opt_iter = 6; opt_iter < opt_len;) { ++ if (opt_iter + 1 == opt_len) { ++ err_offset = opt_iter; ++ goto out; ++ } + tag_len = opt[opt_iter + 1]; + if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) { + err_offset = opt_iter + 1; +diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c +index 72d6f056d863..ae206163c273 100644 +--- a/net/ipv4/cipso_ipv4.c ++++ b/net/ipv4/cipso_ipv4.c +@@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) + goto validate_return_locked; + } + ++ if (opt_iter + 1 == opt_len) { ++ err_offset = opt_iter; ++ goto validate_return_locked; ++ } + tag_len = tag[1]; + if (tag_len > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; +-- +2.12.0 + diff --git a/queue/packet-round-up-linear-to-header-len.patch b/queue/packet-round-up-linear-to-header-len.patch new file mode 100644 index 0000000..cb22f4e --- /dev/null +++ b/queue/packet-round-up-linear-to-header-len.patch @@ -0,0 +1,51 @@ +From 57031eb794906eea4e1c7b31dc1e2429c0af0c66 Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn <willemb@google.com> +Date: Tue, 7 Feb 2017 15:57:21 -0500 +Subject: [PATCH] packet: round up linear to header len + +commit 57031eb794906eea4e1c7b31dc1e2429c0af0c66 upstream. + +Link layer protocols may unconditionally pull headers, as Ethernet +does in eth_type_trans. Ensure that the entire link layer header +always lies in the skb linear segment. tpacket_snd has such a check. +Extend this to packet_snd. + +Variable length link layer headers complicate the computation +somewhat. Here skb->len may be smaller than dev->hard_header_len. + +Round up the linear length to be at least as long as the smallest of +the two. + +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: Willem de Bruijn <willemb@google.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index 3d555c79a7b5..d56ee46b11fc 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2755,7 +2755,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + struct virtio_net_hdr vnet_hdr = { 0 }; + int offset = 0; + struct packet_sock *po = pkt_sk(sk); +- int hlen, tlen; ++ int hlen, tlen, linear; + int extra_len = 0; + + /* +@@ -2816,8 +2816,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + err = -ENOBUFS; + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; +- skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, +- __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len), ++ linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); ++ linear = max(linear, min_t(int, len, dev->hard_header_len)); ++ skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, + msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out_unlock; +-- +2.12.0 + diff --git a/queue/pegasus-Use-heap-buffers-for-all-register-access.patch b/queue/pegasus-Use-heap-buffers-for-all-register-access.patch new file mode 100644 index 0000000..3f0f55d --- /dev/null +++ b/queue/pegasus-Use-heap-buffers-for-all-register-access.patch @@ -0,0 +1,93 @@ +From 5593523f968bc86d42a035c6df47d5e0979b5ace Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sat, 4 Feb 2017 16:56:03 +0000 +Subject: [PATCH] pegasus: Use heap buffers for all register access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 5593523f968bc86d42a035c6df47d5e0979b5ace upstream. + +Allocating USB buffers on the stack is not portable, and no longer +works on x86_64 (with VMAP_STACK enabled as per default). + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +References: https://bugs.debian.org/852556 +Reported-by: Lisandro Damián Nicanor Pérez Meyer <lisandro@debian.org> +Tested-by: Lisandro Damián Nicanor Pérez Meyer <lisandro@debian.org> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c +index 24e803fe9a53..36674484c6fb 100644 +--- a/drivers/net/usb/pegasus.c ++++ b/drivers/net/usb/pegasus.c +@@ -126,40 +126,61 @@ static void async_ctrl_callback(struct urb *urb) + + static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) + { ++ u8 *buf; + int ret; + ++ buf = kmalloc(size, GFP_NOIO); ++ if (!buf) ++ return -ENOMEM; ++ + ret = usb_control_msg(pegasus->usb, usb_rcvctrlpipe(pegasus->usb, 0), + PEGASUS_REQ_GET_REGS, PEGASUS_REQT_READ, 0, +- indx, data, size, 1000); ++ indx, buf, size, 1000); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, + "%s returned %d\n", __func__, ret); ++ else if (ret <= size) ++ memcpy(data, buf, ret); ++ kfree(buf); + return ret; + } + +-static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) ++static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, ++ const void *data) + { ++ u8 *buf; + int ret; + ++ buf = kmemdup(data, size, GFP_NOIO); ++ if (!buf) ++ return -ENOMEM; ++ + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), + PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, +- indx, data, size, 100); ++ indx, buf, size, 100); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, + "%s returned %d\n", __func__, ret); ++ kfree(buf); + return ret; + } + + static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) + { ++ u8 *buf; + int ret; + ++ buf = kmemdup(&data, 1, GFP_NOIO); ++ if (!buf) ++ return -ENOMEM; ++ + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), + PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, +- indx, &data, 1, 1000); ++ indx, buf, 1, 1000); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, + "%s returned %d\n", __func__, ret); ++ kfree(buf); + return ret; + } + +-- +2.12.0 + diff --git a/queue/ping-fix-a-null-pointer-dereference.patch b/queue/ping-fix-a-null-pointer-dereference.patch new file mode 100644 index 0000000..d96032a --- /dev/null +++ b/queue/ping-fix-a-null-pointer-dereference.patch @@ -0,0 +1,60 @@ +From 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 Mon Sep 17 00:00:00 2001 +From: WANG Cong <xiyou.wangcong@gmail.com> +Date: Tue, 7 Feb 2017 12:59:46 -0800 +Subject: [PATCH] ping: fix a null pointer dereference + +commit 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 upstream. 
+ +Andrey reported a kernel crash: + + general protection fault: 0000 [#1] SMP KASAN + Dumping ftrace buffer: + (ftrace buffer empty) + Modules linked in: + CPU: 2 PID: 3880 Comm: syz-executor1 Not tainted 4.10.0-rc6+ #124 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + task: ffff880060048040 task.stack: ffff880069be8000 + RIP: 0010:ping_v4_push_pending_frames net/ipv4/ping.c:647 [inline] + RIP: 0010:ping_v4_sendmsg+0x1acd/0x23f0 net/ipv4/ping.c:837 + RSP: 0018:ffff880069bef8b8 EFLAGS: 00010206 + RAX: dffffc0000000000 RBX: ffff880069befb90 RCX: 0000000000000000 + RDX: 0000000000000018 RSI: ffff880069befa30 RDI: 00000000000000c2 + RBP: ffff880069befbb8 R08: 0000000000000008 R09: 0000000000000000 + R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069befab0 + R13: ffff88006c624a80 R14: ffff880069befa70 R15: 0000000000000000 + FS: 00007f6f7c716700(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000004a6f28 CR3: 000000003a134000 CR4: 00000000000006e0 + Call Trace: + inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 + sock_sendmsg_nosec net/socket.c:635 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:645 + SYSC_sendto+0x660/0x810 net/socket.c:1687 + SyS_sendto+0x40/0x50 net/socket.c:1655 + entry_SYSCALL_64_fastpath+0x1f/0xc2 + +This is because we miss a check for NULL pointer for skb_peek() when +the queue is empty. Other places already have the same check. + +Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") +Reported-by: Andrey Konovalov <andreyknvl@google.com> +Tested-by: Andrey Konovalov <andreyknvl@google.com> +Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c +index 86cca610f4c2..68d77b1f1495 100644 +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + { + struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + ++ if (!skb) ++ return 0; + pfh->wcheck = csum_partial((char *)&pfh->icmph, + sizeof(struct icmphdr), pfh->wcheck); + pfh->icmph.checksum = csum_fold(pfh->wcheck); +-- +2.12.0 + diff --git a/queue/rtl8150-Use-heap-buffers-for-all-register-access.patch b/queue/rtl8150-Use-heap-buffers-for-all-register-access.patch new file mode 100644 index 0000000..ae2eb40 --- /dev/null +++ b/queue/rtl8150-Use-heap-buffers-for-all-register-access.patch @@ -0,0 +1,65 @@ +From 7926aff5c57b577ab0f43364ff0c59d968f6a414 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sat, 4 Feb 2017 16:56:32 +0000 +Subject: [PATCH] rtl8150: Use heap buffers for all register access + +commit 7926aff5c57b577ab0f43364ff0c59d968f6a414 upstream. + +Allocating USB buffers on the stack is not portable, and no longer +works on x86_64 (with VMAP_STACK enabled as per default). + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c +index 95b7bd0d7abc..c81c79110cef 100644 +--- a/drivers/net/usb/rtl8150.c ++++ b/drivers/net/usb/rtl8150.c +@@ -155,16 +155,36 @@ static const char driver_name [] = "rtl8150"; + */ + static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) + { +- return usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), +- RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, +- indx, 0, data, size, 500); ++ void *buf; ++ int ret; ++ ++ buf = kmalloc(size, GFP_NOIO); ++ if (!buf) ++ return -ENOMEM; ++ ++ ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), ++ RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, ++ indx, 0, buf, size, 500); ++ if (ret > 0 && ret <= size) ++ memcpy(data, buf, ret); ++ kfree(buf); ++ return ret; + } + +-static int set_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) ++static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data) + { +- return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), +- RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, +- indx, 0, data, size, 500); ++ void *buf; ++ int ret; ++ ++ buf = kmemdup(data, size, GFP_NOIO); ++ if (!buf) ++ return -ENOMEM; ++ ++ ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), ++ RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, ++ indx, 0, buf, size, 500); ++ kfree(buf); ++ return ret; + } + + static void async_set_reg_cb(struct urb *urb) +-- +2.12.0 + diff --git a/queue/sctp-avoid-BUG_ON-on-sctp_wait_for_sndbuf.patch b/queue/sctp-avoid-BUG_ON-on-sctp_wait_for_sndbuf.patch new file mode 100644 index 0000000..29fc17d --- /dev/null +++ b/queue/sctp-avoid-BUG_ON-on-sctp_wait_for_sndbuf.patch @@ -0,0 +1,38 @@ +From 2dcab598484185dea7ec22219c76dcdd59e3cb90 Mon Sep 17 00:00:00 2001 +From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> +Date: Mon, 6 Feb 2017 18:10:31 -0200 +Subject: [PATCH] sctp: avoid BUG_ON on sctp_wait_for_sndbuf + +commit 2dcab598484185dea7ec22219c76dcdd59e3cb90 upstream. + +Alexander Popov reported that an application may trigger a BUG_ON in +sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is +waiting on it to queue more data and meanwhile another thread peels off +the association being used by the first thread. + +This patch replaces the BUG_ON call with a proper error handling. It +will return -EPIPE to the original sendmsg call, similarly to what would +have been done if the association wasn't found in the first place. + +Acked-by: Alexander Popov <alex.popov@linux.com> +Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> +Reviewed-by: Xin Long <lucien.xin@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index 37eeab7899fc..e214d2e7e9a3 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -7426,7 +7426,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, + */ + release_sock(sk); + current_timeo = schedule_timeout(current_timeo); +- BUG_ON(sk != asoc->base.sk); ++ if (sk != asoc->base.sk) ++ goto do_error; + lock_sock(sk); + + *timeo_p = current_timeo; +-- +2.12.0 + diff --git a/queue/series b/queue/series index 8c59945..4bcfe58 100644 --- a/queue/series +++ b/queue/series @@ -125,3 +125,37 @@ IB-rxe-Fix-resid-update.patch IB-rxe-Fix-mem_check_range-integer-overflow.patch perf-diff-Fix-o-order-option-behavior-again.patch perf-diff-Fix-segfault-on-perf-diff-o-N-option.patch +can-Fix-kernel-panic-at-security_sock_rcv_skb.patch +net-mlx5e-Fix-update-of-hash-function-key-via-ethtoo.patch +net-sched-matchall-Fix-configuration-race.patch +ipv6-fix-ip6_tnl_parse_tlv_enc_lim.patch +ipv6-pointer-math-error-in-ip6_tnl_parse_tlv_enc_lim.patch +tcp-fix-0-divide-in-__tcp_select_window.patch +stmmac-Discard-masked-flags-in-interrupt-status-regi.patch +net-use-a-work-queue-to-defer-net_disable_timestamp-.patch +ipv4-keep-skb-dst-around-in-presence-of-IP-options.patch +netlabel-out-of-bound-access-in-cipso_v4_validate.patch +ip6_gre-fix-ip6gre_err-invalid-reads.patch +ipv6-tcp-add-a-missing-tcp_v6_restore_cb.patch +tcp-avoid-infinite-loop-in-tcp_splice_read.patch +tun-read-vnet_hdr_sz-once.patch +macvtap-read-vnet_hdr_size-once.patch +pegasus-Use-heap-buffers-for-all-register-access.patch +rtl8150-Use-heap-buffers-for-all-register-access.patch +catc-Combine-failure-cleanup-code-in-catc_probe.patch +catc-Use-heap-buffer-for-memory-size-test.patch +mlx4-Invoke-softirqs-after-napi_reschedule.patch +sctp-avoid-BUG_ON-on-sctp_wait_for_sndbuf.patch +lwtunnel-valid-encap-attr-check-should-return-0-when.patch +sit-fix-a-double-free-on-error-path.patch +net-introduce-device-min_header_len.patch +packet-round-up-linear-to-header-len.patch +ping-fix-a-null-pointer-dereference.patch +net-dsa-Do-not-destroy-invalid-network-devices.patch +l2tp-do-not-use-udp_ioctl.patch +mld-do-not-remove-mld-souce-list-info-when-set-link-.patch +igmp-mld-Fix-memory-leak-in-igmpv3-mld_del_delrec.patch +tcp-fix-mark-propagation-with-fwmark_reflect-enabled.patch +net-mlx5-Don-t-unlock-fte-while-still-using-it.patch +tcp-don-t-annotate-mark-on-control-socket-from-tcp_v.patch +x86-fpu-xstate-Fix-xcomp_bv-in-XSAVES-header.patch diff --git a/queue/sit-fix-a-double-free-on-error-path.patch b/queue/sit-fix-a-double-free-on-error-path.patch new file mode 100644 index 0000000..6ab2450 --- /dev/null +++ b/queue/sit-fix-a-double-free-on-error-path.patch @@ -0,0 +1,68 @@ +From d7426c69a1942b2b9b709bf66b944ff09f561484 Mon Sep 17 00:00:00 2001 +From: WANG Cong <xiyou.wangcong@gmail.com> +Date: Wed, 8 Feb 2017 10:02:13 -0800 +Subject: [PATCH] sit: fix a double free on error path + +commit d7426c69a1942b2b9b709bf66b944ff09f561484 upstream. + +Dmitry reported a double free in sit_init_net(): + + kernel BUG at mm/percpu.c:689! 
+ invalid opcode: 0000 [#1] SMP KASAN + Dumping ftrace buffer: + (ftrace buffer empty) + Modules linked in: + CPU: 0 PID: 15692 Comm: syz-executor1 Not tainted 4.10.0-rc6-next-20170206 #1 + Hardware name: Google Google Compute Engine/Google Compute Engine, + BIOS Google 01/01/2011 + task: ffff8801c9cc27c0 task.stack: ffff88017d1d8000 + RIP: 0010:pcpu_free_area+0x68b/0x810 mm/percpu.c:689 + RSP: 0018:ffff88017d1df488 EFLAGS: 00010046 + RAX: 0000000000010000 RBX: 00000000000007c0 RCX: ffffc90002829000 + RDX: 0000000000010000 RSI: ffffffff81940efb RDI: ffff8801db841d94 + RBP: ffff88017d1df590 R08: dffffc0000000000 R09: 1ffffffff0bb3bdd + R10: dffffc0000000000 R11: 00000000000135dd R12: ffff8801db841d80 + R13: 0000000000038e40 R14: 00000000000007c0 R15: 00000000000007c0 + FS: 00007f6ea608f700(0000) GS:ffff8801dbe00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000002000aff8 CR3: 00000001c8d44000 CR4: 00000000001426f0 + DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 + Call Trace: + free_percpu+0x212/0x520 mm/percpu.c:1264 + ipip6_dev_free+0x43/0x60 net/ipv6/sit.c:1335 + sit_init_net+0x3cb/0xa10 net/ipv6/sit.c:1831 + ops_init+0x10a/0x530 net/core/net_namespace.c:115 + setup_net+0x2ed/0x690 net/core/net_namespace.c:291 + copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 + create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 + unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 + SYSC_unshare kernel/fork.c:2281 [inline] + SyS_unshare+0x64e/0xfc0 kernel/fork.c:2231 + entry_SYSCALL_64_fastpath+0x1f/0xc2 + +This is because when tunnel->dst_cache init fails, we free dev->tstats +once in ipip6_tunnel_init() and twice in sit_init_net(). This looks +redundant but its ndo_uinit() does not seem enough to clean up everything +here. So avoid this by setting dev->tstats to NULL after the first free, +at least for -net. + +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c +index fad992ad4bc8..99853c6e33a8 100644 +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -1380,6 +1380,7 @@ static int ipip6_tunnel_init(struct net_device *dev) + err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (err) { + free_percpu(dev->tstats); ++ dev->tstats = NULL; + return err; + } + +-- +2.12.0 + diff --git a/queue/stmmac-Discard-masked-flags-in-interrupt-status-regi.patch b/queue/stmmac-Discard-masked-flags-in-interrupt-status-regi.patch new file mode 100644 index 0000000..96e4169 --- /dev/null +++ b/queue/stmmac-Discard-masked-flags-in-interrupt-status-regi.patch @@ -0,0 +1,63 @@ +From 0a764db103376cf69d04449b10688f3516cc0b88 Mon Sep 17 00:00:00 2001 +From: Alexey Brodkin <Alexey.Brodkin@synopsys.com> +Date: Fri, 27 Jan 2017 15:24:43 +0300 +Subject: [PATCH] stmmac: Discard masked flags in interrupt status register + +commit 0a764db103376cf69d04449b10688f3516cc0b88 upstream. + +DW GMAC databook says the following about bits in "Register 15 (Interrupt +Mask Register)": +--------------------------->8------------------------- +When set, this bit __disables_the_assertion_of_the_interrupt_signal__ +because of the setting of XXX bit in Register 14 (Interrupt +Status Register). 
+--------------------------->8------------------------- + +In fact even if we mask one bit in the mask register it doesn't prevent +corresponding bit to appear in the status register, it only disables +interrupt generation for corresponding event. + +But currently we expect a bit different behavior: status bits to be in +sync with their masks, i.e. if mask for bit A is set in the mask +register then bit A won't appear in the interrupt status register. + +This was proven to be incorrect assumption, see discussion here [1]. +That misunderstanding causes unexpected behaviour of the GMAC, for +example we were happy enough to just see bogus messages about link +state changes. + +So from now on we'll be only checking bits that really may trigger an +interrupt. + +[1] https://lkml.org/lkml/2016/11/3/413 + +Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com> +Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com> +Cc: Fabrice Gasnier <fabrice.gasnier@st.com> +Cc: Joachim Eastwood <manabian@gmail.com> +Cc: Phil Reid <preid@electromag.com.au> +Cc: David Miller <davem@davemloft.net> +Cc: Alexandre Torgue <alexandre.torgue@gmail.com> +Cc: Vineet Gupta <vgupta@synopsys.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +index be3c91c7f211..5484fd726d5a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +@@ -305,8 +305,12 @@ static int dwmac1000_irq_status(struct mac_device_info *hw, + { + void __iomem *ioaddr = hw->pcsr; + u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); ++ u32 intr_mask = readl(ioaddr + GMAC_INT_MASK); + int ret = 0; + ++ /* Discard masked bits */ ++ intr_status &= ~intr_mask; ++ + /* Not used events (e.g. MMC interrupts) are not handled. */ + if ((intr_status & GMAC_INT_STATUS_MMCTIS)) + x->mmc_tx_irq_n++; +-- +2.12.0 + diff --git a/queue/tcp-avoid-infinite-loop-in-tcp_splice_read.patch b/queue/tcp-avoid-infinite-loop-in-tcp_splice_read.patch new file mode 100644 index 0000000..087031e --- /dev/null +++ b/queue/tcp-avoid-infinite-loop-in-tcp_splice_read.patch @@ -0,0 +1,44 @@ +From ccf7abb93af09ad0868ae9033d1ca8108bdaec82 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 3 Feb 2017 14:59:38 -0800 +Subject: [PATCH] tcp: avoid infinite loop in tcp_splice_read() + +commit ccf7abb93af09ad0868ae9033d1ca8108bdaec82 upstream. + +Splicing from TCP socket is vulnerable when a packet with URG flag is +received and stored into receive queue. + +__tcp_splice_read() returns 0, and sk_wait_data() immediately +returns since there is the problematic skb in queue. + +This is a nice way to burn cpu (aka infinite loop) and trigger +soft lockups. + +Again, this gem was found by syzkaller tool. + +Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Cc: Willy Tarreau <w@1wt.eu> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 4a044964da66..0efb4c7f6704 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -770,6 +770,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, + ret = -EAGAIN; + break; + } ++ /* if __tcp_splice_read() got nothing while we have ++ * an skb in receive queue, we do not want to loop. ++ * This might happen with URG data. 
++ */ ++ if (!skb_queue_empty(&sk->sk_receive_queue)) ++ break; + sk_wait_data(sk, &timeo, NULL); + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); +-- +2.12.0 + diff --git a/queue/tcp-don-t-annotate-mark-on-control-socket-from-tcp_v.patch b/queue/tcp-don-t-annotate-mark-on-control-socket-from-tcp_v.patch new file mode 100644 index 0000000..573b256 --- /dev/null +++ b/queue/tcp-don-t-annotate-mark-on-control-socket-from-tcp_v.patch @@ -0,0 +1,131 @@ +From 92e55f412cffd016cc245a74278cb4d7b89bb3bc Mon Sep 17 00:00:00 2001 +From: Pablo Neira <pablo@netfilter.org> +Date: Thu, 26 Jan 2017 22:56:21 +0100 +Subject: [PATCH] tcp: don't annotate mark on control socket from + tcp_v6_send_response() + +commit 92e55f412cffd016cc245a74278cb4d7b89bb3bc upstream. + +Unlike ipv4, this control socket is shared by all cpus so we cannot use +it as scratchpad area to annotate the mark that we pass to ip6_xmit(). + +Add a new parameter to ip6_xmit() to indicate the mark. The SCTP socket +family caches the flowi6 structure in the sctp_transport structure, so +we cannot use to carry the mark unless we later on reset it back, which +I discarded since it looks ugly to me. + +Fixes: bf99b4ded5f8 ("tcp: fix mark propagation with fwmark_reflect enabled") +Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/include/net/ipv6.h b/include/net/ipv6.h +index 487e57391664..7afe991e900e 100644 +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -871,7 +871,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); + * upper-layer output functions + */ + int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +- struct ipv6_txoptions *opt, int tclass); ++ __u32 mark, struct ipv6_txoptions *opt, int tclass); + + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); + +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index adfc790f7193..c4e879c02186 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -227,7 +227,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req + opt = ireq->ipv6_opt; + if (!opt) + opt = rcu_dereference(np->opt); +- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); ++ err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass); + rcu_read_unlock(); + err = net_xmit_eval(err); + } +@@ -281,7 +281,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); + if (!IS_ERR(dst)) { + skb_dst_set(skb, dst); +- ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); ++ ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); + return; +diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c +index 7396e75e161b..75c308239243 100644 +--- a/net/ipv6/inet6_connection_sock.c ++++ b/net/ipv6/inet6_connection_sock.c +@@ -176,7 +176,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused + /* Restore final destination back after routing done */ + fl6.daddr = sk->sk_v6_daddr; + +- res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), ++ res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); + return res; +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 38122d04fadc..2c0df09e9036 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -172,7 +172,7 @@ int 
ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) + * which are using proper atomic operations or spinlocks. + */ + int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +- struct ipv6_txoptions *opt, int tclass) ++ __u32 mark, struct ipv6_txoptions *opt, int tclass) + { + struct net *net = sock_net(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); +@@ -240,7 +240,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + + skb->protocol = htons(ETH_P_IPV6); + skb->priority = sk->sk_priority; +- skb->mark = sk->sk_mark; ++ skb->mark = mark; + + mtu = dst_mtu(dst); + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 2b20622a5824..cb8929681dc7 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -469,7 +469,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + opt = ireq->ipv6_opt; + if (!opt) + opt = rcu_dereference(np->opt); +- err = ip6_xmit(sk, skb, fl6, opt, np->tclass); ++ err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); + rcu_read_unlock(); + err = net_xmit_eval(err); + } +@@ -840,8 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); + if (!IS_ERR(dst)) { + skb_dst_set(buff, dst); +- ctl_sk->sk_mark = fl6.flowi6_mark; +- ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); ++ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); + TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + if (rst) + TCP_INC_STATS(net, TCP_MIB_OUTRSTS); +diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c +index 5ed8e79bf102..64dfd35ccdcc 100644 +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -222,7 +222,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); + + rcu_read_lock(); +- res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); ++ res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), ++ np->tclass); + rcu_read_unlock(); + return res; + } +-- +2.12.0 + diff --git a/queue/tcp-fix-0-divide-in-__tcp_select_window.patch b/queue/tcp-fix-0-divide-in-__tcp_select_window.patch new file mode 100644 index 0000000..2e79715 --- /dev/null +++ b/queue/tcp-fix-0-divide-in-__tcp_select_window.patch @@ -0,0 +1,41 @@ +From 06425c308b92eaf60767bc71d359f4cbc7a561f8 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Wed, 1 Feb 2017 08:33:53 -0800 +Subject: [PATCH] tcp: fix 0 divide in __tcp_select_window() + +commit 06425c308b92eaf60767bc71d359f4cbc7a561f8 upstream. + +syszkaller fuzzer was able to trigger a divide by zero, when +TCP window scaling is not enabled. + +SO_RCVBUF can be used not only to increase sk_rcvbuf, also +to decrease it below current receive buffers utilization. + +If mss is negative or 0, just return a zero TCP window. + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 1d5331a1b1dc..8ce50dc3ab8c 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2518,9 +2518,11 @@ u32 __tcp_select_window(struct sock *sk) + int full_space = min_t(int, tp->window_clamp, allowed_space); + int window; + +- if (mss > full_space) ++ if (unlikely(mss > full_space)) { + mss = full_space; +- ++ if (mss <= 0) ++ return 0; ++ } + if (free_space < (full_space >> 1)) { + icsk->icsk_ack.quick = 0; + +-- +2.12.0 + diff --git a/queue/tcp-fix-mark-propagation-with-fwmark_reflect-enabled.patch b/queue/tcp-fix-mark-propagation-with-fwmark_reflect-enabled.patch new file mode 100644 index 0000000..7f1dd83 --- /dev/null +++ b/queue/tcp-fix-mark-propagation-with-fwmark_reflect-enabled.patch @@ -0,0 +1,45 @@ +From bf99b4ded5f8a4767dbb9d180626f06c51f9881f Mon Sep 17 00:00:00 2001 +From: Pau Espin Pedrol <pespin.shar@gmail.com> +Date: Fri, 6 Jan 2017 20:33:28 +0100 +Subject: [PATCH] tcp: fix mark propagation with fwmark_reflect enabled + +commit bf99b4ded5f8a4767dbb9d180626f06c51f9881f upstream. + +Otherwise, RST packets generated by the TCP stack for non-existing +sockets always have mark 0. +The mark from the original packet is assigned to the netns_ipv4/6 +socket used to send the response so that it can get copied into the +response skb when the socket sends it. + +Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies") +Cc: Lorenzo Colitti <lorenzo@google.com> +Signed-off-by: Pau Espin Pedrol <pau.espin@tessares.net> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> + +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index fac275c48108..b67719f45953 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1629,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + sk->sk_protocol = ip_hdr(skb)->protocol; + sk->sk_bound_dev_if = arg->bound_dev_if; + sk->sk_sndbuf = sysctl_wmem_default; ++ sk->sk_mark = fl4.flowi4_mark; + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 73bc8fc68acd..2b20622a5824 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -840,6 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); + if (!IS_ERR(dst)) { + skb_dst_set(buff, dst); ++ ctl_sk->sk_mark = fl6.flowi6_mark; + ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); + TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + if (rst) +-- +2.12.0 + diff --git a/queue/tun-read-vnet_hdr_sz-once.patch b/queue/tun-read-vnet_hdr_sz-once.patch new file mode 100644 index 0000000..7d15022 --- /dev/null +++ b/queue/tun-read-vnet_hdr_sz-once.patch @@ -0,0 +1,59 @@ +From e1edab87faf6ca30cd137e0795bc73aa9a9a22ec Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn <willemb@google.com> +Date: Fri, 3 Feb 2017 18:20:48 -0500 +Subject: [PATCH] tun: read vnet_hdr_sz once + +commit e1edab87faf6ca30cd137e0795bc73aa9a9a22ec upstream. + +When IFF_VNET_HDR is enabled, a virtio_net header must precede data. +Data length is verified to be greater than or equal to expected header +length tun->vnet_hdr_sz before copying. + +Read this value once and cache locally, as it can be updated between +the test and use (TOCTOU). 
+ +Signed-off-by: Willem de Bruijn <willemb@google.com> +Reported-by: Dmitry Vyukov <dvyukov@google.com> +CC: Eric Dumazet <edumazet@google.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> + +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 2cd10b26b650..bfabe180053e 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1170,9 +1170,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, + } + + if (tun->flags & IFF_VNET_HDR) { +- if (len < tun->vnet_hdr_sz) ++ int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); ++ ++ if (len < vnet_hdr_sz) + return -EINVAL; +- len -= tun->vnet_hdr_sz; ++ len -= vnet_hdr_sz; + + if (!copy_from_iter_full(&gso, sizeof(gso), from)) + return -EFAULT; +@@ -1183,7 +1185,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, + + if (tun16_to_cpu(tun, gso.hdr_len) > len) + return -EINVAL; +- iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso)); ++ iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + } + + if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { +@@ -1335,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, + vlan_hlen = VLAN_HLEN; + + if (tun->flags & IFF_VNET_HDR) +- vnet_hdr_sz = tun->vnet_hdr_sz; ++ vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + + total = skb->len + vlan_hlen + vnet_hdr_sz; + +-- +2.12.0 + diff --git a/queue/x86-fpu-xstate-Fix-xcomp_bv-in-XSAVES-header.patch b/queue/x86-fpu-xstate-Fix-xcomp_bv-in-XSAVES-header.patch new file mode 100644 index 0000000..c372d55 --- /dev/null +++ b/queue/x86-fpu-xstate-Fix-xcomp_bv-in-XSAVES-header.patch @@ -0,0 +1,90 @@ +From dffba9a31c7769be3231c420d4b364c92ba3f1ac Mon Sep 17 00:00:00 2001 +From: Yu-cheng Yu <yu-cheng.yu@intel.com> +Date: Mon, 23 Jan 2017 14:54:44 -0800 +Subject: [PATCH] x86/fpu/xstate: Fix xcomp_bv in XSAVES header + +commit dffba9a31c7769be3231c420d4b364c92ba3f1ac upstream. + +The compacted-format XSAVES area is determined at boot time and +never changed after. The field xsave.header.xcomp_bv indicates +which components are in the fixed XSAVES format. + +In fpstate_init() we did not set xcomp_bv to reflect the XSAVES +format since at the time there is no valid data. + +However, after we do copy_init_fpstate_to_fpregs() in fpu__clear(), +as in commit: + + b22cbe404a9c x86/fpu: Fix invalid FPU ptrace state after execve() + +and when __fpu_restore_sig() does fpu__restore() for a COMPAT-mode +app, a #GP occurs. This can be easily triggered by doing valgrind on +a COMPAT-mode "Hello World," as reported by Joakim Tjernlund and +others: + + https://bugzilla.kernel.org/show_bug.cgi?id=190061 + +Fix it by setting xcomp_bv correctly. + +This patch also moves the xcomp_bv initialization to the proper +place, which was in copyin_to_xsaves() as of: + + 4c833368f0bf x86/fpu: Set the xcomp_bv when we fake up a XSAVES area + +which fixed the bug too, but it's more efficient and cleaner to +initialize things once per boot, not for every signal handling +operation. + +Reported-by: Kevin Hao <haokexin@gmail.com> +Reported-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com> +Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@suse.de> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: Fenghua Yu <fenghua.yu@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Ravi V. 
Shankar <ravi.v.shankar@intel.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: haokexin@gmail.com +Link: http://lkml.kernel.org/r/1485212084-4418-1-git-send-email-yu-cheng.yu@intel.com +[ Combined it with 4c833368f0bf. ] +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index e4e97a5355ce..de7234401275 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -9,6 +9,7 @@ + #include <asm/fpu/regset.h> + #include <asm/fpu/signal.h> + #include <asm/fpu/types.h> ++#include <asm/fpu/xstate.h> + #include <asm/traps.h> + + #include <linux/hardirq.h> +@@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state) + * it will #GP. Make sure it is replaced after the memset(). + */ + if (static_cpu_has(X86_FEATURE_XSAVES)) +- state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; ++ state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | ++ xfeatures_mask; + + if (static_cpu_has(X86_FEATURE_FXSR)) + fpstate_init_fxstate(&state->fxsave); +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index e287b9075527..1d7770447b3e 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1070,7 +1070,6 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; +- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; + + return 0; + } +-- +2.12.0 + |