From d6990976af7c5d8f55903bfb4289b6fb030bf754 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 7 Jun 2018 10:11:02 +0300 Subject: vti6: fix PMTU caching and reporting on xmit When setting the skb->dst before doing the MTU check, the route PMTU caching and reporting is done on the new dst which is about to be released. Instead, PMTU handling should be done using the original dst. This is aligned with IPv4 VTI. Fixes: ccd740cbc6 ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b7f28deddaeaf..c72ae3a4fe097 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -480,10 +480,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } - skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); - skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; - mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); @@ -498,9 +494,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) htonl(mtu)); } - return -EMSGSIZE; + err = -EMSGSIZE; + goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); -- cgit 1.2.3-korg From 45c180bc29babbedd6b8c01b975780ef44d9d09c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Jun 2018 21:35:07 -0700 Subject: xfrm_user: prevent leaking 2 bytes of kernel memory struct xfrm_userpolicy_type has two holes, so we should not use C99 style initializer. 
KMSAN report: BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:140 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 CPU: 1 PID: 4520 Comm: syz-executor841 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1211 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253 copyout lib/iov_iter.c:140 [inline] _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 copy_to_iter include/linux/uio.h:106 [inline] skb_copy_datagram_iter+0x422/0xfa0 net/core/datagram.c:431 skb_copy_datagram_msg include/linux/skbuff.h:3268 [inline] netlink_recvmsg+0x6f1/0x1900 net/netlink/af_netlink.c:1959 sock_recvmsg_nosec net/socket.c:802 [inline] sock_recvmsg+0x1d6/0x230 net/socket.c:809 ___sys_recvmsg+0x3fe/0x810 net/socket.c:2279 __sys_recvmmsg+0x58e/0xe30 net/socket.c:2391 do_sys_recvmmsg+0x2a6/0x3e0 net/socket.c:2472 __do_sys_recvmmsg net/socket.c:2485 [inline] __se_sys_recvmmsg net/socket.c:2481 [inline] __x64_sys_recvmmsg+0x15d/0x1c0 net/socket.c:2481 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x446ce9 RSP: 002b:00007fc307918db8 EFLAGS: 00000293 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dbc24 RCX: 0000000000446ce9 RDX: 000000000000000a RSI: 0000000020005040 RDI: 0000000000000003 RBP: 00000000006dbc20 R08: 0000000020004e40 R09: 0000000000000000 R10: 0000000040000000 R11: 0000000000000293 R12: 0000000000000000 R13: 00007ffc8d2df32f R14: 00007fc3079199c0 R15: 0000000000000001 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_save_stack mm/kmsan/kmsan.c:294 [inline] kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685 kmsan_memcpy_origins+0x11d/0x170 
mm/kmsan/kmsan.c:527 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:413 __nla_put lib/nlattr.c:569 [inline] nla_put+0x276/0x340 lib/nlattr.c:627 copy_to_user_policy_type net/xfrm/xfrm_user.c:1678 [inline] dump_one_policy+0xbe1/0x1090 net/xfrm/xfrm_user.c:1708 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 xfrm_dump_policy+0x1c0/0x2a0 net/xfrm/xfrm_user.c:1749 netlink_dump+0x9b5/0x1550 net/netlink/af_netlink.c:2226 __netlink_dump_start+0x1131/0x1270 net/netlink/af_netlink.c:2323 netlink_dump_start include/linux/netlink.h:214 [inline] xfrm_user_rcv_msg+0x8a3/0x9b0 net/xfrm/xfrm_user.c:2577 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 xfrm_netlink_rcv+0xb2/0xf0 net/xfrm/xfrm_user.c:2598 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Local variable description: ----upt.i@dump_one_policy Variable was created at: dump_one_policy+0x78/0x1090 net/xfrm/xfrm_user.c:1689 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 Byte 130 of 137 is uninitialized Memory access starts at ffff88019550407f Fixes: c0144beaeca42 ("[XFRM] netlink: Use nla_put()/NLA_PUT() variantes") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 080035f056d99..1e50b70ad6680 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ 
-1671,9 +1671,11 @@ static inline unsigned int userpolicy_type_attrsize(void) #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt = { - .type = type, - }; + struct xfrm_userpolicy_type upt; + + /* Sadly there are two holes in struct xfrm_userpolicy_type */ + memset(&upt, 0, sizeof(upt)); + upt.type = type; return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } -- cgit 1.2.3-korg From 8cc88773855f988d6a3bbf102bbd9dd9c828eb81 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 21 Jun 2018 09:30:47 +0300 Subject: xfrm: fix missing dst_release() after policy blocking lbcast and multicast Fix missing dst_release() when local broadcast or multicast traffic is xfrm policy blocked. For IPv4 this results in a dst leak: ip_route_output_flow() allocates dst_entry via __ip_route_output_key() and passes it to xfrm_lookup_route(). xfrm_lookup() returns ERR_PTR(-EPERM) that is propagated. The dst that was allocated is never released. IPv4 local broadcast testcase: ping -b 192.168.1.255 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 192.168.1.255/32 dir out action block IPv4 multicast testcase: ping 224.0.0.1 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 224.0.0.1/32 dir out action block For IPv6 the missing dst_release() causes trouble e.g. when used in netns: ip netns add TEST ip netns exec TEST ip link set lo up ip link add dummy0 type dummy ip link set dev dummy0 netns TEST ip netns exec TEST ip addr add fd00::1111 dev dummy0 ip netns exec TEST ip link set dummy0 up ip netns exec TEST ping -6 -c 5 ff02::1%dummy0 & sleep 1 ip netns exec TEST ip xfrm policy add src ::/0 dst ff02::1 dir out action block wait ip netns del TEST After netns deletion we see: [ 258.239097] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 268.279061] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 278.367018] unregister_netdevice: waiting for lo to become free. 
Usage count = 2 [ 288.375259] unregister_netdevice: waiting for lo to become free. Usage count = 2 Fixes: ac37e2515c1a ("xfrm: release dst_orig in case of error in xfrm_lookup()") Signed-off-by: Tommi Rantala Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5f48251c1319a..7c5e8978aeaab 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2286,6 +2286,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); + if (IS_ERR(dst)) + dst_release(dst_orig); + return dst; } EXPORT_SYMBOL(xfrm_lookup_route); -- cgit 1.2.3-korg From 86126b77dcd551ce223e7293bb55854e3df05646 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Jun 2018 14:00:07 +0200 Subject: xfrm: free skb if nlsk pointer is NULL nlmsg_multicast() always frees the skb, so in case we cannot call it we must do that ourselves. 
Fixes: 21ee543edc0dea ("xfrm: fix race between netns cleanup and state expire notification") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1e50b70ad6680..33878e6e0d0a0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1025,10 +1025,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - if (nlsk) - return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); - else - return -1; + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } static inline unsigned int xfrm_spdinfo_msgsize(void) -- cgit 1.2.3-korg From 0ca54b29054151b7a52cbb8904732280afe5a302 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 26 Jun 2018 11:03:18 -0400 Subject: media: rc: be less noisy when driver misbehaves Since commit 48231f289e52 ("media: rc: drivers should produce alternate pulse and space timing events"), on meson-ir we are regularly producing errors. Reduce to warning level and only warn once to avoid flooding the log. A proper fix for meson-ir is going to be too large for v4.18. 
Signed-off-by: Sean Young Cc: stable@vger.kernel.org # 4.17+ Tested-by: Jerome Brunet Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-ir-raw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c index 2e0066b1a31ce..e7948908e78c8 100644 --- a/drivers/media/rc/rc-ir-raw.c +++ b/drivers/media/rc/rc-ir-raw.c @@ -30,13 +30,13 @@ static int ir_raw_event_thread(void *data) while (kfifo_out(&raw->kfifo, &ev, 1)) { if (is_timing_event(ev)) { if (ev.duration == 0) - dev_err(&dev->dev, "nonsensical timing event of duration 0"); + dev_warn_once(&dev->dev, "nonsensical timing event of duration 0"); if (is_timing_event(raw->prev_ev) && !is_transition(&ev, &raw->prev_ev)) - dev_err(&dev->dev, "two consecutive events of type %s", - TO_STR(ev.pulse)); + dev_warn_once(&dev->dev, "two consecutive events of type %s", + TO_STR(ev.pulse)); if (raw->prev_ev.reset && ev.pulse == 0) - dev_err(&dev->dev, "timing event after reset should be pulse"); + dev_warn_once(&dev->dev, "timing event after reset should be pulse"); } list_for_each_entry(handler, &ir_raw_handler_list, list) if (dev->enabled_protocols & -- cgit 1.2.3-korg From 7284fdf39a912322ce97de2d30def3c6068a418c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 27 Jun 2018 11:49:28 +0800 Subject: esp6: fix memleak on error path in esp6_input This appears to be an omission in e6194923237 ("esp: Fix memleaks on error paths."). The memleak on the error path in esp6_input is similar to the one in esp_input of esp4. 
Fixes: e6194923237 ("esp: Fix memleaks on error paths.") Fixes: 3f29770723f ("ipsec: check return value of skb_to_sgvec always") Signed-off-by: Zhen Lei Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 97513f35bcc58..88a7579c23bdb 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -669,8 +669,10 @@ skip_cow: sg_init_table(sg, nfrags); ret = skb_to_sgvec(skb, sg, 0, skb->len); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + kfree(tmp); goto out; + } skb->ip_summed = CHECKSUM_NONE; -- cgit 1.2.3-korg From 5f3417569165a8ee57654217f73e0160312f409c Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 3 Jul 2018 12:56:03 -0400 Subject: drm/bridge: adv7511: Reset registers on hotplug The bridge loses its hw state when the cable is unplugged. If we detect this case in the hpd handler, reset its state. Reported-by: Rob Clark Tested-by: Rob Clark Reviewed-by: Archit Taneja Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180703165648.120401-1-seanpaul@chromium.org --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 73021b388e12d..dd3ff2f2cdce0 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -429,6 +429,18 @@ static void adv7511_hpd_work(struct work_struct *work) else status = connector_status_disconnected; + /* + * The bridge resets its registers on unplug. So when we get a plug + * event and we're already supposed to be powered, cycle the bridge to + * restore its state. 
+ */ + if (status == connector_status_connected && + adv7511->connector.status == connector_status_disconnected && + adv7511->powered) { + regcache_mark_dirty(adv7511->regmap); + adv7511_power_on(adv7511); + } + if (adv7511->connector.status != status) { adv7511->connector.status = status; if (status == connector_status_disconnected) -- cgit 1.2.3-korg From 1b350ea0c2f4df9aa30426614c8eb755a8c32814 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Jul 2018 22:03:43 +0530 Subject: scsi: target: iscsi: cxgbit: fix max iso npdu calculation - rounddown CXGBIT_MAX_ISO_PAYLOAD by csk->emss before calculating max_iso_npdu to get max TCP payload in multiple of mss. - call cxgbit_set_digest() before cxgbit_set_iso_npdu() to set csk->submode, it is used in calculating number of iso pdus. Signed-off-by: Varun Prakash Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/cxgbit/cxgbit_target.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 514986b57c2d6..25eb3891e34b8 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -652,6 +652,7 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) struct iscsi_param *param; u32 mrdsl, mbl; u32 max_npdu, max_iso_npdu; + u32 max_iso_payload; if (conn->login->leading_connection) { param = iscsi_find_param_from_key(MAXBURSTLENGTH, @@ -670,8 +671,10 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) mrdsl = conn_ops->MaxRecvDataSegmentLength; max_npdu = mbl / mrdsl; - max_iso_npdu = CXGBIT_MAX_ISO_PAYLOAD / - (ISCSI_HDR_LEN + mrdsl + + max_iso_payload = rounddown(CXGBIT_MAX_ISO_PAYLOAD, csk->emss); + + max_iso_npdu = max_iso_payload / + (ISCSI_HDR_LEN + mrdsl + cxgbit_digest_len[csk->submode]); csk->max_iso_npdu = min(max_npdu, max_iso_npdu); @@ -741,6 +744,9 @@ static int cxgbit_set_params(struct 
iscsi_conn *conn) if (conn_ops->MaxRecvDataSegmentLength > cdev->mdsl) conn_ops->MaxRecvDataSegmentLength = cdev->mdsl; + if (cxgbit_set_digest(csk)) + return -1; + if (conn->login->leading_connection) { param = iscsi_find_param_from_key(ERRORRECOVERYLEVEL, conn->param_list); @@ -764,7 +770,7 @@ static int cxgbit_set_params(struct iscsi_conn *conn) if (is_t5(cdev->lldi.adapter_type)) goto enable_ddp; else - goto enable_digest; + return 0; } if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { @@ -781,10 +787,6 @@ enable_ddp: } } -enable_digest: - if (cxgbit_set_digest(csk)) - return -1; - return 0; } -- cgit 1.2.3-korg From a17037e7d59075053b522048742a08ac9500bde8 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Jul 2018 22:09:52 +0530 Subject: scsi: libiscsi: fix possible NULL pointer dereference in case of TMF In iscsi_check_tmf_restrictions() task->hdr is dereferenced to print the opcode, it is possible that task->hdr is NULL. There are two cases based on opcode argument: 1. ISCSI_OP_SCSI_CMD - In this case alloc_pdu() is called after iscsi_check_tmf_restrictions() iscsi_prep_scsi_cmd_pdu() -> iscsi_check_tmf_restrictions() -> alloc_pdu(). Transport drivers allocate memory for iSCSI hdr in alloc_pdu() and assign it to task->hdr. In case of TMF task->hdr will be NULL resulting in NULL pointer dereference. 2. ISCSI_OP_SCSI_DATA_OUT - In this case transport driver can free the memory for iSCSI hdr after transmitting the pdu so task->hdr can be NULL or invalid. This patch fixes this issue by removing task->hdr->opcode from the printk statement. Signed-off-by: Varun Prakash Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libiscsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d6093838f5f20..c972cc2b3d5b7 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -284,11 +284,11 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (opcode != ISCSI_OP_SCSI_DATA_OUT) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] " "rejected.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } /* @@ -297,10 +297,10 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (conn->session->fast_abort) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] fast abort.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } break; -- cgit 1.2.3-korg From c170e5a8d222537e98aa8d4fddb667ff7a2ee114 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 12 Jul 2018 16:30:45 -0400 Subject: scsi: sg: fix minor memory leak in error path Fix a minor memory leak when there is an error opening a /dev/sg device. Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling") Cc: Reviewed-by: Ewan D. Milne Signed-off-by: Tony Battersby Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cd2fdac000c9e..2962a38c5068e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2185,6 +2185,7 @@ sg_add_sfp(Sg_device * sdp) write_lock_irqsave(&sdp->sfd_lock, iflags); if (atomic_read(&sdp->detaching)) { write_unlock_irqrestore(&sdp->sfd_lock, iflags); + kfree(sfp); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); -- cgit 1.2.3-korg From 8e4a4189ce02fe53b6f3ffcc1ac5a3a9967f2611 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 12 Jul 2018 18:09:21 -0400 Subject: scsi: sg: update comment for blk_get_request() The calling convention of blk_get_request() has changed in lk 4.18; update the comment in sg.c to match. Fixes: ff005a066240 ("block: sanitize blk_get_request calling conventions") Signed-off-by: Tony Battersby Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2962a38c5068e..ba9ba0e04f425 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1741,15 +1741,11 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) * * With scsi-mq enabled, there are a fixed number of preallocated * requests equal in number to shost->can_queue. If all of the - * preallocated requests are already in use, then using GFP_ATOMIC with - * blk_get_request() will return -EWOULDBLOCK, whereas using GFP_KERNEL - * will cause blk_get_request() to sleep until an active command - * completes, freeing up a request. Neither option is ideal, but - * GFP_KERNEL is the better choice to prevent userspace from getting an - * unexpected EWOULDBLOCK. - * - * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually - * does not sleep except under memory pressure. 
+ * preallocated requests are already in use, then blk_get_request() + * will sleep until an active command completes, freeing up a request. + * Although waiting in an asynchronous interface is less than ideal, we + * do not want to use BLK_MQ_REQ_NOWAIT here because userspace might + * not expect an EWOULDBLOCK from this condition. */ rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); -- cgit 1.2.3-korg From 3b1074bf9817bf43d4da375aa5f4b6c88f1d953e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 12 Jul 2018 12:59:37 -0700 Subject: mmc: mxcmmc: Fix missing parentheses and brace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like the adjusted syntax wasn't fully build tested. This fixes failures with powerpc builds: drivers/mmc/host/mxcmmc.c: In function ‘mxcmci_swap_buffers’: drivers/mmc/host/mxcmmc.c:296:51: error: expected ‘)’ before ‘;’ token void *buf = kmap_atomic(sg_page(sg) + sg->offset; ^ drivers/mmc/host/mxcmmc.c:299:1: error: expected ‘,’ or ‘;’ before ‘}’ token } ^ Fixes: b189e7589f6d3 ("mmc: mxcmmc: handle highmem pages") Signed-off-by: Kees Cook Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Ulf Hansson --- drivers/mmc/host/mxcmmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 75f781c11e897..de4e6e5bf3044 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -293,9 +293,10 @@ static void mxcmci_swap_buffers(struct mmc_data *data) int i; for_each_sg(data->sg, sg, data->sg_len, i) { - void *buf = kmap_atomic(sg_page(sg) + sg->offset; + void *buf = kmap_atomic(sg_page(sg) + sg->offset); buffer_swap32(buf, sg->length); kunmap_atomic(buf); + } } #else static inline void mxcmci_swap_buffers(struct mmc_data *data) {} -- cgit 1.2.3-korg From d530b5f1ca0bb66958a2b714bebe40a1248b9c15 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 
14 Jul 2018 14:32:12 +0200 Subject: drm: re-enable error handling drm_legacy_ctxbitmap_next() returns the result of idr_alloc(), which can return -ENOMEM, -EINVAL or -ENOSPC, none of which is -1. However, the call sites of drm_legacy_ctxbitmap_next() seem to assume that the error case would be -1 (the original return of drm_ctxbitmap_next() prior to 2.6.23 was actually -1). Thus re-enable error handling by checking for < 0. Signed-off-by: Nicholas Mc Guire Fixes: 62968144e673 ("drm: convert drm context code to use Linux idr") Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/1531571532-22733-1-git-send-email-hofrat@osadl.org --- drivers/gpu/drm/drm_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 3c4000facb360..f973d287696a6 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -372,7 +372,7 @@ int drm_legacy_addctx(struct drm_device *dev, void *data, ctx->handle = drm_legacy_ctxbitmap_next(dev); } DRM_DEBUG("%d\n", ctx->handle); - if (ctx->handle == -1) { + if (ctx->handle < 0) { DRM_DEBUG("Not enough free contexts.\n"); /* Should this return -EBUSY instead? */ return -ENOMEM; -- cgit 1.2.3-korg From 32e7024eab4c703b2f2fb362380c93be8f1949e3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Jul 2018 20:06:47 -0400 Subject: tools/power turbostat: Update turbostat(8) RAPL throttling column description Explain that this column may increment for some throttling causes, and may not increment for others. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index d39e4ff7d0bf9..a6db83a88e852 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -106,7 +106,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. -\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. +\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. @@ -114,7 +114,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBCorWatt\fP Watts consumed by the core part of the package. \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. 
-\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi .SH TOO MUCH INFORMATION EXAMPLE -- cgit 1.2.3-korg From e3dde080ebbdbb4bda8eee35d770714fee8c59ac Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:51 -0700 Subject: scsi: qla2xxx: Fix uninitialized List head crash When the IOCB queue is full, or on a system where memory is low and the driver receives a large RSCN storm, a stale sp pointer can stay on gpnid_list, resulting in a page_fault. This patch fixes this issue by initializing the sp->elem list head and removing sp->elem before memory is freed. The following stack trace is seen: 9 [ffff987b37d1bc60] page_fault at ffffffffad516768 [exception RIP: qla24xx_async_gpnid+496] 10 [ffff987b37d1bd10] qla24xx_async_gpnid at ffffffffc039866d [qla2xxx] 11 [ffff987b37d1bd80] qla2x00_do_work at ffffffffc036169c [qla2xxx] 12 [ffff987b37d1be38] qla2x00_do_dpc_all_vps at ffffffffc03adfed [qla2xxx] 13 [ffff987b37d1be78] qla2x00_do_dpc at ffffffffc036458a [qla2xxx] 14 [ffff987b37d1bec8] kthread at ffffffffacebae31 Fixes: 2d73ac6102d9 ("scsi: qla2xxx: Serialize GPNID for multiple RSCN") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 4 ++++ drivers/scsi/qla2xxx/qla_inline.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 2c35b0b2baa07..7a37440064195 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3708,6 +3708,10 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) return rval; done_free_sp: + spin_lock_irqsave(&vha->hw->vport_slock, flags); + list_del(&sp->elem); + spin_unlock_irqrestore(&vha->hw->vport_slock, flags); + if (sp->u.iocb_cmd.u.ctarg.req) { dma_free_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 37ae0f6d8ae57..59fd5a9dfeb87 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -222,6 +222,8 @@ qla2xxx_get_qpair_sp(struct qla_qpair *qpair, fc_port_t *fcport, gfp_t flag) sp->fcport = fcport; sp->iocbs = 1; sp->vha = qpair->vha; + INIT_LIST_HEAD(&sp->elem); + done: if (!sp) QLA_QPAIR_MARK_NOT_BUSY(qpair); -- cgit 1.2.3-korg From efa93f48fa9d423fda166bc3b6c0cbb09682492e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:52 -0700 Subject: scsi: qla2xxx: Fix NPIV deletion by calling wait_for_sess_deletion Add wait for session deletion to finish before freeing an NPIV scsi host. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 1 + drivers/scsi/qla2xxx/qla_gbl.h | 1 + drivers/scsi/qla2xxx/qla_mid.c | 5 +++++ drivers/scsi/qla2xxx/qla_os.c | 2 +- 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 89a4999fa631f..c8731568f9c47 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2141,6 +2141,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) msleep(1000); qla24xx_disable_vp(vha); + qla2x00_wait_for_sess_deletion(vha); vha->flags.delete_progress = 1; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index f68eb60965592..2660a48d918a5 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -214,6 +214,7 @@ void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); int qla24xx_async_abort_cmd(srb_t *); int qla24xx_post_relogin_work(struct scsi_qla_host *vha); +void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *); /* * Global Functions in qla_mid.c source file. 
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index f6f0a759a7c24..2c1aaf9b7a00c 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -153,10 +153,15 @@ qla24xx_disable_vp(scsi_qla_host_t *vha) { unsigned long flags; int ret; + fc_port_t *fcport; ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); atomic_set(&vha->loop_state, LOOP_DOWN); atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->logout_on_delete = 0; + + qla2x00_mark_all_devices_lost(vha, 0); /* Remove port id from vp target map */ spin_lock_irqsave(&vha->hw->hardware_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9f309e572be46..acc27808963c4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1147,7 +1147,7 @@ static inline int test_fcport_count(scsi_qla_host_t *vha) * qla2x00_wait_for_sess_deletion can only be called from remove_one. * it has dependency on UNLOADING flag to stop device discovery */ -static void +void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) { qla2x00_mark_all_devices_lost(vha, 0); -- cgit 1.2.3-korg From 45235022da9925b2b070c0139629233173e50089 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:53 -0700 Subject: scsi: qla2xxx: Fix driver unload by shutting down chip Use chip shutdown at the start of unload to stop all DMA + traffic and bring down the laser. This prevents any link activities from triggering the driver to be re-engaged. Fixes: 4b60c82736d0 ("scsi: qla2xxx: Add fw_started flags to qpair") Cc: #4.16 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 3 +++ drivers/scsi/qla2xxx/qla_mbx.c | 6 ++++++ drivers/scsi/qla2xxx/qla_mid.c | 6 ++++-- drivers/scsi/qla2xxx/qla_os.c | 44 +++++++++++++++++------------------------- drivers/scsi/qla2xxx/qla_sup.c | 3 +++ 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 9fa5a2557f2c7..7756106d45554 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -631,6 +631,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) unsigned long flags; fc_port_t *fcport = NULL; + if (!vha->hw->flags.fw_started) + return; + /* Setup to process RIO completion. */ handle_cnt = 0; if (IS_CNA_CAPABLE(ha)) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 7e875f5752299..f0ec13d48bf34 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -4220,6 +4220,9 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req) mbx_cmd_t *mcp = &mc; struct qla_hw_data *ha = vha->hw; + if (!ha->flags.fw_started) + return QLA_SUCCESS; + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d3, "Entered %s.\n", __func__); @@ -4289,6 +4292,9 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) mbx_cmd_t *mcp = &mc; struct qla_hw_data *ha = vha->hw; + if (!ha->flags.fw_started) + return QLA_SUCCESS; + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d6, "Entered %s.\n", __func__); diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 2c1aaf9b7a00c..aa727d07b702e 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -152,10 +152,12 @@ int qla24xx_disable_vp(scsi_qla_host_t *vha) { unsigned long flags; - int ret; + int ret = QLA_SUCCESS; fc_port_t *fcport; - ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); + if (vha->hw->flags.fw_started) + ret = qla24xx_control_vp(vha, 
VCE_COMMAND_DISABLE_VPS_LOGO_ALL); + atomic_set(&vha->loop_state, LOOP_DOWN); atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); list_for_each_entry(fcport, &vha->vp_fcports, list) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index acc27808963c4..2cd2e5ccce15d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -303,6 +303,7 @@ static void qla2x00_free_device(scsi_qla_host_t *); static int qla2xxx_map_queues(struct Scsi_Host *shost); static void qla2x00_destroy_deferred_work(struct qla_hw_data *); + struct scsi_host_template qla2xxx_driver_template = { .module = THIS_MODULE, .name = QLA2XXX_DRIVER_NAME, @@ -3603,6 +3604,8 @@ qla2x00_remove_one(struct pci_dev *pdev) base_vha = pci_get_drvdata(pdev); ha = base_vha->hw; + ql_log(ql_log_info, base_vha, 0xb079, + "Removing driver\n"); /* Indicate device removal to prevent future board_disable and wait * until any pending board_disable has completed. */ @@ -3625,6 +3628,21 @@ qla2x00_remove_one(struct pci_dev *pdev) } qla2x00_wait_for_hba_ready(base_vha); + if (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha)) { + if (ha->flags.fw_started) + qla2x00_abort_isp_cleanup(base_vha); + } else if (!IS_QLAFX00(ha)) { + if (IS_QLA8031(ha)) { + ql_dbg(ql_dbg_p3p, base_vha, 0xb07e, + "Clearing fcoe driver presence.\n"); + if (qla83xx_clear_drv_presence(base_vha) != QLA_SUCCESS) + ql_dbg(ql_dbg_p3p, base_vha, 0xb079, + "Error while clearing DRV-Presence.\n"); + } + + qla2x00_try_to_stop_firmware(base_vha); + } + qla2x00_wait_for_sess_deletion(base_vha); /* @@ -3648,14 +3666,6 @@ qla2x00_remove_one(struct pci_dev *pdev) qla2x00_delete_all_vps(ha, base_vha); - if (IS_QLA8031(ha)) { - ql_dbg(ql_dbg_p3p, base_vha, 0xb07e, - "Clearing fcoe driver presence.\n"); - if (qla83xx_clear_drv_presence(base_vha) != QLA_SUCCESS) - ql_dbg(ql_dbg_p3p, base_vha, 0xb079, - "Error while clearing DRV-Presence.\n"); - } - qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16); 
qla2x00_dfs_remove(base_vha); @@ -3715,24 +3725,6 @@ qla2x00_free_device(scsi_qla_host_t *vha) qla2x00_stop_timer(vha); qla25xx_delete_queues(vha); - - if (ha->flags.fce_enabled) - qla2x00_disable_fce_trace(vha, NULL, NULL); - - if (ha->eft) - qla2x00_disable_eft_trace(vha); - - if (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha)) { - if (ha->flags.fw_started) - qla2x00_abort_isp_cleanup(vha); - } else { - if (ha->flags.fw_started) { - /* Stop currently executing firmware. */ - qla2x00_try_to_stop_firmware(vha); - ha->flags.fw_started = 0; - } - } - vha->flags.online = 0; /* turn-off interrupts on the card */ diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 04458eb19d380..4499c787165f1 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -1880,6 +1880,9 @@ qla24xx_beacon_off(struct scsi_qla_host *vha) if (IS_P3P_TYPE(ha)) return QLA_SUCCESS; + if (!ha->flags.fw_started) + return QLA_SUCCESS; + ha->beacon_blink_led = 0; if (IS_QLA2031(ha) || IS_QLA27XX(ha)) -- cgit 1.2.3-korg From b08abbd9f5996309f021684f9ca74da30dcca36a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:54 -0700 Subject: scsi: qla2xxx: Fix ISP recovery on unload During the unload process, the chip can encounter a problem where a FW dump would be captured. In this case, the full reset sequence that brings the chip back to a fully operational state will be skipped. Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K.
Petersen --- drivers/scsi/qla2xxx/qla_os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2cd2e5ccce15d..1fbd16c8c9a7b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6020,8 +6020,9 @@ qla2x00_do_dpc(void *data) set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); } - if (test_and_clear_bit(ISP_ABORT_NEEDED, - &base_vha->dpc_flags)) { + if (test_and_clear_bit + (ISP_ABORT_NEEDED, &base_vha->dpc_flags) && + !test_bit(UNLOADING, &base_vha->dpc_flags)) { ql_dbg(ql_dbg_dpc, base_vha, 0x4007, "ISP abort scheduled.\n"); -- cgit 1.2.3-korg From b4146c4929ef61d5afca011474d59d0918a0cd82 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 18 Jul 2018 14:29:55 -0700 Subject: scsi: qla2xxx: Return error when TMF returns Propagate the task management completion status properly to avoid unnecessary waits for commands to complete. Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling") Cc: Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index db0e3279e07ab..1b19b954bbae7 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1489,11 +1489,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, wait_for_completion(&tm_iocb->u.tmf.comp); - rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ? 
- QLA_SUCCESS : QLA_FUNCTION_FAILED; + rval = tm_iocb->u.tmf.data; - if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) { - ql_dbg(ql_dbg_taskm, vha, 0x8030, + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x8030, "TM IOCB failed (%x).\n", rval); } -- cgit 1.2.3-korg From 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jul 2018 16:12:32 +0800 Subject: crypto: padlock-aes - Fix Nano workaround data corruption This was detected by the self-test thanks to Ard's chunking patch. I finally got around to testing this out on my ancient Via box. It turns out that the workaround got the assembly wrong and we end up doing count + initial cycles of the loop instead of just count. This obviously causes corruption, either by overwriting the source that is yet to be processed, or writing over the end of the buffer. On CPUs that don't require the workaround only ECB is affected. On Nano CPUs both ECB and CBC are affected. This patch fixes it by doing the subtraction prior to the assembly. 
Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...") Cc: Reported-by: Jamie Heilman Signed-off-by: Herbert Xu --- drivers/crypto/padlock-aes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 1c6cbda56afe9..09d823d36d3a4 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, return; } + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) @@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) - : "d"(control_word), "b"(key), "c"(count - initial)); + : "d"(control_word), "b"(key), "c"(count)); } static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, @@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, if (count < cbc_fetch_blocks) return cbc_crypt(input, output, key, iv, control_word, count); + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) @@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) - : "d" (control_word), "b" (key), "c" (count-initial)); + : "d" (control_word), "b" (key), "c" (count)); return iv; } -- cgit 1.2.3-korg From 0a06d4256674c4e041945b52044941995fee237d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 19 Jul 2018 10:31:00 -0700 Subject: KVM: vmx: use local variable for current_vmptr when emulating VMPTRST Do not expose the address of vmx->nested.current_vmptr to kvm_write_guest_virt_system() as the resulting 
__copy_to_user() call will trigger a WARN when CONFIG_HARDENED_USERCOPY is enabled. Opportunistically clean up variable names in handle_vmptrst() to improve readability, e.g. vmcs_gva is misleading as the memory operand of VMPTRST is plain memory, not a VMCS. Signed-off-by: Sean Christopherson Tested-by: Peter Shier Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e30da9a2430ca..548bef5359e69 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8480,21 +8480,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - gva_t vmcs_gva; + unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; + gva_t gva; if (!nested_vmx_check_permission(vcpu)) return 1; - if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, &vmcs_gva)) + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, vmcs_gva, - (void *)&to_vmx(vcpu)->nested.current_vmptr, - sizeof(u64), &e)) { + if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr, + sizeof(gpa_t), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } -- cgit 1.2.3-korg From 63aff65573d73eb8dda4732ad4ef222dd35e4862 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 19 Jul 2018 21:59:07 +0300 Subject: kvm: x86: vmx: fix vpid leak VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested vmx is turned on with the module parameter.
However, it's only freed if the L1 guest has executed VMXON which is not a given. As a result, on a system with nested==on every creation+deletion of an L1 vcpu without running an L2 guest results in leaking one vpid. Since the total number of vpids is limited to 64k, they can eventually get exhausted, preventing L2 from starting. Delay allocation of the L2 vpid until VMXON emulation, thus matching its freeing. Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0 Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 548bef5359e69..5d8e317c2b04f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7893,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmxon = true; return 0; @@ -10369,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) { + if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, kvm_vcpu_apicv_active(&vmx->vcpu)); - vmx->nested.vpid02 = allocate_vpid(); - } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -10390,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: - free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); -- cgit 1.2.3-korg From 9d83601a9cc1884d1b5706ee2acc661d558c6838 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 20 Jul 2018 14:47:03 -0400 Subject: tools/power turbostat: fix -S on UP systems The -S (system summary) option failed to print any data on a 1-processor system. 
Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4d14bbbf9b639..81a1df0fb5e21 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1163,9 +1163,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ if (!printed || !summary_only) print_header("\t"); - if (topo.num_cpus > 1) - format_counters(&average.threads, &average.cores, - &average.packages); + format_counters(&average.threads, &average.cores, &average.packages); printed = 1; -- cgit 1.2.3-korg From 9b788f32bee6b0b293a4bdfca4ad4bb0206407fb Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 20 Jul 2018 10:28:46 +0900 Subject: x86/efi: Access EFI MMIO data as unencrypted when SEV is active SEV guest fails to update the UEFI runtime variables stored in the flash. The following commit: 1379edd59673 ("x86/efi: Access EFI data as encrypted when SEV is active") unconditionally maps all the UEFI runtime data as 'encrypted' (C=1). When SEV is active the UEFI runtime data marked as EFI_MEMORY_MAPPED_IO should be mapped as 'unencrypted' so that both guest and hypervisor can access the data. 
Signed-off-by: Brijesh Singh Signed-off-by: Ard Biesheuvel Reviewed-by: Tom Lendacky Cc: # 4.15.x Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 1379edd59673 ("x86/efi: Access EFI data as encrypted ...") Link: http://lkml.kernel.org/r/20180720012846.23560-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 77873ce700ae7..5f2eb32316073 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -417,7 +417,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; - if (sev_active()) + if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO) flags |= _PAGE_ENC; pfn = md->phys_addr >> PAGE_SHIFT; -- cgit 1.2.3-korg From 6283fa38dc8744dc7c2bd2a03bb0478fe42f79fa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 20 Jul 2018 17:38:37 -0700 Subject: bpf: btf: Ensure the member->offset is in the right order This patch ensures the member->offset of a struct is in the correct order (i.e the later member's offset cannot go backward). The current "pahole -J" BTF encoder does not generate something like this. However, checking this can ensure future encoder will not violate this. 
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)") Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/btf.c | 14 +++++++++++++- tools/testing/selftests/bpf/test_btf.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9704934252b3f..2590700237c13 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1519,9 +1519,9 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, { bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; const struct btf_member *member; + u32 meta_needed, last_offset; struct btf *btf = env->btf; u32 struct_size = t->size; - u32 meta_needed; u16 i; meta_needed = btf_type_vlen(t) * sizeof(*member); @@ -1534,6 +1534,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, btf_verifier_log_type(env, t, NULL); + last_offset = 0; for_each_member(i, t, member) { if (!btf_name_offset_valid(btf, member->name_off)) { btf_verifier_log_member(env, t, member, @@ -1555,6 +1556,16 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } + /* + * ">" instead of ">=" because the last member could be + * "char a[0];" + */ + if (last_offset > member->offset) { + btf_verifier_log_member(env, t, member, + "Invalid member bits_offset"); + return -EINVAL; + } + if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) { btf_verifier_log_member(env, t, member, "Memmber bits_offset exceeds its struct size"); @@ -1562,6 +1573,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, } btf_verifier_log_member(env, t, member, NULL); + last_offset = member->offset; } return meta_needed; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 3619f30230880..402c0f7cc418d 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -247,6 +247,34 @@ static struct btf_raw_test 
raw_tests[] = { .max_entries = 4, }, +{ + .descr = "struct test #3 Invalid member offset", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int64 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), + + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16), + BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0", + .str_sec_size = sizeof("\0A\0m\0n\0"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test3_map", + .key_size = sizeof(int), + .value_size = 16, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member bits_offset", +}, + /* Test member exceeds the size of struct. * * struct A { -- cgit 1.2.3-korg From d2753e6b4882a637a0e8fb3b9c2e15f33265300e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Jul 2018 10:39:07 +0200 Subject: perf/x86/amd/ibs: Don't access non-started event Paul Menzel reported the following bug: > Enabling the undefined behavior sanitizer and building GNU/Linux 4.18-rc5+ > (with some unrelated commits) with GCC 8.1.0 from Debian Sid/unstable, the > warning below is shown. 
> > > [ 2.111913] > > ================================================================================ > > [ 2.111917] UBSAN: Undefined behaviour in arch/x86/events/amd/ibs.c:582:24 > > [ 2.111919] member access within null pointer of type 'struct perf_event' > > [ 2.111926] CPU: 0 PID: 144 Comm: udevadm Not tainted 4.18.0-rc5-00316-g4864b68cedf2 #104 > > [ 2.111928] Hardware name: ASROCK E350M1/E350M1, BIOS TIMELESS 01/01/1970 > > [ 2.111930] Call Trace: > > [ 2.111943] dump_stack+0x55/0x89 > > [ 2.111949] ubsan_epilogue+0xb/0x33 > > [ 2.111953] handle_null_ptr_deref+0x7f/0x90 > > [ 2.111958] __ubsan_handle_type_mismatch_v1+0x55/0x60 > > [ 2.111964] perf_ibs_handle_irq+0x596/0x620 The code dereferences event before checking the STARTED bit. Patch below should cure the issue. The warning should not trigger, if I analyzed the thing correctly. (And Paul's testing confirms this.) Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Menzel Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807200958390.1580@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/amd/ibs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 4b98101209a18..d50bb4dc06503 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); struct perf_event *event = pcpu->event; - struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event *hwc; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; @@ -602,6 +602,10 @@ fail: return 0; } + if (WARN_ON_ONCE(!event)) + goto fail; + + hwc = &event->hw; msr = 
hwc->config_base; buf = ibs_data.regs; rdmsrl(msr, *buf); -- cgit 1.2.3-korg From d9e6dbcf28f383bf08e6a3180972f5722e514a54 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jul 2018 17:19:19 -0400 Subject: x86/apic: Future-proof the TSC_DEADLINE quirk for SKX All SKX with stepping higher than 4 support the TSC_DEADLINE, no matter the microcode version. Without this patch, upcoming SKX steppings will not be able to use their TSC_DEADLINE timer. Signed-off-by: Len Brown Cc: # v4.14+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 616dd5872e ("x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping") Link: http://lkml.kernel.org/r/d0c7129e509660be9ec6b233284b8d42d90659e8.1532207856.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2aabd4cb0e3f1..adbda5847b14e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void) case 0x04: return 0x02000014; } + if (boot_cpu_data.x86_stepping > 4) + return 0; + return ~0U; } -- cgit 1.2.3-korg From b3681dd548d06deb2e1573890829dff4b15abf46 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 22 Jul 2018 11:05:09 -0700 Subject: x86/entry/64: Remove %ebx handling from error_entry/exit error_entry and error_exit communicate the user vs. kernel status of the frame using %ebx. This is unnecessary -- the information is in regs->cs. Just use regs->cs. This makes error_entry simpler and makes error_exit more robust. It also fixes a nasty bug. Before all the Spectre nonsense, the xen_failsafe_callback entry point returned like this: ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS ENCODE_FRAME_POINTER jmp error_exit And it did not go through error_entry. This was bogus: RBX contained garbage, and error_exit expected a flag in RBX. 
Fortunately, it generally contained *nonzero* garbage, so the correct code path was used. As part of the Spectre fixes, code was added to clear RBX to mitigate certain speculation attacks. Now, depending on kernel configuration, RBX got zeroed and, when running some Wine workloads, the kernel crashes. This was introduced by: commit 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") With this patch applied, RBX is no longer needed as a flag, and the problem goes away. I suspect that malicious userspace could use this bug to crash the kernel even without the offending patch applied, though. [ Historical note: I wrote this patch as a cleanup before I was aware of the bug it fixed. ] [ Note to stable maintainers: this should probably get applied to all kernels. If you're nervous about that, a more conservative fix to add xorl %ebx,%ebx; incl %ebx before the jump to error_exit should also fix the problem. ] Reported-and-tested-by: M. Vefa Bicakci Signed-off-by: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Dominik Brodowski Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Cc: xen-devel@lists.xenproject.org Fixes: 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") Link: http://lkml.kernel.org/r/b5010a090d3586b2d6e06c7ad3ec5542d1241c45.1532282627.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73a522d53b537..8ae7ffda8f98e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -981,7 +981,7 @@ ENTRY(\sym) call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit .endif END(\sym) .endm @@ -1222,7 +1222,6 @@ END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. - * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) UNWIND_HINT_FUNC @@ -1269,7 +1268,6 @@ ENTRY(error_entry) * for these here too. */ .Lerror_kernelspace: - incl %ebx leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) je .Lerror_bad_iret @@ -1303,28 +1301,20 @@ ENTRY(error_entry) /* * Pretend that the exception came from user mode: set up pt_regs - * as if we faulted immediately after IRET and clear EBX so that - * error_exit knows that we will be returning to user mode. + * as if we faulted immediately after IRET. 
*/ mov %rsp, %rdi call fixup_bad_iret mov %rax, %rsp - decl %ebx jmp .Lerror_entry_from_usermode_after_swapgs END(error_entry) - -/* - * On entry, EBX is a "return to kernel mode" flag: - * 1: already in kernel mode, don't need SWAPGS - * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode - */ ENTRY(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - testl %ebx, %ebx - jnz retint_kernel + testb $3, CS(%rsp) + jz retint_kernel jmp retint_user END(error_exit) -- cgit 1.2.3-korg From 4799f6856fdd38c8078a190eca3288029287cf66 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Tue, 17 Jul 2018 11:32:37 +0530 Subject: MAINTAINERS: Add Naveen N. Rao as kprobes co-maintainer Naveen has been contributing consistently reviewing and hardening kprobes for some time now. I have not been able to do the same due to other commitments. Signed-off-by: Ananth N Mavinakayanahalli Cc: Naveen N. Rao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Jiri Olsa Cc: akpm@linux-foundation.org Cc: mhiramat@kernel.org Link: http://lkml.kernel.org/r/153180735790.1914.15547706781664285286.stgit@thinktux Signed-off-by: Ingo Molnar --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0fe4228f78cb8..42a884c1b0f76 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7984,7 +7984,7 @@ F: lib/test_kmod.c F: tools/testing/selftests/kmod/ KPROBES -M: Ananth N Mavinakayanahalli +M: Naveen N. Rao M: Anil S Keshavamurthy M: "David S. 
Miller" M: Masami Hiramatsu -- cgit 1.2.3-korg From 6d20caed9b92d0db005324281e10bb9fd4813a32 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 Jul 2018 16:29:06 +0300 Subject: ARC: Add Ofer Levi as plat-eznps maintainer Signed-off-by: Leon Romanovsky Signed-off-by: Vineet Gupta --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0fe4228f78cb8..d6d86003fafd0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5444,6 +5444,7 @@ F: drivers/iommu/exynos-iommu.c EZchip NPS platform support M: Vineet Gupta +M: Ofer Levi S: Supported F: arch/arc/plat-eznps F: arch/arc/boot/dts/eznps.dts -- cgit 1.2.3-korg From addb8a6559f0f8b5a37582b7ca698358445a55bf Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 11 Jul 2018 11:23:52 +0300 Subject: RDMA/uverbs: Expand primary and alt AV port checks The commit cited below checked that the port numbers provided in the primary and alt AVs are legal. That is sufficient to prevent a kernel panic. However, it is not sufficient for correct operation. In Linux, AVs (both primary and alt) must be completely self-described. We do not accept an AV from userspace without an embedded port number. (This has been the case since kernel 3.14 commit dbf727de7440 ("IB/core: Use GID table in AH creation and dmac resolution")). For the primary AV, this embedded port number must match the port number specified with IB_QP_PORT. We also expect the port number embedded in the alt AV to match the alt_port_num value passed by the userspace driver in the modify_qp command base structure. Add these checks to modify_qp. 
Cc: # 4.16 Fixes: 5d4c05c3ee36 ("RDMA/uverbs: Sanitize user entered port numbers prior to access it") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 59 +++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cc06e8404e9bf..583d3a10b9405 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1984,15 +1984,64 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } - if ((cmd->base.attr_mask & IB_QP_AV) && - !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { - ret = -EINVAL; - goto release_qp; + if ((cmd->base.attr_mask & IB_QP_AV)) { + if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + + if (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state == IB_QPS_RTR) { + /* We are in INIT->RTR TRANSITION (if we are not, + * this transition will be rejected in subsequent checks). + * In the INIT->RTR transition, we cannot have IB_QP_PORT set, + * but the IB_QP_STATE flag is required. + * + * Since kernel 3.14 (commit dbf727de7440), the uverbs driver, + * when IB_QP_AV is set, has required inclusion of a valid + * port number in the primary AV. (AVs are created and handled + * differently for infiniband and ethernet (RoCE) ports). + * + * Check the port number included in the primary AV against + * the port number in the qp struct, which was set (and saved) + * in the RST->INIT transition. + */ + if (cmd->base.dest.port_num != qp->real_qp->port) { + ret = -EINVAL; + goto release_qp; + } + } else { + /* We are in SQD->SQD. (If we are not, this transition will + * be rejected later in the verbs layer checks). + * Check for both IB_QP_PORT and IB_QP_AV, these can be set + * together in the SQD->SQD transition. 
+ * + * If only IB_QP_AV was set, add in IB_QP_PORT as well (the + * verbs layer driver does not track primary port changes + * resulting from path migration. Thus, in SQD, if the primary + * AV is modified, the primary port should also be modified). + * + * Note that in this transition, the IB_QP_STATE flag + * is not allowed. + */ + if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == (IB_QP_AV | IB_QP_PORT)) && + cmd->base.port_num != cmd->base.dest.port_num) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == IB_QP_AV) { + cmd->base.attr_mask |= IB_QP_PORT; + cmd->base.port_num = cmd->base.dest.port_num; + } + } } if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || - !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) || + cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) { ret = -EINVAL; goto release_qp; } -- cgit 1.2.3-korg From 64bb568488671048d25d7b3ada058bb6c7cb1d5d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 24 Jul 2018 08:40:20 -0700 Subject: bpf: btf: Sync uapi btf.h to tools This patch syncs the uapi btf.h to tools/. Fixes: 36fc3c8c282c bpf: btf: Clean up BTF_INT_BITS() in uapi btf.h Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 0b5ddbe135a47..972265f328717 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -76,7 +76,7 @@ struct btf_type { */ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) #define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) -#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) +#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) /* Attributes stored in the BTF_INT_ENCODING */ #define
BTF_INT_SIGNED (1 << 0) -- cgit 1.2.3-korg From 5b891af7fca14526b2a87c6f38b004e2df655ef4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 24 Jul 2018 08:40:21 -0700 Subject: bpf: Replace [u]int32_t and [u]int64_t in libbpf This patch replaces [u]int32_t and [u]int64_t usage with __[su]32 and __[su]64. The same change goes for [u]int16_t and [u]int8_t. Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf") Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 34 ++++++++++++++++------------------ tools/lib/bpf/btf.h | 8 ++++---- tools/lib/bpf/libbpf.c | 12 ++++++------ tools/lib/bpf/libbpf.h | 4 ++-- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8c54a4b6f1875..b80de80b45847 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2,7 +2,6 @@ /* Copyright (c) 2018 Facebook */ #include -#include #include #include #include @@ -27,13 +26,13 @@ struct btf { struct btf_type **types; const char *strings; void *nohdr_data; - uint32_t nr_types; - uint32_t types_size; - uint32_t data_size; + __u32 nr_types; + __u32 types_size; + __u32 data_size; int fd; }; -static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) +static const char *btf_name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) return &btf->strings[offset]; @@ -45,7 +44,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { struct btf_type **new_types; - u32 expand_by, new_size; + __u32 expand_by, new_size; if (btf->types_size == BTF_MAX_NR_TYPES) return -E2BIG; @@ -72,7 +71,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) { const struct btf_header *hdr = btf->hdr; - u32 meta_left; + __u32 meta_left; if (btf->data_size < sizeof(struct btf_header)) { elog("BTF header not found\n"); 
@@ -151,7 +150,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) while (next_type < end_type) { struct btf_type *t = next_type; - uint16_t vlen = BTF_INFO_VLEN(t->info); + __u16 vlen = BTF_INFO_VLEN(t->info); int err; next_type += sizeof(*t); @@ -191,7 +190,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) } static const struct btf_type *btf_type_by_id(const struct btf *btf, - uint32_t type_id) + __u32 type_id) { if (type_id > btf->nr_types) return NULL; @@ -209,7 +208,7 @@ static bool btf_type_is_void_or_null(const struct btf_type *t) return !t || btf_type_is_void(t); } -static int64_t btf_type_size(const struct btf_type *t) +static __s64 btf_type_size(const struct btf_type *t) { switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_INT: @@ -226,12 +225,12 @@ static int64_t btf_type_size(const struct btf_type *t) #define MAX_RESOLVE_DEPTH 32 -int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) +__s64 btf__resolve_size(const struct btf *btf, __u32 type_id) { const struct btf_array *array; const struct btf_type *t; - uint32_t nelems = 1; - int64_t size = -1; + __u32 nelems = 1; + __s64 size = -1; int i; t = btf_type_by_id(btf, type_id); @@ -271,9 +270,9 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) return nelems * size; } -int32_t btf__find_by_name(const struct btf *btf, const char *type_name) +__s32 btf__find_by_name(const struct btf *btf, const char *type_name) { - uint32_t i; + __u32 i; if (!strcmp(type_name, "void")) return 0; @@ -302,10 +301,9 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new(uint8_t *data, uint32_t size, - btf_print_fn_t err_log) +struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) { - uint32_t log_buf_size = 0; + __u32 log_buf_size = 0; char *log_buf = NULL; struct btf *btf; int err; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 74bb344035bb9..ed3a84370cccd 100644 --- a/tools/lib/bpf/btf.h +++ 
b/tools/lib/bpf/btf.h @@ -4,7 +4,7 @@ #ifndef __BPF_BTF_H #define __BPF_BTF_H -#include +#include #define BTF_ELF_SEC ".BTF" @@ -14,9 +14,9 @@ typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); void btf__free(struct btf *btf); -struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log); -int32_t btf__find_by_name(const struct btf *btf, const char *type_name); -int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id); +struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +__s32 btf__find_by_name(const struct btf *btf, const char *type_name); +__s64 btf__resolve_size(const struct btf *btf, __u32 type_id); int btf__fd(const struct btf *btf); #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1e96b5de5ff8..6deb4fe4fffe6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -216,8 +216,8 @@ struct bpf_map { size_t offset; int map_ifindex; struct bpf_map_def def; - uint32_t btf_key_type_id; - uint32_t btf_value_type_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; }; @@ -1016,8 +1016,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { struct bpf_map_def *def = &map->def; const size_t max_name = 256; - int64_t key_size, value_size; - int32_t key_id, value_id; + __s64 key_size, value_size; + __s32 key_id, value_id; char name[max_name]; /* Find key type by name from BTF */ @@ -2089,12 +2089,12 @@ const char *bpf_map__name(struct bpf_map *map) return map ? map->name : NULL; } -uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map) +__u32 bpf_map__btf_key_type_id(const struct bpf_map *map) { return map ? map->btf_key_type_id : 0; } -uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map) +__u32 bpf_map__btf_value_type_id(const struct bpf_map *map) { return map ? 
map->btf_value_type_id : 0; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 09976531aa74d..b33ae02f7d0e4 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); int bpf_map__fd(struct bpf_map *map); const struct bpf_map_def *bpf_map__def(struct bpf_map *map); const char *bpf_map__name(struct bpf_map *map); -uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map); -uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map); +__u32 bpf_map__btf_key_type_id(const struct bpf_map *map); +__u32 bpf_map__btf_value_type_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, -- cgit 1.2.3-korg From 38d5d3b3d5dbc0e0bb51fa6f7559d0d5a27916f6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 24 Jul 2018 08:40:22 -0700 Subject: bpf: Introduce BPF_ANNOTATE_KV_PAIR This patch introduces BPF_ANNOTATE_KV_PAIR to signal the bpf loader about the btf key_type and value_type of a bpf map. Please refer to the changes in test_btf_haskv.c for its usage. Both iproute2 and libbpf loader will then have the same convention to find out the map's btf_key_type_id and btf_value_type_id from a map's name. 
Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf") Suggested-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 7 ++- tools/lib/bpf/btf.h | 2 + tools/lib/bpf/libbpf.c | 75 +++++++++++++++------------- tools/testing/selftests/bpf/bpf_helpers.h | 9 ++++ tools/testing/selftests/bpf/test_btf_haskv.c | 7 +-- 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b80de80b45847..2d270c560df39 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -189,8 +189,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static const struct btf_type *btf_type_by_id(const struct btf *btf, - __u32 type_id) +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) return NULL; @@ -233,7 +232,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) __s64 size = -1; int i; - t = btf_type_by_id(btf, type_id); + t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { size = btf_type_size(t); @@ -258,7 +257,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) return -EINVAL; } - t = btf_type_by_id(btf, type_id); + t = btf__type_by_id(btf, type_id); } if (size < 0) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index ed3a84370cccd..e2a09a155f84f 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -9,6 +9,7 @@ #define BTF_ELF_SEC ".BTF" struct btf; +struct btf_type; typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -16,6 +17,7 @@ typedef int (*btf_print_fn_t)(const char *, ...) 
void btf__free(struct btf *btf); struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); int btf__fd(const struct btf *btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6deb4fe4fffe6..d881d370616c1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1014,68 +1015,72 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { + const struct btf_type *container_type; + const struct btf_member *key, *value; struct bpf_map_def *def = &map->def; const size_t max_name = 256; + char container_name[max_name]; __s64 key_size, value_size; - __s32 key_id, value_id; - char name[max_name]; + __s32 container_id; - /* Find key type by name from BTF */ - if (snprintf(name, max_name, "%s_key", map->name) == max_name) { - pr_warning("map:%s length of BTF key_type:%s_key is too long\n", + if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", map->name, map->name); return -EINVAL; } - key_id = btf__find_by_name(btf, name); - if (key_id < 0) { - pr_debug("map:%s key_type:%s cannot be found in BTF\n", - map->name, name); - return key_id; + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. 
Missing BPF_ANNOTATE_KV_PAIR?\n", + map->name, container_name); + return container_id; } - key_size = btf__resolve_size(btf, key_id); - if (key_size < 0) { - pr_warning("map:%s key_type:%s cannot get the BTF type_size\n", - map->name, name); - return key_size; + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map->name, container_id); + return -EINVAL; } - if (def->key_size != key_size) { - pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n", - map->name, name, (unsigned int)key_size, def->key_size); + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map->name, container_name); return -EINVAL; } - /* Find value type from BTF */ - if (snprintf(name, max_name, "%s_value", map->name) == max_name) { - pr_warning("map:%s length of BTF value_type:%s_value is too long\n", - map->name, map->name); - return -EINVAL; + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", + map->name); + return key_size; } - value_id = btf__find_by_name(btf, name); - if (value_id < 0) { - pr_debug("map:%s value_type:%s cannot be found in BTF\n", - map->name, name); - return value_id; + if (def->key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map->name, (__u32)key_size, def->key_size); + return -EINVAL; } - value_size = btf__resolve_size(btf, value_id); + value_size = btf__resolve_size(btf, value->type); if (value_size < 0) { - pr_warning("map:%s value_type:%s cannot get the BTF type_size\n", - map->name, name); + pr_warning("map:%s invalid BTF value_type_size\n", map->name); return value_size; } if (def->value_size != value_size) { - pr_warning("map:%s value_type:%s 
has BTF type_size:%u != value_size:%u\n", - map->name, name, (unsigned int)value_size, def->value_size); + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map->name, (__u32)value_size, def->value_size); return -EINVAL; } - map->btf_key_type_id = key_id; - map->btf_value_type_id = value_id; + map->btf_key_type_id = key->type; + map->btf_value_type_id = value->type; return 0; } diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index f2f28b6c89151..810de20e8e263 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -158,6 +158,15 @@ struct bpf_map_def { unsigned int numa_node; }; +#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ + struct ____btf_map_##name { \ + type_key key; \ + type_val value; \ + }; \ + struct ____btf_map_##name \ + __attribute__ ((section(".maps." #name), used)) \ + ____btf_map_##name = { } + static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = (void *) BPF_FUNC_skb_load_bytes; static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c index 8c7ca096ecf2e..b21b876f475d8 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -10,11 +10,6 @@ struct ipv_counts { unsigned int v6; }; -typedef int btf_map_key; -typedef struct ipv_counts btf_map_value; -btf_map_key dumm_key; -btf_map_value dummy_value; - struct bpf_map_def SEC("maps") btf_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), @@ -22,6 +17,8 @@ struct bpf_map_def SEC("maps") btf_map = { .max_entries = 4, }; +BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); + struct dummy_tracepoint_args { unsigned long long pad; struct sock *sock; -- cgit 1.2.3-korg From 0a5257bc6d89c2ae69b9bf955679cb4f89261874 Mon Sep 17 00:00:00 2001 From: Emmanuel 
Grumbach Date: Tue, 17 Jul 2018 13:43:56 +0300 Subject: iwlwifi: add more card IDs for 9000 series Add new device IDs for the 9000 series. Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Emmanuel Grumbach Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 69 +++++++++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 5 ++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 22 ++++++++ 3 files changed, 96 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index e20c30b29c03f..c8ea63d02619c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -178,6 +178,17 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; +const struct iwl_cfg iwl9260_killer_2ac_cfg = { + .name = "Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)", + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9270_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9270", .fw_name_pre = IWL9260A_FW_PRE, @@ -267,6 +278,34 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = { .soc_latency = 5000, }; +const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = { + .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, +}; + +const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc = { + 
.name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, +}; + const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000A_FW_PRE, @@ -327,6 +366,36 @@ const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = { .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK }; +const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = { + .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, + .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK +}; + +const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk = { + .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, + .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK +}; + MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); 
MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c503b26793f6d..84a8168097235 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -551,6 +551,7 @@ extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9260_killer_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwl9560_2ac_cfg; @@ -558,10 +559,14 @@ extern const struct iwl_cfg iwl9460_2ac_cfg_soc; extern const struct iwl_cfg iwl9461_2ac_cfg_soc; extern const struct iwl_cfg iwl9462_2ac_cfg_soc; extern const struct iwl_cfg iwl9560_2ac_cfg_soc; +extern const struct iwl_cfg iwl9560_killer_2ac_cfg_soc; +extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc; extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk; +extern const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk; +extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 38234bda90178..8520523b91b40 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -545,6 +545,9 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, 
{IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x1550, iwl9260_killer_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)}, @@ -554,6 +557,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, @@ -578,6 +582,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)}, @@ -604,6 +610,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -630,6 +638,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { 
{IWL_PCI_DEVICE(0x31DC, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x1030, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x1551, iwl9560_killer_s_2ac_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x1552, iwl9560_killer_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)}, @@ -656,6 +666,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x34F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x34F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x34F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -682,6 +694,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -708,6 +722,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x2034, 
iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -743,6 +759,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)}, @@ -771,6 +789,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -797,6 +817,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA370, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)}, -- cgit 1.2.3-korg From 299b6365a3b7cf7f5ea1c945a420e9ee4841d6f7 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 22 Jul 2018 23:46:25 +0200 Subject: brcmfmac: fix regression in parsing NVRAM for multiple devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVRAM is designed to work 
with Broadcom's SDK Linux kernel which fakes PCI domain 0 for all internal MMIO devices. Since the official Linux kernel uses platform devices for that purpose there is a mismatch in numbering PCI domains. There used to be a fix for that problem but it was accidentally dropped during the last firmware loading rework. That resulted in brcmfmac not being able to extract device specific NVRAM content and all kinds of calibration problems. Reported-by: Aditya Xavier Fixes: 2baa3aaee27f ("brcmfmac: introduce brcmf_fw_alloc_request() function") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 45928b5b8d97c..4fffa6988087b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1785,7 +1785,8 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo) fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY; fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM; fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL; - fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus); + /* NVRAM reserves PCI domain 0 for Broadcom's SDK faked bus */ + fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus) + 1; fwreq->bus_nr = devinfo->pdev->bus->number; return fwreq; -- cgit 1.2.3-korg From 62cedf3e60af03e47849fe2bd6a03ec179422a8a Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Fri, 20 Jul 2018 10:39:13 +0200 Subject: locking/rtmutex: Allow specifying a subclass for nested locking Needed for annotating rt_mutex locks.
Tested-by: John Sperbeck Signed-off-by: Peter Rosin Signed-off-by: Peter Zijlstra (Intel) Cc: Davidlohr Bueso Cc: Deepa Dinamani Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Chang Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Will Deacon Cc: Wolfram Sang Link: http://lkml.kernel.org/r/20180720083914.1950-2-peda@axentia.se Signed-off-by: Ingo Molnar --- include/linux/rtmutex.h | 7 +++++++ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1b92a28dd672b..6fd615a0eea94 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -106,7 +106,14 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock) extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); extern void rt_mutex_destroy(struct rt_mutex *lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); +#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) +#else extern void rt_mutex_lock(struct rt_mutex *lock); +#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) +#endif + extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4f014be7a4b8b..2823d4163a37c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1465,6 +1465,29 @@ rt_mutex_fastunlock(struct rt_mutex *lock, rt_mutex_postunlock(&wake_q); } +static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) +{ + might_sleep(); + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/** + * rt_mutex_lock_nested - lock a rt_mutex + * + * @lock: the rt_mutex to be 
locked + * @subclass: the lockdep subclass + */ +void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) +{ + __rt_mutex_lock(lock, subclass); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +#endif + +#ifndef CONFIG_DEBUG_LOCK_ALLOC /** * rt_mutex_lock - lock a rt_mutex * @@ -1472,12 +1495,10 @@ rt_mutex_fastunlock(struct rt_mutex *lock, */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - might_sleep(); - - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); + __rt_mutex_lock(lock, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock); +#endif /** * rt_mutex_lock_interruptible - lock a rt_mutex interruptible -- cgit 1.2.3-korg From 7b94ea50514d1a0dc94f02723b603c27bc0ea597 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Fri, 20 Jul 2018 10:39:14 +0200 Subject: i2c/mux, locking/core: Annotate the nested rt_mutex usage If an i2c topology has instances of nested muxes, then a lockdep splat is produced when i2c_parent_lock_bus() is called.
Here is an example: ============================================ WARNING: possible recursive locking detected -------------------------------------------- insmod/68159 is trying to acquire lock: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] but task is already holding lock: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(i2c_register_adapter#2); lock(i2c_register_adapter#2); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by insmod/68159: #0: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] stack backtrace: CPU: 13 PID: 68159 Comm: insmod Tainted: G O Call Trace: dump_stack+0x67/0x98 __lock_acquire+0x162e/0x1780 lock_acquire+0xba/0x200 rt_mutex_lock+0x44/0x60 i2c_parent_lock_bus+0x32/0x50 [i2c_mux] i2c_parent_lock_bus+0x3e/0x50 [i2c_mux] i2c_smbus_xfer+0xf0/0x700 i2c_smbus_read_byte+0x42/0x70 my2c_init+0xa2/0x1000 [my2c] do_one_initcall+0x51/0x192 do_init_module+0x62/0x216 load_module+0x20f9/0x2b50 SYSC_init_module+0x19a/0x1c0 SyS_init_module+0xe/0x10 do_syscall_64+0x6c/0x1a0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Reported-by: John Sperbeck Tested-by: John Sperbeck Signed-off-by: Peter Rosin Signed-off-by: Peter Zijlstra (Intel) Cc: Davidlohr Bueso Cc: Deepa Dinamani Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Chang Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Will Deacon Cc: Wolfram Sang Link: http://lkml.kernel.org/r/20180720083914.1950-3-peda@axentia.se Signed-off-by: Ingo Molnar --- drivers/i2c/i2c-core-base.c | 2 +- drivers/i2c/i2c-mux.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 301285c54603f..15c95aaa484cf 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -624,7 +624,7 @@ static int i2c_check_addr_busy(struct 
i2c_adapter *adapter, int addr) static void i2c_adapter_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { - rt_mutex_lock(&adapter->bus_lock); + rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter)); } /** diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 300ab4b672e49..29646aa6132e9 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -144,7 +144,7 @@ static void i2c_mux_lock_bus(struct i2c_adapter *adapter, unsigned int flags) struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); if (!(flags & I2C_LOCK_ROOT_ADAPTER)) return; i2c_lock_bus(parent, flags); @@ -181,7 +181,7 @@ static void i2c_parent_lock_bus(struct i2c_adapter *adapter, struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); i2c_lock_bus(parent, flags); } -- cgit 1.2.3-korg From c0dc373a780f4ec63e45a573b9551763abd8cd1a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 Jul 2018 16:16:00 -0400 Subject: locking/pvqspinlock/x86: Use LOCK_PREFIX in __pv_queued_spin_unlock() assembly code The LOCK_PREFIX macro should be used in the __raw_callee_save___pv_queued_spin_unlock() assembly code, so that the lock prefix can be patched out on UP systems. 
Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Joe Mario Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1531858560-21547-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 9ef5ee03d2d79..159622ee06748 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -43,7 +43,7 @@ asm (".pushsection .text;" "push %rdx;" "mov $0x1,%eax;" "xor %edx,%edx;" - "lock cmpxchg %dl,(%rdi);" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" "cmp $0x1,%al;" "jne .slowpath;" "pop %rdx;" -- cgit 1.2.3-korg From 6cd0c583b04b2bd9415e07b51b63ab799949dd66 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 23 Jul 2018 12:19:07 +0800 Subject: sched/topology: Check variable group before dereferencing it The 'group' variable in sched_domain_debug_one() is not checked when firstly used in cpumask_test_cpu(cpu, sched_group_span(group)), but it might be NULL (it is checked later in the following while loop) and may cause NULL pointer dereference. We need to check it before using to avoid NULL dereference. 
Signed-off-by: Yi Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiang Biao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: zhong.weidong@zte.com.cn Link: http://lkml.kernel.org/r/1532319547-33335-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Ingo Molnar --- kernel/sched/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 05a831427bc74..56a0fed30c0a8 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -47,7 +47,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); } - if (!cpumask_test_cpu(cpu, sched_group_span(group))) { + if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); } -- cgit 1.2.3-korg From 2610e88946632afb78aa58e61f11368ac4c0af7b Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 17 Jul 2018 12:35:29 -0700 Subject: stop_machine: Disable preemption after queueing stopper threads This commit: 9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads") does not fully address the race condition that can occur as follows: On one CPU, call it CPU 3, thread 1 invokes cpu_stop_queue_two_works(2, 3,...), and the execution is such that thread 1 queues the works for migration/2 and migration/3, and is preempted after releasing the locks for migration/2 and migration/3, but before waking the threads. Then, On CPU 2, a kworker, call it thread 2, is running, and it invokes cpu_stop_queue_two_works(1, 2,...), such that thread 2 queues the works for migration/1 and migration/2. Meanwhile, on CPU 3, thread 1 resumes execution, and wakes migration/2 and migration/3. 
This means that when CPU 2 releases the locks for migration/1 and migration/2, but before it wakes those threads, it can be preempted by migration/2. If thread 2 is preempted by migration/2, then migration/2 will execute the first work item successfully, since migration/3 was woken up by CPU 3, but when it goes to execute the second work item, it disables preemption, calls multi_cpu_stop(), and thus, CPU 2 will wait forever for migration/1, which should have been woken up by thread 2. However migration/1 cannot be woken up by thread 2, since it is a kworker, so it is affine to CPU 2, but CPU 2 is running migration/2 with preemption disabled, so thread 2 will never run. Disable preemption after queueing works for stopper threads to ensure that the operation of queueing the works and waking the stopper threads is atomic. Co-Developed-by: Prasad Sodagudi Co-Developed-by: Pavankumar Kondeti Signed-off-by: Isaac J. Manjarres Signed-off-by: Prasad Sodagudi Signed-off-by: Pavankumar Kondeti Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bigeasy@linutronix.de Cc: gregkh@linuxfoundation.org Cc: matt@codeblueprint.co.uk Fixes: 9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads") Link: http://lkml.kernel.org/r/1531856129-9871-1-git-send-email-isaacm@codeaurora.org Signed-off-by: Ingo Molnar --- kernel/stop_machine.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 1ff523dae6e2b..e190d1ef3a23b 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -260,6 +260,15 @@ retry: err = 0; __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); + /* + * The waking up of stopper threads has to happen + * in the same scheduling context as the queueing. + * Otherwise, there is a possibility of one of the + * above stoppers being woken up by another CPU, + * and preempting us. 
This will cause us to not + * wake up the other stopper forever. + */ + preempt_disable(); unlock: raw_spin_unlock(&stopper2->lock); raw_spin_unlock_irq(&stopper1->lock); @@ -271,7 +280,6 @@ unlock: } if (!err) { - preempt_disable(); wake_up_q(&wakeq); preempt_enable(); } -- cgit 1.2.3-korg From 840d719604b0925ca23dde95f1767e4528668369 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 20 Jul 2018 11:16:30 +0200 Subject: sched/deadline: Update rq_clock of later_rq when pushing a task Daniel Casini got this warn while running a DL task here at RetisLab: [ 461.137582] ------------[ cut here ]------------ [ 461.137583] rq->clock_update_flags < RQCF_ACT_SKIP [ 461.137599] WARNING: CPU: 4 PID: 2354 at kernel/sched/sched.h:967 assert_clock_updated.isra.32.part.33+0x17/0x20 [a ton of modules] [ 461.137646] CPU: 4 PID: 2354 Comm: label_image Not tainted 4.18.0-rc4+ #3 [ 461.137647] Hardware name: ASUS All Series/Z87-K, BIOS 0801 09/02/2013 [ 461.137649] RIP: 0010:assert_clock_updated.isra.32.part.33+0x17/0x20 [ 461.137649] Code: ff 48 89 83 08 09 00 00 eb c6 66 0f 1f 84 00 00 00 00 00 55 48 c7 c7 98 7a 6c a5 c6 05 bc 0d 54 01 01 48 89 e5 e8 a9 84 fb ff <0f> 0b 5d c3 0f 1f 44 00 00 0f 1f 44 00 00 83 7e 60 01 74 0a 48 3b [ 461.137673] RSP: 0018:ffffa77e08cafc68 EFLAGS: 00010082 [ 461.137674] RAX: 0000000000000000 RBX: ffff8b3fc1702d80 RCX: 0000000000000006 [ 461.137674] RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff8b3fded164b0 [ 461.137675] RBP: ffffa77e08cafc68 R08: 0000000000000026 R09: 0000000000000339 [ 461.137676] R10: ffff8b3fd060d410 R11: 0000000000000026 R12: ffffffffa4e14e20 [ 461.137677] R13: ffff8b3fdec22940 R14: ffff8b3fc1702da0 R15: ffff8b3fdec22940 [ 461.137678] FS: 00007efe43ee5700(0000) GS:ffff8b3fded00000(0000) knlGS:0000000000000000 [ 461.137679] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 461.137680] CR2: 00007efe30000010 CR3: 0000000301744003 CR4: 00000000001606e0 [ 461.137680] Call Trace: [ 461.137684] 
push_dl_task.part.46+0x3bc/0x460 [ 461.137686] task_woken_dl+0x60/0x80 [ 461.137689] ttwu_do_wakeup+0x4f/0x150 [ 461.137690] ttwu_do_activate+0x77/0x80 [ 461.137692] try_to_wake_up+0x1d6/0x4c0 [ 461.137693] wake_up_q+0x32/0x70 [ 461.137696] do_futex+0x7e7/0xb50 [ 461.137698] __x64_sys_futex+0x8b/0x180 [ 461.137701] do_syscall_64+0x5a/0x110 [ 461.137703] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 461.137705] RIP: 0033:0x7efe4918ca26 [ 461.137705] Code: 00 00 00 74 17 49 8b 48 20 44 8b 59 10 41 83 e3 30 41 83 fb 20 74 1e be 85 00 00 00 41 ba 01 00 00 00 41 b9 01 00 00 04 0f 05 <48> 3d 01 f0 ff ff 73 1f 31 c0 c3 be 8c 00 00 00 49 89 c8 4d 31 d2 [ 461.137738] RSP: 002b:00007efe43ee4928 EFLAGS: 00000283 ORIG_RAX: 00000000000000ca [ 461.137739] RAX: ffffffffffffffda RBX: 0000000005094df0 RCX: 00007efe4918ca26 [ 461.137740] RDX: 0000000000000001 RSI: 0000000000000085 RDI: 0000000005094e24 [ 461.137741] RBP: 00007efe43ee49c0 R08: 0000000005094e20 R09: 0000000004000001 [ 461.137741] R10: 0000000000000001 R11: 0000000000000283 R12: 0000000000000000 [ 461.137742] R13: 0000000005094df8 R14: 0000000000000001 R15: 0000000000448a10 [ 461.137743] ---[ end trace 187df4cad2bf7649 ]--- This warning happened in the push_dl_task(), because __add_running_bw()->cpufreq_update_util() is getting the rq_clock of the later_rq before its update, which takes place at activate_task(). The fix then is to update the rq_clock before calling add_running_bw(). To avoid double rq_clock_update() call, we set ENQUEUE_NOCLOCK flag to activate_task(). 
Reported-by: Daniel Casini Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Clark Williams Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Fixes: e0367b12674b ("sched/deadline: Move CPU frequency selection triggering points") Link: http://lkml.kernel.org/r/ca31d073a4788acf0684a8b255f14fea775ccf20.1532077269.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 10c7b51c0d1fd..b5fbdde6afa93 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2090,8 +2090,14 @@ retry: sub_rq_bw(&next_task->dl, &rq->dl); set_task_cpu(next_task, later_rq->cpu); add_rq_bw(&next_task->dl, &later_rq->dl); + + /* + * Update the later_rq clock here, because the clock is used + * by the cpufreq_update_util() inside __add_running_bw(). + */ + update_rq_clock(later_rq); add_running_bw(&next_task->dl, &later_rq->dl); - activate_task(later_rq, next_task, 0); + activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); ret = 1; resched_curr(later_rq); -- cgit 1.2.3-korg From f3d133ee0a17d5694c6f21873eec9863e11fa423 Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Wed, 18 Jul 2018 08:46:55 +0800 Subject: sched/rt: Restore rt_runtime after disabling RT_RUNTIME_SHARE NO_RT_RUNTIME_SHARE feature is used to prevent a CPU from borrowing enough runtime with a spin-rt-task. However, if RT_RUNTIME_SHARE feature is enabled and rt_rq has borrowed enough rt_runtime at the beginning, rt_runtime can't be restored to its initial bandwidth rt_runtime after we disable RT_RUNTIME_SHARE. E.g.
on my PC with 4 cores, procedure to reproduce: 1) Make sure RT_RUNTIME_SHARE is enabled cat /sys/kernel/debug/sched_features GENTLE_FAIR_SLEEPERS START_DEBIT NO_NEXT_BUDDY LAST_BUDDY CACHE_HOT_BUDDY WAKEUP_PREEMPTION NO_HRTICK NO_DOUBLE_TICK LB_BIAS NONTASK_CAPACITY TTWU_QUEUE NO_SIS_AVG_CPU SIS_PROP NO_WARN_DOUBLE_CLOCK RT_PUSH_IPI RT_RUNTIME_SHARE NO_LB_MIN ATTACH_AGE_LOAD WA_IDLE WA_WEIGHT WA_BIAS 2) Start a spin-rt-task ./loop_rr & 3) set affinity to the last cpu taskset -p 8 $pid_of_loop_rr 4) Observe that last cpu has borrowed enough runtime. cat /proc/sched_debug | grep rt_runtime .rt_runtime : 950.000000 .rt_runtime : 900.000000 .rt_runtime : 950.000000 .rt_runtime : 1000.000000 5) Disable RT_RUNTIME_SHARE echo NO_RT_RUNTIME_SHARE > /sys/kernel/debug/sched_features 6) Observe that rt_runtime cannot be restored cat /proc/sched_debug | grep rt_runtime .rt_runtime : 950.000000 .rt_runtime : 900.000000 .rt_runtime : 950.000000 .rt_runtime : 1000.000000 This patch helps to restore rt_runtime after we disable RT_RUNTIME_SHARE. Signed-off-by: Hailong Liu Signed-off-by: Jiang Biao Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: zhong.weidong@zte.com.cn Link: http://lkml.kernel.org/r/1531874815-39357-1-git-send-email-liu.hailong6@zte.com.cn Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 572567078b60b..eaaec8364f96f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -836,6 +836,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) * can be time-consuming. Try to avoid it when possible. 
*/ raw_spin_lock(&rt_rq->rt_runtime_lock); + if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF) + rt_rq->rt_runtime = rt_b->rt_runtime; skip = !rt_rq->rt_time && !rt_rq->rt_nr_running; raw_spin_unlock(&rt_rq->rt_runtime_lock); if (skip) -- cgit 1.2.3-korg From 6cbc304f2f360f25cc8607817239d6f4a2fd3dc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 May 2018 15:48:41 +0200 Subject: perf/x86/intel: Fix unwind errors from PEBS entries (mk-II) Vince reported the perf_fuzzer giving various unwinder warnings and Josh reported: > Deja vu. Most of these are related to perf PEBS, similar to the > following issue: > > b8000586c90b ("perf/x86/intel: Cure bogus unwind from PEBS entries") > > This is basically the ORC version of that. setup_pebs_sample_data() is > assembling a franken-pt_regs which ORC isn't happy about. RIP is > inconsistent with some of the other registers (like RSP and RBP). And where the previous unwinder only needed BP,SP ORC also requires IP. But we cannot spoof IP because then the sample will get displaced, entirely negating the point of PEBS. So cure the whole thing differently by doing the unwind early; this does however require a means to communicate we did the unwind early. We (ab)use an unused sample_type bit for this, which we set on events that fill out the data->callchain before the normal perf_prepare_sample(). 
Debugged-by: Josh Poimboeuf Reported-by: Vince Weaver Tested-by: Josh Poimboeuf Tested-by: Prashant Bhole Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 3 +++ arch/x86/events/intel/ds.c | 25 +++++++++++-------------- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 2 ++ kernel/events/core.c | 6 ++++-- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 707b2a96e516b..86f0c15dcc2db 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); + + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; } if (needs_branch_stack(event)) { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8cf03f1019380..8dbba77e05184 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event, data->data_src.val = val; } + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happend between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) + data->callchain = perf_callchain(event, iregs); + /* * We use the interrupt regs as a base because the PEBS record does not * contain a full regs set, specifically it seems to lack segment * descriptors, which get used by things like user_mode(). * * In the simple case fix up only the IP for PERF_SAMPLE_IP. 
- * - * We must however always use BP,SP from iregs for the unwinder to stay - * sane; the record BP,SP can point into thin air when the record is - * from a previous PMI context or an (I)RET happend between the record - * and PMI. */ *regs = *iregs; @@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->si = pebs->si; regs->di = pebs->di; - /* - * Per the above; only set BP,SP if we don't need callchains. - * - * XXX: does this make sense? - */ - if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { - regs->bp = pebs->bp; - regs->sp = pebs->sp; - } + regs->bp = pebs->bp; + regs->sp = pebs->sp; #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1fa12887ec020..87f6db437e4af 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); +extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b8e288a1f7409..eeb787b1c53c7 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -143,6 +143,8 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f0434a9951af..cdb32cf8e33cb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt) static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; -static struct 
perf_callchain_entry * +struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel = !event->attr.exclude_callchain_kernel; @@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; - data->callchain = perf_callchain(event, regs); + if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) + data->callchain = perf_callchain(event, regs); + size += data->callchain->nr; header->size += size * sizeof(u64); -- cgit 1.2.3-korg From 7f635ff187ab6be0b350b3ec06791e376af238ab Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 16 Jul 2018 17:13:51 -0600 Subject: perf/core: Fix crash when using HW tracing kernel filters In function perf_event_parse_addr_filter(), the path::dentry of each struct perf_addr_filter is left unassigned (as it should be) when the pattern being parsed is related to kernel space. But in function perf_addr_filter_match() the same dentries are given to d_inode() where the value is not expected to be NULL, resulting in the following splat: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 pc : perf_event_mmap+0x2fc/0x5a0 lr : perf_event_mmap+0x2c8/0x5a0 Process uname (pid: 2860, stack limit = 0x000000001cbcca37) Call trace: perf_event_mmap+0x2fc/0x5a0 mmap_region+0x124/0x570 do_mmap+0x344/0x4f8 vm_mmap_pgoff+0xe4/0x110 vm_mmap+0x2c/0x40 elf_map+0x60/0x108 load_elf_binary+0x450/0x12c4 search_binary_handler+0x90/0x290 __do_execve_file.isra.13+0x6e4/0x858 sys_execve+0x3c/0x50 el0_svc_naked+0x30/0x34 This patch is fixing the problem by introducing a new check in function perf_addr_filter_match() to see if the filter's dentry is NULL. 
Signed-off-by: Mathieu Poirier Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: miklos@szeredi.hu Cc: namhyung@kernel.org Cc: songliubraving@fb.com Fixes: 9511bce9fe8e ("perf/core: Fix bad use of igrab()") Link: http://lkml.kernel.org/r/1531782831-1186-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index cdb32cf8e33cb..eec2d5fb676bc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7337,6 +7337,10 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, struct file *file, unsigned long offset, unsigned long size) { + /* d_inode(NULL) won't be equal to any mapped user-space file */ + if (!filter->path.dentry) + return false; + if (d_inode(filter->path.dentry) != file_inode(file)) return false; -- cgit 1.2.3-korg From 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jul 2018 16:08:27 -0700 Subject: x86/boot: Fix if_changed build flip/flop bug Dirk Gouders reported that two consecutive "make" invocations on an already compiled tree will show alternating behaviors: $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h DATAREL arch/x86/boot/compressed/vmlinux Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h LD arch/x86/boot/compressed/vmlinux ZOFFSET arch/x86/boot/zoffset.h AS arch/x86/boot/header.o LD arch/x86/boot/setup.elf OBJCOPY arch/x86/boot/setup.bin OBJCOPY arch/x86/boot/vmlinux.bin BUILD arch/x86/boot/bzImage Setup is 15644 bytes (padded to 15872 bytes). 
System is 6663 kB CRC 3eb90f40 Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules He bisected it back to: commit 98f78525371b ("x86/boot: Refuse to build with data relocations") The root cause was the use of the "if_changed" kbuild function multiple times for the same target. It was designed to only be used once per target, otherwise it will effectively always trigger, flipping back and forth between the two commands getting recorded by "if_changed". Instead, this patch merges the two commands into a single function to get stable build artifacts (i.e. .vmlinux.cmd), and a single build behavior. Bisected-and-Reported-by: Dirk Gouders Fix-Suggested-by: Masahiro Yamada Signed-off-by: Kees Cook Reviewed-by: Masahiro Yamada Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180724230827.GA37823@beast Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fa42f895fdde7..169c2feda14a0 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -106,9 +106,13 @@ define cmd_check_data_rel done endef +# We need to run two commands under "if_changed", so merge them into a +# single invocation. 
+quiet_cmd_check-and-link-vmlinux = LD $@ + cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld) + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE - $(call if_changed,check_data_rel) - $(call if_changed,ld) + $(call if_changed,check-and-link-vmlinux) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE -- cgit 1.2.3-korg From 603ba2dfb338b307aebe95fe344c479a59b3a175 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:32:15 +0200 Subject: drm/atomic: Check old_plane_state->crtc in drm_atomic_helper_async_check() Async plane update is supposed to work only when updating the FB or FB position of an already enabled plane. That does not apply to requests where the plane was previously disabled or assigned to a different CRTC. Check old_plane_state->crtc value to make sure async plane update is allowed. Fixes: fef9df8b5945 ("drm/atomic: initial support for asynchronous plane update") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180724133215.31917-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/drm_atomic_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 130da5195f3b6..ff858b8900454 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1527,7 +1527,8 @@ int drm_atomic_helper_async_check(struct drm_device *dev, if (n_planes != 1) return -EINVAL; - if (!new_plane_state->crtc) + if (!new_plane_state->crtc || + old_plane_state->crtc != new_plane_state->crtc) return -EINVAL; funcs = plane->helper_private; -- cgit 1.2.3-korg From de2d8db395c32d121d02871819444b631f73e0b6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:33:00 +0200 Subject: drm/atomic: Initialize variables in drm_atomic_helper_async_check() to make gcc happy drm_atomic_helper_async_check() declares the plane, old_plane_state and 
new_plane_state variables to iterate over all planes of the atomic state and make sure only one plane is enabled. Unfortunately gcc is not smart enough to figure out that the check on n_planes is enough to guarantee that plane, new_plane_state and old_plane_state are initialized. Explicitly initialize those variables to NULL to make gcc happy. Fixes: fef9df8b5945 ("drm/atomic: initial support for asynchronous plane update") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180724133300.32023-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/drm_atomic_helper.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ff858b8900454..81e32199d3ef4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1510,8 +1510,9 @@ int drm_atomic_helper_async_check(struct drm_device *dev, { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; - struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane *plane = NULL; + struct drm_plane_state *old_plane_state = NULL; + struct drm_plane_state *new_plane_state = NULL; const struct drm_plane_helper_funcs *funcs; int i, n_planes = 0; -- cgit 1.2.3-korg From a6a00918d4ad8718c3ccde38c02cec17f116b2fd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:36:01 +0200 Subject: drm/vc4: Reset ->{x, y}_scaling[1] when dealing with uniplanar formats This is needed to ensure ->is_unity is correct when the plane was previously configured to output a multi-planar format with scaling enabled, and is then being reconfigured to output a uniplanar format. 
Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180724133601.32114-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/vc4/vc4_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 1d34619eb3fe3..a951ec75d01f8 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -320,6 +320,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->x_scaling[0] = VC4_SCALING_TPZ; if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + } else { + vc4_state->x_scaling[1] = VC4_SCALING_NONE; + vc4_state->y_scaling[1] = VC4_SCALING_NONE; } vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && -- cgit 1.2.3-korg From ecbc42ca5d665e9238a4cdb595024d2e6cf87f2d Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 23 Jul 2018 23:36:04 +0900 Subject: virtio_net: Fix inconsistent received bytes counter When received packets are dropped in virtio_net driver, received packets counter is incremented but bytes counter is not. As a result, for instance if we drop all packets by XDP, only received is counted and bytes stays 0, which looks inconsistent. IMHO received packets/bytes should be counted if packets are produced by the hypervisor, like what common NICs on physical machines are doing. So fix the bytes counter. Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 53085c63277b4..2b6ec927809e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -586,7 +586,8 @@ static struct sk_buff *receive_small(struct net_device *dev, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, - unsigned int *xdp_xmit) + unsigned int *xdp_xmit, + unsigned int *rbytes) { struct sk_buff *skb; struct bpf_prog *xdp_prog; @@ -601,6 +602,7 @@ static struct sk_buff *receive_small(struct net_device *dev, int err; len -= vi->hdr_len; + *rbytes += len; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -705,11 +707,13 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, - unsigned int len) + unsigned int len, + unsigned int *rbytes) { struct page *page = buf; struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + *rbytes += len - vi->hdr_len; if (unlikely(!skb)) goto err; @@ -727,7 +731,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *buf, void *ctx, unsigned int len, - unsigned int *xdp_xmit) + unsigned int *xdp_xmit, + unsigned int *rbytes) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); @@ -740,6 +745,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, int err; head_skb = NULL; + *rbytes += len - vi->hdr_len; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -877,6 +883,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_buf; } + *rbytes += len; page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); @@ -932,6 +939,7 @@ err_skb: dev->stats.rx_length_errors++; break; } + *rbytes += len; page = virt_to_head_page(buf); put_page(page); } @@ -942,14 +950,13 @@ xdp_xmit: 
return NULL; } -static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len, void **ctx, - unsigned int *xdp_xmit) +static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, + void *buf, unsigned int len, void **ctx, + unsigned int *xdp_xmit, unsigned int *rbytes) { struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; - int ret; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -961,23 +968,22 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, } else { put_page(virt_to_head_page(buf)); } - return 0; + return; } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit); + skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, + rbytes); else if (vi->big_packets) - skb = receive_big(dev, vi, rq, buf, len); + skb = receive_big(dev, vi, rq, buf, len, rbytes); else - skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit); + skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, rbytes); if (unlikely(!skb)) - return 0; + return; hdr = skb_vnet_hdr(skb); - ret = skb->len; - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -994,12 +1000,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, ntohs(skb->protocol), skb->len, skb->pkt_type); napi_gro_receive(&rq->napi, skb); - return ret; + return; frame_err: dev->stats.rx_frame_errors++; dev_kfree_skb(skb); - return 0; } /* Unlike mergeable buffers, all buffers are allocated to the @@ -1249,13 +1254,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget, while (received < budget && (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { - bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit); + receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &bytes); received++; } } else { while (received < budget && (buf = 
virtqueue_get_buf(rq->vq, &len)) != NULL) { - bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit); + receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &bytes); received++; } } -- cgit 1.2.3-korg From 7856e8616273098dc6c09a6e084afd98a283ff0d Mon Sep 17 00:00:00 2001 From: dann frazier Date: Mon, 23 Jul 2018 16:55:40 -0600 Subject: hinic: Link the logical network device to the pci device in sysfs Otherwise interfaces get exposed under /sys/devices/virtual, which doesn't give udev the context it needs for PCI-based predictable interface names. Signed-off-by: dann frazier Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 5b122728dcb47..09e9da10b7865 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -983,6 +983,7 @@ static int nic_dev_init(struct pci_dev *pdev) hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS, nic_dev, link_status_event_handler); + SET_NETDEV_DEV(netdev, &pdev->dev); err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); -- cgit 1.2.3-korg From 9aee40006190a3cda9a4d2dbae71e92617c8c362 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Mon, 23 Jul 2018 17:49:39 -0700 Subject: tcp: ack immediately when a cwr packet arrives We observed high 99 and 99.9% latencies when doing RPCs with DCTCP. The problem is triggered when the last packet of a request arrives CE marked. The reply will carry the ECE mark causing TCP to shrink its cwnd to 1 (because there are no packets in flight). When the 1st packet of the next request arrives, the ACK was sometimes delayed even though it is CWR marked, adding up to 40ms to the RPC latency. This patch ensures that CWR marked data packets arriving will be acked immediately. 
Packetdrill script to reproduce the problem: 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < [ect0] SEW 0:0(0) win 32792 0.100 > SE. 0:0(0) ack 1 0.110 < [ect0] . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 0.200 < [ect0] . 1:1001(1000) ack 1 win 257 0.200 > [ect01] . 1:1(0) ack 1001 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 1:2(1) ack 1001 0.200 < [ect0] . 1001:2001(1000) ack 2 win 257 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 2:3(1) ack 2001 0.200 < [ect0] . 2001:3001(1000) ack 3 win 257 0.200 < [ect0] . 3001:4001(1000) ack 3 win 257 0.200 > [ect01] . 3:3(0) ack 4001 0.210 < [ce] P. 4001:4501(500) ack 3 win 257 +0.001 read(4, ..., 4500) = 4500 +0 write(4, ..., 1) = 1 +0 > [ect01] PE. 3:4(1) ack 4501 +0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257 // Previously the ACK sequence below would be 4501, causing a long RTO +0.040~+0.045 > [ect01] . 4:4(0) ack 5501 // delayed ack +0.311 < [ect0] . 5501:6501(1000) ack 4 win 257 // More data +0 > [ect01] . 4:4(0) ack 6501 // now acks everything +0.500 < F. 9501:9501(0) ack 4 win 257 Modified based on comments by Neal Cardwell Signed-off-by: Lawrence Brakmo Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bcd30a2ba068..f9dcb29be12da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -246,8 +246,15 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp) static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) { - if (tcp_hdr(skb)->cwr) + if (tcp_hdr(skb)->cwr) { tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + + /* If the sender is telling us it has entered CWR, then its + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately. + */ + tcp_enter_quickack_mode((struct sock *)tp, 2); + } } static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) -- cgit 1.2.3-korg From b87bac1012c483462e7776c7b7320b659dbb3295 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Jul 2018 06:06:13 +0000 Subject: net: igmp: make function __ip_mc_inc_group() static Fixes the following sparse warnings: net/ipv4/igmp.c:1391:6: warning: symbol '__ip_mc_inc_group' was not declared. Should it be static? Fixes: 6e2059b53f98 ("ipv4/igmp: init group mode as INCLUDE when join source group") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 28fef7d15959f..75151be21413f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1387,7 +1387,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev, /* * A socket has joined a multicast group on device dev. 
*/ -void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode) +static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, + unsigned int mode) { struct ip_mc_list *im; #ifdef CONFIG_IP_MULTICAST -- cgit 1.2.3-korg From 9e4e5b5c86661e767f3074bedbbf2ca59f8fbd18 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 26 Jul 2018 00:46:11 +0800 Subject: bpf, x32: Fix regression caused by commit 24dea04767e6 Commit 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind") removed the 4 /* Extra space for skb_copy_bits buffer */ from _STACK_SIZE, but it didn't fix the concerned code in emit_prologue and emit_epilogue, and this error will bring very strange kernel runtime errors. This patch fixes it. Fixes: 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind") Reported-by: Meelis Roos Bisected-by: Meelis Roos Signed-off-by: Wang YanQing Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 55799873ebe53..8f6cc71e08482 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1441,8 +1441,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth) /* sub esp,STACK_SIZE */ EMIT2_off32(0x81, 0xEC, STACK_SIZE); - /* sub ebp,SCRATCH_SIZE+4+12*/ - EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); + /* sub ebp,SCRATCH_SIZE+12*/ + EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12); /* xor ebx,ebx */ EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); @@ -1475,8 +1475,8 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) /* mov edx,dword ptr [ebp+off]*/ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); - /* add ebp,SCRATCH_SIZE+4+12*/ - EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); + /* add ebp,SCRATCH_SIZE+12*/ + EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12); /* mov ebx,dword ptr [ebp-12]*/ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, 
IA32_EBX), -12); -- cgit 1.2.3-korg From d24458e43b103c7eb7b2fd57bcac392fd7750438 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 23 Jul 2018 11:43:03 +0200 Subject: xsk: fix poll/POLLIN premature returns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polling for the ingress queues relies on reading the producer/consumer pointers of the Rx queue. Prior this commit, a cached consumer pointer could be used, instead of the actual consumer pointer and therefore report POLLIN prematurely. This patch makes sure that the non-cached consumer pointer is used instead. Reported-by: Qi Zhang Tested-by: Qi Zhang Fixes: c497176cb2e4 ("xsk: add Rx receive functions and poll support") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 52ecaf7706427..8a64b150be546 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -250,7 +250,7 @@ static inline bool xskq_full_desc(struct xsk_queue *q) static inline bool xskq_empty_desc(struct xsk_queue *q) { - return xskq_nb_free(q, q->prod_tail, 1) == q->nentries; + return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries; } void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); -- cgit 1.2.3-korg From c259b4fb33ee6e7667bf1d34bf0803b7c5fdbdce Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jul 2018 15:39:27 -0700 Subject: netdevsim: don't leak devlink resources Devlink resources registered with devlink_resource_register() have to be unregistered. Fixes: 37923ed6b8ce ("netdevsim: Add simple FIB resource controller via devlink") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c index ba663e5af168b..5135fc371f01b 100644 --- a/drivers/net/netdevsim/devlink.c +++ b/drivers/net/netdevsim/devlink.c @@ -207,6 +207,7 @@ void nsim_devlink_teardown(struct netdevsim *ns) struct net *net = nsim_to_net(ns); bool *reg_devlink = net_generic(net, nsim_devlink_id); + devlink_resources_unregister(ns->devlink, NULL); devlink_unregister(ns->devlink); devlink_free(ns->devlink); ns->devlink = NULL; -- cgit 1.2.3-korg From 942a656f1f228f06a37adad0e6c347773cfe7bd6 Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Wed, 25 Jul 2018 19:39:52 +0530 Subject: cxgb4: Added missing break in ndo_udp_tunnel_{add/del} Break statements were missing for Geneve case in ndo_udp_tunnel_{add/del}, thereby raw mac matchall entries were not getting added. Fixes: c746fc0e8b2d("cxgb4: add geneve offload support for T6") Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index bc03c175a3cdf..a8926e97935eb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3072,6 +3072,7 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev, adapter->geneve_port = 0; t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0); + break; default: return; } @@ -3157,6 +3158,7 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F); + break; default: return; } -- cgit 1.2.3-korg From 92cab799bbc6fa1fca84bd1692285a5f926c17e9 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 4 Jul 2018 10:57:58 -0400 Subject: media: bpf: ensure bpf program is freed on detach Currently we are leaking bpf programs when they are detached from the lirc device; the refcount never reaches zero. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/bpf-lirc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 40826bba06b6d..55400317ec539 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -174,6 +174,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog) rcu_assign_pointer(raw->progs, new_array); bpf_prog_array_free(old_array); + bpf_prog_put(prog); unlock: mutex_unlock(&ir_raw_handler_lock); return ret; -- cgit 1.2.3-korg From 7f3fc7ddf719cd6faaf787722c511f6918ac6aab Mon Sep 17 00:00:00 2001 From: tangpengpeng Date: Thu, 26 Jul 2018 14:45:16 +0800 Subject: net: fix amd-xgbe flow-control issue If we enable or disable xgbe flow-control by ethtool, it doesn't work. Because the parameter is not properly assigned, we need to adjust the assignment order of the parameters.
Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic") Signed-off-by: tangpengpeng Acked-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4b5d625de8f0b..8a3a60bb26888 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1111,14 +1111,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) if (pdata->tx_pause != pdata->phy.tx_pause) { new_state = 1; - pdata->hw_if.config_tx_flow_control(pdata); pdata->tx_pause = pdata->phy.tx_pause; + pdata->hw_if.config_tx_flow_control(pdata); } if (pdata->rx_pause != pdata->phy.rx_pause) { new_state = 1; - pdata->hw_if.config_rx_flow_control(pdata); pdata->rx_pause = pdata->phy.rx_pause; + pdata->hw_if.config_rx_flow_control(pdata); } /* Speed support */ -- cgit 1.2.3-korg From 4f206a0fabc3e806349add0996b3a999739559d2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2018 11:52:06 +0300 Subject: tools/power turbostat: fix bogus summary values This patch fixes a regression introduced in commit 8cb48b32a5de ("tools/power turbostat: track thread ID in cpu_topology") Turbostat uses incorrect cores number ('topo.num_cores') - its value is count of logical CPUs, instead of count of physical cores. So it is twice as large as it should be on a typical Intel system. For example, on a 6 core Xeon system 'topo.num_cores' is 12, and on a 52 core Xeon system 'topo.num_cores' is 104. And interestingly, on a 68-core Knights Landing Intel system 'topo.num_cores' is 272, because this system has 4 logical CPUs per core. As a result, some of the turbostat calculations are incorrect. For example, on idle 52-core Xeon system when all cores are ~99% in Core C6 (CPU%c6), the summary (very first) line shows ~48% Core C6, while it should be ~99%. 
This patch fixes the problem by fixing 'topo.num_cores' calculation. Was: 1. Init 'thread_id' for all CPUs to -1 2. Run 'get_thread_siblings()' which sets it to 0 or 1 3. Increment 'topo.num_cores' when thread_id != -1 (bug!) Now: 1. Init 'thread_id' for all CPUs to -1 2. Run 'get_thread_siblings()' which sets it to 0 or 1 3. Increment 'topo.num_cores' when thread_id is not 0 I did not have a chance to test this on an AMD machine, and only tested on a couple of Intel Xeons (6 and 52 cores). Reported-by: Vladislav Govtva Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 81a1df0fb5e21..1b53a2489ebb6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4838,7 +4838,7 @@ void topology_probe() siblings = get_thread_siblings(&cpus[i]); if (siblings > max_siblings) max_siblings = siblings; - if (cpus[i].thread_id != -1) + if (cpus[i].thread_id == 0) topo.num_cores++; if (debug > 1) -- cgit 1.2.3-korg From cfce494db3bfccd2a0774652b95f286639acef36 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 25 Jul 2018 17:25:29 -0400 Subject: tools/power turbostat: fix x2apic debug message output file A recently added x2apic debug message was hard-coded to stderr. That doesn't work with "-o outfile". 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1b53a2489ebb6..02e71accad169 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1690,7 +1690,7 @@ void get_apic_id(struct thread_data *t) t->x2apic_id = edx; if (debug && (t->apic_id != t->x2apic_id)) - fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); + fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); } /* -- cgit 1.2.3-korg From 2ffbb22406079fec2c3a6ad6ee1dc99fede740ac Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 26 Jul 2018 09:08:54 -0400 Subject: tools/power turbostat: Fix logical node enumeration to allow for non-sequential physical nodes turbostat fails on some multi-package topologies because the logical node enumeration assumes that the nodes are sequentially numbered, which causes the logical numa nodes to not be enumerated, or enumerated incorrectly. Use a more robust enumeration algorithm which allows for non-sequential physical nodes.
Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 106 ++++++++++++++++------------------ 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 02e71accad169..2b0135599f37f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2471,55 +2471,43 @@ int get_core_id(int cpu) void set_node_data(void) { - char path[80]; - FILE *filep; - int pkg, node, cpu; - - struct pkg_node_info { - int count; - int min; - } *pni; - - pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); - if (!pni) - err(1, "calloc pkg_node_count"); - - for (pkg = 0; pkg < topo.num_packages; pkg++) - pni[pkg].min = topo.num_cpus; - - for (node = 0; node <= topo.max_node_num; node++) { - /* find the "first" cpu in the node */ - sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); - filep = fopen(path, "r"); - if (!filep) - continue; - fscanf(filep, "%d", &cpu); - fclose(filep); - - pkg = cpus[cpu].physical_package_id; - pni[pkg].count++; - - if (node < pni[pkg].min) - pni[pkg].min = node; - } - - for (pkg = 0; pkg < topo.num_packages; pkg++) - if (pni[pkg].count > topo.nodes_per_pkg) - topo.nodes_per_pkg = pni[0].count; - - /* Fake 1 node per pkg for machines that don't - * expose nodes and thus avoid -nan results - */ - if (topo.nodes_per_pkg == 0) - topo.nodes_per_pkg = 1; - - for (cpu = 0; cpu < topo.num_cpus; cpu++) { - pkg = cpus[cpu].physical_package_id; - node = cpus[cpu].physical_node_id; - cpus[cpu].logical_node_id = node - pni[pkg].min; + int pkg, node, lnode, cpu, cpux; + int cpu_count; + + /* initialize logical_node_id */ + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) + cpus[cpu].logical_node_id = -1; + + cpu_count = 0; + for (pkg = 0; pkg < topo.num_packages; pkg++) { + lnode = 0; + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + if (cpus[cpu].physical_package_id != pkg) + continue; + 
/* find a cpu with an unset logical_node_id */ + if (cpus[cpu].logical_node_id != -1) + continue; + cpus[cpu].logical_node_id = lnode; + node = cpus[cpu].physical_node_id; + cpu_count++; + /* + * find all matching cpus on this pkg and set + * the logical_node_id + */ + for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { + if ((cpus[cpux].physical_package_id == pkg) && + (cpus[cpux].physical_node_id == node)) { + cpus[cpux].logical_node_id = lnode; + cpu_count++; + } + } + lnode++; + if (lnode > topo.nodes_per_pkg) + topo.nodes_per_pkg = lnode; + } + if (cpu_count >= topo.max_cpu_num) + break; } - free(pni); - } int get_physical_node_id(struct cpu_topology *thiscpu) @@ -4840,14 +4828,6 @@ void topology_probe() max_siblings = siblings; if (cpus[i].thread_id == 0) topo.num_cores++; - - if (debug > 1) - fprintf(outf, - "cpu %d pkg %d node %d core %d thread %d\n", - i, cpus[i].physical_package_id, - cpus[i].physical_node_id, - cpus[i].physical_core_id, - cpus[i].thread_id); } topo.cores_per_node = max_core_id + 1; @@ -4873,6 +4853,20 @@ void topology_probe() topo.threads_per_core = max_siblings; if (debug > 1) fprintf(outf, "max_siblings %d\n", max_siblings); + + if (debug < 1) + return; + + for (i = 0; i <= topo.max_cpu_num; ++i) { + fprintf(outf, + "cpu %d pkg %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, + cpus[i].physical_node_id, + cpus[i].logical_node_id, + cpus[i].physical_core_id, + cpus[i].thread_id); + } + } void -- cgit 1.2.3-korg From 7effaf06c3cdef6855e127886c7405b9ab62f90d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 24 Jul 2018 14:12:20 +0300 Subject: net: rollback orig value on failure of dev_qdisc_change_tx_queue_len Fix dev_change_tx_queue_len so it rolls back original value upon a failure in dev_qdisc_change_tx_queue_len. This is already done for notifiers' failures, share the code.
In case of failure in dev_qdisc_change_tx_queue_len, some tx queues would still be of the new length, while they should be reverted. Currently, the revert is not done, and is marked with a TODO label in dev_qdisc_change_tx_queue_len, and should find some nice solution to do it. Yet it is still better to not apply the newly requested value. Fixes: 48bfd55e7e41 ("net_sched: plug in qdisc ops change_tx_queue_len") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Reported-by: Ran Rozenstein Cc: Cong Wang Signed-off-by: David S. Miller --- net/core/dev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index a5aa1c7444e68..559a91271f82d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7149,16 +7149,19 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) dev->tx_queue_len = new_len; res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = notifier_to_errno(res); - if (res) { - netdev_err(dev, - "refused to change device tx_queue_len\n"); - dev->tx_queue_len = orig_len; - return res; - } - return dev_qdisc_change_tx_queue_len(dev); + if (res) + goto err_rollback; + res = dev_qdisc_change_tx_queue_len(dev); + if (res) + goto err_rollback; } return 0; + +err_rollback: + netdev_err(dev, "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return res; } /** -- cgit 1.2.3-korg From 9e630bcb7701f94dbd729fe57d37c089c763ad9f Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Tue, 24 Jul 2018 20:31:58 -0700 Subject: RDS: RDMA: Fix the NULL-ptr deref in rds_ib_get_mr Registration of a memory region(MR) through FRMR/fastreg(unlike FMR) needs a connection/qp. With a proxy qp, this dependency on connection will be removed, but that needs more infrastructure patches, which is a work in progress. As an intermediate fix, the get_mr returns EOPNOTSUPP when connection details are not populated. 
The MR registration through sendmsg() will continue to work even with fast registration, since connection in this case is formed upfront. This patch fixes the following crash: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 4244 Comm: syzkaller468044 Not tainted 4.16.0-rc6+ #361 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544 RSP: 0018:ffff8801b059f890 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8801b07e1300 RCX: ffffffff8562d96e RDX: 000000000000000d RSI: 0000000000000001 RDI: 0000000000000068 RBP: ffff8801b059f8b8 R08: ffffed0036274244 R09: ffff8801b13a1200 R10: 0000000000000004 R11: ffffed0036274243 R12: ffff8801b13a1200 R13: 0000000000000001 R14: ffff8801ca09fa9c R15: 0000000000000000 FS: 00007f4d050af700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4d050aee78 CR3: 00000001b0d9b006 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __rds_rdma_map+0x710/0x1050 net/rds/rdma.c:271 rds_get_mr_for_dest+0x1d4/0x2c0 net/rds/rdma.c:357 rds_setsockopt+0x6cc/0x980 net/rds/af_rds.c:347 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x4456d9 RSP: 002b:00007f4d050aedb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00000000006dac3c RCX: 00000000004456d9 RDX: 0000000000000007 RSI: 0000000000000114 RDI: 0000000000000004 RBP: 00000000006dac38 R08: 00000000000000a0 R09: 0000000000000000 R10: 0000000020000380 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffbfb36d6f R14: 00007f4d050af9c0 R15: 0000000000000005 Code: fa 48 c1 ea 
03 80 3c 02 00 0f 85 cc 01 00 00 4c 8b bb 80 04 00 00 48 b8 00 00 00 00 00 fc ff df 49 8d 7f 68 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 9c 01 00 00 4d 8b 7f 68 48 b8 00 00 00 00 00 RIP: rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544 RSP: ffff8801b059f890 ---[ end trace 7e1cea13b85473b0 ]--- Reported-by: syzbot+b51c77ef956678a65834@syzkaller.appspotmail.com Signed-off-by: Santosh Shilimkar Signed-off-by: Avinash Repaka Signed-off-by: David S. Miller --- net/rds/ib_frmr.c | 5 +++++ net/rds/ib_mr.h | 3 ++- net/rds/ib_rdma.c | 21 +++++++++++++-------- net/rds/rdma.c | 13 ++++++++----- net/rds/rds.h | 5 ++++- net/rds/send.c | 12 +++++++----- 6 files changed, 39 insertions(+), 20 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 48332a6ed7383..d152e48ea371a 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -344,6 +344,11 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, struct rds_ib_frmr *frmr; int ret; + if (!ic) { + /* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/ + return ERR_PTR(-EOPNOTSUPP); + } + do { if (ibmr) rds_ib_free_frmr(ibmr, true); diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 0ea4ab017a8cc..655f01d427fe5 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -115,7 +115,8 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, - struct rds_sock *rs, u32 *key_ret); + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn); void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index e678699268a25..2e49a40a5e113 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -537,11 +537,12 @@ void rds_ib_flush_mrs(void) } void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, - struct 
rds_sock *rs, u32 *key_ret) + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn) { struct rds_ib_device *rds_ibdev; struct rds_ib_mr *ibmr = NULL; - struct rds_ib_connection *ic = rs->rs_conn->c_transport_data; + struct rds_ib_connection *ic = NULL; int ret; rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); @@ -550,6 +551,9 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, goto out; } + if (conn) + ic = conn->c_transport_data; + if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; goto out; @@ -559,17 +563,18 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); else ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); - if (ibmr) - rds_ibdev = NULL; - - out: - if (!ibmr) + if (IS_ERR(ibmr)) { + ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); + } else { + return ibmr; + } + out: if (rds_ibdev) rds_ib_dev_put(rds_ibdev); - return ibmr; + return ERR_PTR(ret); } void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 634cfcb7bba68..80920e47f2c79 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -170,7 +170,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, } static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, - u64 *cookie_ret, struct rds_mr **mr_ret) + u64 *cookie_ret, struct rds_mr **mr_ret, + struct rds_conn_path *cp) { struct rds_mr *mr = NULL, *found; unsigned int nr_pages; @@ -269,7 +270,8 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, * Note that dma_map() implies that pending writes are * flushed to RAM, so no dma_sync is needed here. */ trans_private = rs->rs_transport->get_mr(sg, nents, rs, - &mr->r_key); + &mr->r_key, + cp ? 
cp->cp_conn : NULL); if (IS_ERR(trans_private)) { for (i = 0 ; i < nents; i++) @@ -330,7 +332,7 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) sizeof(struct rds_get_mr_args))) return -EFAULT; - return __rds_rdma_map(rs, &args, NULL, NULL); + return __rds_rdma_map(rs, &args, NULL, NULL, NULL); } int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) @@ -354,7 +356,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) new_args.cookie_addr = args.cookie_addr; new_args.flags = args.flags; - return __rds_rdma_map(rs, &new_args, NULL, NULL); + return __rds_rdma_map(rs, &new_args, NULL, NULL, NULL); } /* @@ -782,7 +784,8 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, rm->m_rdma_cookie != 0) return -EINVAL; - return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr); + return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, + &rm->rdma.op_rdma_mr, rm->m_conn_path); } /* diff --git a/net/rds/rds.h b/net/rds/rds.h index f2272fb8cd456..60b3b787fbdb3 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -464,6 +464,8 @@ struct rds_message { struct scatterlist *op_sg; } data; }; + + struct rds_conn_path *m_conn_path; }; /* @@ -544,7 +546,8 @@ struct rds_transport { unsigned int avail); void (*exit)(void); void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, - struct rds_sock *rs, u32 *key_ret); + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn); void (*sync_mr)(void *trans_private, int direction); void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); diff --git a/net/rds/send.c b/net/rds/send.c index 94c7f74909be3..59f17a2335f44 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1169,6 +1169,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) rs->rs_conn = conn; } + if (conn->c_trans->t_mp_capable) + cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; + else + cpath 
= &conn->c_path[0]; + + rm->m_conn_path = cpath; + /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); if (ret) { @@ -1192,11 +1199,6 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - if (conn->c_trans->t_mp_capable) - cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; - else - cpath = &conn->c_path[0]; - if (rds_destroy_pending(conn)) { ret = -EAGAIN; goto out; -- cgit 1.2.3-korg From 36e0f12bbfd3016f495904b35e41c5711707509f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 26 Jul 2018 23:17:03 +0900 Subject: xdp: add NULL pointer check in __xdp_return() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rhashtable_lookup() can return NULL. so that NULL pointer check routine should be added. Fixes: 02b55e5657c3 ("xdp: add MEM_TYPE_ZERO_COPY") Signed-off-by: Taehee Yoo Acked-by: Martin KaFai Lau Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/core/xdp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d5..6771f1855b961 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -345,7 +345,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, rcu_read_lock(); /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - xa->zc_alloc->free(xa->zc_alloc, handle); + if (!WARN_ON_ONCE(!xa)) + xa->zc_alloc->free(xa->zc_alloc, handle); rcu_read_unlock(); default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ -- cgit 1.2.3-korg From 5f300e8004cb80182a24c0fa488218a4a43e6aac Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 26 Jul 2018 09:57:59 -0700 Subject: bpf: btf: Use exact btf value_size match in map_check_btf() The current map_check_btf() in BPF_MAP_TYPE_ARRAY rejects '> map->value_size' to ensure map_seq_show_elem() will not 
access things beyond an array element. Yonghong suggested that using '!=' is a more correct check. The 8 bytes round_up on value_size is stored in array->elem_size. Hence, using '!=' on map->value_size is a proper check. This patch also adds new tests to check the btf array key type and value type. Two of these new tests verify the btf's value_size (the change in this patch). It also fixes two existing tests that wrongly encoded a btf's type size (pprint_test) and the value_type_id (in one of the raw_tests[]). However, that do not affect these two BTF verification tests before or after this test changes. These two tests mainly failed at array creation time after this patch. Fixes: a26ca7c982cb ("bpf: btf: Add pretty print support to the basic arraymap") Suggested-by: Yonghong Song Acked-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 2 +- tools/testing/selftests/bpf/test_btf.c | 86 +++++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 544e58f5f6429..2aa55d030c774 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -378,7 +378,7 @@ static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, return -EINVAL; value_type = btf_type_id_size(btf, &btf_value_id, &value_size); - if (!value_type || value_size > map->value_size) + if (!value_type || value_size != map->value_size) return -EINVAL; return 0; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 402c0f7cc418d..ffdd27737c9e7 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -507,7 +507,7 @@ static struct btf_raw_test raw_tests[] = { .key_size = sizeof(int), .value_size = sizeof(void *) * 4, .key_type_id = 1, - .value_type_id = 4, + .value_type_id = 5, .max_entries = 4, }, @@ -1292,6 +1292,88 @@ static struct btf_raw_test raw_tests[] 
= { .err_str = "type != 0", }, +{ + .descr = "arraymap invalid btf key (a bit field)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* 32 bit int with 32 bit offset */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 2, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf key (!= 32 bits)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* 16 bit int with 0 bit offset */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 2, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf value (too small)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + /* btf_value_size < map->value_size */ + .value_size = sizeof(__u64), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf value (too big)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + /* btf_value_size > map->value_size */ + .value_size = sizeof(__u16), + .key_type_id = 1, + .value_type_id = 1, + .max_entries 
= 4, + .map_create_err = true, +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -2051,7 +2133,7 @@ static struct btf_raw_test pprint_test = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32), BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ -- cgit 1.2.3-korg From 101f0cd4f2216d32f1b8a75a2154cf3997484ee2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 26 Jul 2018 23:40:33 +0300 Subject: net: ena: Fix use of uninitialized DMA address bits field UBSAN triggers the following undefined behaviour warnings: [...] [ 13.236124] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:468:22 [ 13.240043] shift exponent 64 is too large for 64-bit type 'long long unsigned int' [...] [ 13.744769] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:373:4 [ 13.748694] shift exponent 64 is too large for 64-bit type 'long long unsigned int' [...] When splitting the address to high and low, GENMASK_ULL is used to generate a bitmask with dma_addr_bits field from io_sq (in ena_com_prepare_tx and ena_com_add_single_rx_desc). The problem is that dma_addr_bits is not initialized with a proper value (besides being cleared in ena_com_create_io_queue). Assign dma_addr_bits the correct value that is stored in ena_dev when initializing the SQ. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Gal Pressman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 1b9d3130af4d6..17f12c18d225a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -333,6 +333,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); + io_sq->dma_addr_bits = ena_dev->dma_addr_bits; io_sq->desc_entry_size = (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? sizeof(struct ena_eth_io_tx_desc) : -- cgit 1.2.3-korg From 12864ff8545f6b8144fdf1bb89b5663357f29ec4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Jul 2018 10:58:20 +0200 Subject: ACPI / LPSS: Avoid PM quirks on suspend and resume from hibernation Commit a09c59130688 (ACPI / LPSS: Avoid PM quirks on suspend and resume from S3) modified the ACPI driver for Intel SoCs (LPSS) to avoid applying PM quirks on suspend and resume from S3 to address system-wide suspend and resume problems on some systems, but it is reported that the same issue also affects hibernation, so extend the approach used by that commit to cover hibernation as well. Fixes: a09c59130688 (ACPI / LPSS: Avoid PM quirks on suspend and resume from S3) Link: https://bugs.launchpad.net/bugs/1774950 Reported-by: Kai-Heng Feng Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mika Westerberg --- drivers/acpi/acpi_lpss.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index f8fecfec5df9b..9706613eecf9e 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -879,6 +879,7 @@ static void acpi_lpss_dismiss(struct device *dev) #define LPSS_GPIODEF0_DMA_LLP BIT(13) static DEFINE_MUTEX(lpss_iosf_mutex); +static bool lpss_iosf_d3_entered; static void lpss_iosf_enter_d3_state(void) { @@ -921,6 +922,9 @@ static void lpss_iosf_enter_d3_state(void) iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE, LPSS_IOSF_GPIODEF0, value1, mask1); + + lpss_iosf_d3_entered = true; + exit: mutex_unlock(&lpss_iosf_mutex); } @@ -935,6 +939,11 @@ static void lpss_iosf_exit_d3_state(void) mutex_lock(&lpss_iosf_mutex); + if (!lpss_iosf_d3_entered) + goto exit; + + lpss_iosf_d3_entered = false; + iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE, LPSS_IOSF_GPIODEF0, value1, mask1); @@ -944,13 +953,13 @@ static void lpss_iosf_exit_d3_state(void) iosf_mbi_modify(LPSS_IOSF_UNIT_LPIO1, MBI_CFG_WRITE, LPSS_IOSF_PMCSR, value2, mask2); +exit: mutex_unlock(&lpss_iosf_mutex); } -static int acpi_lpss_suspend(struct device *dev, bool runtime) +static int acpi_lpss_suspend(struct device *dev, bool wakeup) { struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); - bool wakeup = runtime || device_may_wakeup(dev); int ret; if (pdata->dev_desc->flags & LPSS_SAVE_CTX) @@ -963,14 +972,14 @@ static int acpi_lpss_suspend(struct device *dev, bool runtime) * wrong status for devices being about to be powered off. See * lpss_iosf_enter_d3_state() for further information. 
*/ - if ((runtime || !pm_suspend_via_firmware()) && + if (acpi_target_system_state() == ACPI_STATE_S0 && lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) lpss_iosf_enter_d3_state(); return ret; } -static int acpi_lpss_resume(struct device *dev, bool runtime) +static int acpi_lpss_resume(struct device *dev) { struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); int ret; @@ -979,8 +988,7 @@ static int acpi_lpss_resume(struct device *dev, bool runtime) * This call is kept first to be in symmetry with * acpi_lpss_runtime_suspend() one. */ - if ((runtime || !pm_resume_via_firmware()) && - lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) + if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) lpss_iosf_exit_d3_state(); ret = acpi_dev_resume(dev); @@ -1004,12 +1012,12 @@ static int acpi_lpss_suspend_late(struct device *dev) return 0; ret = pm_generic_suspend_late(dev); - return ret ? ret : acpi_lpss_suspend(dev, false); + return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev)); } static int acpi_lpss_resume_early(struct device *dev) { - int ret = acpi_lpss_resume(dev, false); + int ret = acpi_lpss_resume(dev); return ret ? ret : pm_generic_resume_early(dev); } @@ -1024,7 +1032,7 @@ static int acpi_lpss_runtime_suspend(struct device *dev) static int acpi_lpss_runtime_resume(struct device *dev) { - int ret = acpi_lpss_resume(dev, true); + int ret = acpi_lpss_resume(dev); return ret ? ret : pm_generic_runtime_resume(dev); } -- cgit 1.2.3-korg From 8129e2a1cb39bf0df12cc13c9dc7bb581296bd7b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 25 Jul 2018 16:35:09 +0300 Subject: tools/virtio: add dma barrier stubs Fixes: 55e49dc43a8 ("virtio_ring: switch to dma_XX barriers for rpmsg") Signed-off-by: Michael S. 
Tsirkin --- tools/virtio/asm/barrier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 0ac3caf90877f..d0351f83aebed 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -13,8 +13,8 @@ } while (0); /* Weak barriers should be used. If not - it's a bug */ # define mb() abort() -# define rmb() abort() -# define wmb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif -- cgit 1.2.3-korg From f2467ee0698e0fb98d94ed3f11d3e0c19c4228d4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 25 Jul 2018 16:38:31 +0300 Subject: tools/virtio: add kmalloc_array stub Fixes: 6da2ec56059 ("treewide: kmalloc() -> kmalloc_array()") Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/kernel.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index fca8381bbe041..fb22bccfbc8a7 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -52,6 +52,11 @@ static inline void *kmalloc(size_t s, gfp_t gfp) return __kmalloc_fake; return malloc(s); } +static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp) +{ + return kmalloc(n * s, gfp); +} + static inline void *kzalloc(size_t s, gfp_t gfp) { void *p = kmalloc(s, gfp); -- cgit 1.2.3-korg From 5aa3d1a20a233d4a5f1ec3d62da3f19d9afea682 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 27 Jul 2018 07:50:53 -0400 Subject: tools/power turbostat: Read extended processor family from CPUID This fixes the reported family on modern AMD processors (e.g. Ryzen, which is family 0x17). Previously these processors all showed up as family 0xf. See the document https://support.amd.com/TechDocs/56255_OSRR.pdf section CPUID_Fn00000001_EAX for how to calculate the family from the BaseFamily and ExtFamily values. 
This matches the code in arch/x86/lib/cpu.c Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2b0135599f37f..6c8effebf7c59 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4457,7 +4457,9 @@ void process_cpuid() family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; - if (family == 6 || family == 0xf) + if (family == 0xf) + family += (fms >> 20) & 0xff; + if (family >= 6) model += ((fms >> 16) & 0xf) << 4; if (!quiet) { -- cgit 1.2.3-korg From 538c48f27ac669cebd6d9abe1ce8b46d55f917ee Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jul 2018 12:55:08 -0400 Subject: tools/power turbostat: version 18.07.27 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6c8effebf7c59..980bd9d20646b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5096,7 +5096,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.06.20" + fprintf(outf, "turbostat version 18.07.27" " - Len Brown \n"); } -- cgit 1.2.3-korg From 4c612add7b18844ddd733ebdcbe754520155999b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 24 Jul 2018 17:13:02 +0300 Subject: ARC: dma [non IOC]: fix arc_dma_sync_single_for_(device|cpu) ARC backend for dma_sync_single_for_(device|cpu) was broken as it was not honoring the @dir argument and simply forcing it based on the call: - arc_dma_sync_single_for_device(dir) assumed DMA_TO_DEVICE (cache wback) - arc_dma_sync_single_for_cpu(dir) assumed DMA_FROM_DEVICE (cache inv) This is not true given the DMA API programming model and has been discussed 
here [1] in some detail. Interestingly while the deficiency has been there forever, it only started showing up after 4.17 dma common ops rework, commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") which wired up these calls under the more commonly used dma_map_page API triggering the issue. [1]: https://lkml.org/lkml/2018/5/18/979 Fixes: commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") Cc: stable@kernel.org # v4.17+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: reworked changelog] --- arch/arc/mm/dma.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c10718409795..ec47e6079f5d0 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -129,14 +129,59 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } +/* + * Cache operations depending on function and direction argument, inspired by + * https://lkml.org/lkml/2018/5/18/979 + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_wback(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + 
break; + + default: + break; + } } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_inv(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + break; + + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } } -- cgit 1.2.3-korg From eb2777397fd83a4a7eaa26984d09d3babb845d2a Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:43 +0300 Subject: ARC: dma [non-IOC] setup SMP_CACHE_BYTES and cache_line_size As for today we don't setup SMP_CACHE_BYTES and cache_line_size for ARC, so they are set to L1_CACHE_BYTES by default. L1 line length (L1_CACHE_BYTES) might be easily smaller than L2 line (which is usually the case BTW). This breaks code. For example this breaks ethernet infrastructure on HSDK/AXS103 boards with IOC disabled, involving manual cache flushes Functions which alloc and manage sk_buff packet data area rely on SMP_CACHE_BYTES define. In the result we can share last L2 cache line in sk_buff linear packet data area between DMA buffer and some useful data in other structure. So we can lose this data when we invalidate DMA buffer. sk_buff linear packet data area | | | skb->end skb->tail V | | V V ----------------------------------------------. packet data | | ----------------------------------------------. ---------------------.--------------------------------------------------. SLC line | SLC (L2 cache) line (128B) | ---------------------.--------------------------------------------------. ^ ^ | | These cache lines will be invalidated when we invalidate skb linear packet data area before DMA transaction starting. This leads to issues painful to debug as it reproduces only if (sk_buff->end - sk_buff->tail) < SLC_LINE_SIZE and if we have some useful data right after sk_buff->end. 
Fix that by hardcoding SMP_CACHE_BYTES to the max line length we may have. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 +++ arch/arc/include/asm/cache.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9cf59fc60eab8..5151d81476a1b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -50,6 +50,9 @@ config ARC select HAVE_KERNEL_LZMA select ARCH_HAS_PTE_SPECIAL +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + config MIGHT_HAVE_PCI bool diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 8486f328cc5d2..ff7d3232764a2 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -48,7 +48,9 @@ }) /* Largest line length for either L1 or L2 is 128 bytes */ -#define ARCH_DMA_MINALIGN 128 +#define SMP_CACHE_BYTES 128 +#define cache_line_size() SMP_CACHE_BYTES +#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -- cgit 1.2.3-korg From a54e43f993f8ec2f063b616a0e4d2b09e08d78a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Jul 2018 17:10:39 -0500 Subject: PCI: mobiveil: Avoid integer overflow in IB_WIN_SIZE IB_WIN_SIZE is larger than INT_MAX so we need to cast it to u64.
Fixes: 9af6bcb11e12 ("PCI: mobiveil: Add Mobiveil PCIe Host Bridge IP driver") Signed-off-by: Dan Carpenter Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-mobiveil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-mobiveil.c b/drivers/pci/controller/pcie-mobiveil.c index 4d6c20e47bed4..cf0aa7cee5b0a 100644 --- a/drivers/pci/controller/pcie-mobiveil.c +++ b/drivers/pci/controller/pcie-mobiveil.c @@ -107,7 +107,7 @@ #define CFG_WINDOW_TYPE 0 #define IO_WINDOW_TYPE 1 #define MEM_WINDOW_TYPE 2 -#define IB_WIN_SIZE (256 * 1024 * 1024 * 1024) +#define IB_WIN_SIZE ((u64)256 * 1024 * 1024 * 1024) #define MAX_PIO_WINDOWS 8 /* Parameters for the waiting for link up routine */ -- cgit 1.2.3-korg From b611da43b68193dcb7e632adb44d506374a5d3ef Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 27 Jul 2018 10:21:26 +0200 Subject: perf build: Build error in libbpf missing initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In linux-next tree compiling the perf tool with additional make flags EXTRA_CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -O2" causes a compiler error. It is the warning 'variable may be used uninitialized' which is treated as error: I compile it using a FEDORA 28 installation, my gcc compiler version: gcc (GCC) 8.0.1 20180324 (Red Hat 8.0.1-0.20). The file that causes the error is tools/lib/bpf/libbpf.c. [root@p23lp27] # make V=1 EXTRA_CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -O2" [...] Makefile.config:849: No openjdk development package found, please install JDK package, e.g. 
openjdk-8-jdk, java-1.8.0-openjdk-devel Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' CC libbpf.o libbpf.c: In function ‘bpf_perf_event_read_simple’: libbpf.c:2342:6: error: ‘ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized] int ret; ^ cc1: all warnings being treated as errors mv: cannot stat './.libbpf.o.tmp': No such file or directory /home6/tmricht/linux-next/tools/build/Makefile.build:96: recipe for target 'libbpf.o' failed Suggested-by: Jakub Kicinski Signed-off-by: Thomas Richter Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d881d370616c1..1aafdbe827fed 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2273,8 +2273,8 @@ bpf_perf_event_read_simple(void *mem, unsigned long size, volatile struct perf_event_mmap_page *header = mem; __u64 data_tail = header->data_tail; __u64 data_head = header->data_head; + int ret = LIBBPF_PERF_EVENT_ERROR; void *base, *begin, *end; - int ret; asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ if (data_head == data_tail) -- cgit 1.2.3-korg From 3eee1f75f2b9c107d4a097e8b640553376a5b171 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 28 Jul 2018 00:17:56 +0200 Subject: bpf: fix bpf_skb_load_bytes_relative pkt length check The len > skb_headlen(skb) cannot be used as a maximum upper bound for the packet length since it does not have any relation to the full linear packet length when filtering is used from upper layers (e.g. in case of reuseport BPF programs) as by then skb->data, skb->len already got mangled through __skb_pull() and others. 
Fixes: 4e1ec56cdc59 ("bpf: add skb_load_bytes_relative helper") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau --- net/core/filter.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 06da770f543fd..9dfd145eedcc3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1712,24 +1712,26 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { + u8 *end = skb_tail_pointer(skb); + u8 *net = skb_network_header(skb); + u8 *mac = skb_mac_header(skb); u8 *ptr; - if (unlikely(offset > 0xffff || len > skb_headlen(skb))) + if (unlikely(offset > 0xffff || len > (end - mac))) goto err_clear; switch (start_header) { case BPF_HDR_START_MAC: - ptr = skb_mac_header(skb) + offset; + ptr = mac + offset; break; case BPF_HDR_START_NET: - ptr = skb_network_header(skb) + offset; + ptr = net + offset; break; default: goto err_clear; } - if (likely(ptr >= skb_mac_header(skb) && - ptr + len <= skb_tail_pointer(skb))) { + if (likely(ptr >= mac && ptr + len <= end)) { memcpy(to, ptr, len); return 0; } -- cgit 1.2.3-korg From 71eb5255f55bdb484d35ff7c9a1803f453dfbf82 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 29 Jul 2018 00:28:31 +0900 Subject: bpf: use GFP_ATOMIC instead of GFP_KERNEL in bpf_parse_prog() bpf_parse_prog() is protected by rcu_read_lock(). so that GFP_KERNEL is not allowed in the bpf_parse_prog(). [51015.579396] ============================= [51015.579418] WARNING: suspicious RCU usage [51015.579444] 4.18.0-rc6+ #208 Not tainted [51015.579464] ----------------------------- [51015.579488] ./include/linux/rcupdate.h:303 Illegal context switch in RCU read-side critical section! 
[51015.579510] other info that might help us debug this: [51015.579532] rcu_scheduler_active = 2, debug_locks = 1 [51015.579556] 2 locks held by ip/1861: [51015.579577] #0: 00000000a8c12fd1 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x2e0/0x910 [51015.579711] #1: 00000000bf815f8e (rcu_read_lock){....}, at: lwtunnel_build_state+0x96/0x390 [51015.579842] stack backtrace: [51015.579869] CPU: 0 PID: 1861 Comm: ip Not tainted 4.18.0-rc6+ #208 [51015.579891] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [51015.579911] Call Trace: [51015.579950] dump_stack+0x74/0xbb [51015.580000] ___might_sleep+0x16b/0x3a0 [51015.580047] __kmalloc_track_caller+0x220/0x380 [51015.580077] kmemdup+0x1c/0x40 [51015.580077] bpf_parse_prog+0x10e/0x230 [51015.580164] ? kasan_kmalloc+0xa0/0xd0 [51015.580164] ? bpf_destroy_state+0x30/0x30 [51015.580164] ? bpf_build_state+0xe2/0x3e0 [51015.580164] bpf_build_state+0x1bb/0x3e0 [51015.580164] ? bpf_parse_prog+0x230/0x230 [51015.580164] ? lock_is_held_type+0x123/0x1a0 [51015.580164] lwtunnel_build_state+0x1aa/0x390 [51015.580164] fib_create_info+0x1579/0x33d0 [51015.580164] ? sched_clock_local+0xe2/0x150 [51015.580164] ? fib_info_update_nh_saddr+0x1f0/0x1f0 [51015.580164] ? sched_clock_local+0xe2/0x150 [51015.580164] fib_table_insert+0x201/0x1990 [51015.580164] ? lock_downgrade+0x610/0x610 [51015.580164] ? fib_table_lookup+0x1920/0x1920 [51015.580164] ? lwtunnel_valid_encap_type.part.6+0xcb/0x3a0 [51015.580164] ? rtm_to_fib_config+0x637/0xbd0 [51015.580164] inet_rtm_newroute+0xed/0x1b0 [51015.580164] ? rtm_to_fib_config+0xbd0/0xbd0 [51015.580164] rtnetlink_rcv_msg+0x331/0x910 [ ... 
] Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Daniel Borkmann --- net/core/lwt_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e7e626fb87bb3..e45098593dc00 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) return -EINVAL; - prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); + prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC); if (!prog->name) return -ENOMEM; -- cgit 1.2.3-korg From ee614c871014045b45fae149b7245fc22a0bbdd8 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Fri, 27 Jul 2018 20:56:52 -0700 Subject: nfp: flower: fix port metadata conversion bug Function nfp_flower_repr_get_type_and_port expects an enum nfp_repr_type return value but, if the repr type is unknown, returns a value of type enum nfp_flower_cmsg_port_type. This means that if FW encodes the port ID in a way the driver does not understand instead of dropping the frame driver may attribute it to a physical port (uplink) provided the port number is less than physical port count. Fix this and ensure a net_device of NULL is returned if the repr can not be determined. Fixes: 1025351a88a4 ("nfp: add flower app") Signed-off-by: John Hurley Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 1decf3a1cad34..e57d23746585f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -80,7 +80,7 @@ nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) return NFP_REPR_TYPE_VF; } - return NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC; + return __NFP_REPR_TYPE_MAX; } static struct net_device * @@ -91,6 +91,8 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id) u8 port = 0; repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port); + if (repr_type > NFP_REPR_TYPE_MAX) + return NULL; reprs = rcu_dereference(app->reprs[repr_type]); if (!reprs) -- cgit 1.2.3-korg From 136f55f660192ce04af091642efc75d85e017364 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 28 Jul 2018 09:52:10 +0200 Subject: net: lan78xx: fix rx handling before first packet is send As long as the bh tasklet isn't scheduled once, no packet from the rx path will be handled. Since the tx path also schedules the same tasklet this situation only persists until the first packet transmission. So fix this issue by scheduling the tasklet after link reset. Link: https://github.com/raspberrypi/linux/issues/2617 Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet") Suggested-by: Floris Bos Signed-off-by: Stefan Wahren Signed-off-by: David S.
Miller --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ed10d49eb5e0b..aeca484a75b89 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1242,6 +1242,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) mod_timer(&dev->stat_monitor, jiffies + STAT_UPDATE_TIMER); } + + tasklet_schedule(&dev->bh); } return ret; -- cgit 1.2.3-korg From ab123fe071c9aa9680ecd62eb080eb26cff4892c Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Fri, 27 Jul 2018 11:19:29 -0700 Subject: enic: handle mtu change for vf properly When driver gets notification for mtu change, driver does not handle it for all RQs. It handles only RQ[0]. Fix is to use enic_change_mtu() interface to change mtu for vf. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 78 ++++++++++------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 90c645b8538e0..6b0376123cdeb 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2047,28 +2047,42 @@ static int enic_stop(struct net_device *netdev) return 0; } +static int _enic_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool running = netif_running(netdev); + int err = 0; + + ASSERT_RTNL(); + if (running) { + err = enic_stop(netdev); + if (err) + return err; + } + + netdev->mtu = new_mtu; + + if (running) { + err = enic_open(netdev); + if (err) + return err; + } + + return 0; +} + static int enic_change_mtu(struct net_device *netdev, int new_mtu) { struct enic *enic = netdev_priv(netdev); - int running = netif_running(netdev); if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (running) - enic_stop(netdev); - - netdev->mtu = new_mtu; - if (netdev->mtu > 
enic->port_mtu) netdev_warn(netdev, - "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + "interface MTU (%d) set higher than port MTU (%d)\n", + netdev->mtu, enic->port_mtu); - if (running) - enic_open(netdev); - - return 0; + return _enic_change_mtu(netdev, new_mtu); } static void enic_change_mtu_work(struct work_struct *work) @@ -2076,47 +2090,9 @@ static void enic_change_mtu_work(struct work_struct *work) struct enic *enic = container_of(work, struct enic, change_mtu_work); struct net_device *netdev = enic->netdev; int new_mtu = vnic_dev_mtu(enic->vdev); - int err; - unsigned int i; - - new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu)); rtnl_lock(); - - /* Stop RQ */ - del_timer_sync(&enic->notify_timer); - - for (i = 0; i < enic->rq_count; i++) - napi_disable(&enic->napi[i]); - - vnic_intr_mask(&enic->intr[0]); - enic_synchronize_irqs(enic); - err = vnic_rq_disable(&enic->rq[0]); - if (err) { - rtnl_unlock(); - netdev_err(netdev, "Unable to disable RQ.\n"); - return; - } - vnic_rq_clean(&enic->rq[0], enic_free_rq_buf); - vnic_cq_clean(&enic->cq[0]); - vnic_intr_clean(&enic->intr[0]); - - /* Fill RQ with new_mtu-sized buffers */ - netdev->mtu = new_mtu; - vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); - /* Need at least one buffer on ring to get going */ - if (vnic_rq_desc_used(&enic->rq[0]) == 0) { - rtnl_unlock(); - netdev_err(netdev, "Unable to alloc receive buffers.\n"); - return; - } - - /* Start RQ */ - vnic_rq_enable(&enic->rq[0]); - napi_enable(&enic->napi[0]); - vnic_intr_unmask(&enic->intr[0]); - enic_notify_timer_start(enic); - + (void)_enic_change_mtu(netdev, new_mtu); rtnl_unlock(); netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); -- cgit 1.2.3-korg From 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Jul 2018 18:15:46 +0200 Subject: ipv4: remove BUG_ON() from fib_compute_spec_dst Remove BUG_ON() from fib_compute_spec_dst 
routine and check in_dev pointer during flowi4 data structure initialization. fib_compute_spec_dst routine can be run concurrently with device removal where ip_ptr net_device pointer is set to NULL. This can happen if userspace enables pkt info on UDP rx socket and the device is removed while traffic is flowing Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e46cdd310e5f8..2998b0e47d4b6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -292,19 +292,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return ip_hdr(skb)->daddr; in_dev = __in_dev_get_rcu(dev); - BUG_ON(!in_dev); net = dev_net(dev); scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, - .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); -- cgit 1.2.3-korg From b0753408aadf32c7ece9e6b765017881e54af833 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:57:47 +0300 Subject: net: mdio-mux: bcm-iproc: fix wrong getter and setter pair mdio_mux_iproc_probe() uses platform_set_drvdata() to store md pointer in device, whereas mdio_mux_iproc_remove() restores md pointer by dev_get_platdata(&pdev->dev). This leads to wrong resources release. The patch replaces getter to platform_get_drvdata. Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs") Signed-off-by: Anton Vasilyev Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-mux-bcm-iproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 0831b7142df7a..0c5b68e7da51a 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -218,7 +218,7 @@ out: static int mdio_mux_iproc_remove(struct platform_device *pdev) { - struct iproc_mdiomux_desc *md = dev_get_platdata(&pdev->dev); + struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); mdio_mux_uninit(md->mux_handle); mdiobus_unregister(md->mii_bus); -- cgit 1.2.3-korg From c8e8cd579bb4265651df8223730105341e61a2d1 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 27 Jul 2018 22:43:01 +0000 Subject: net: socket: fix potential spectre v1 gadget in socketcall 'call' is a user-controlled value, so sanitize the array index after the bounds check to avoid speculating past the bounds of the 'nargs' array. Found with the help of Smatch: net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue 'nargs' [r] (local cap) Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: David S. 
Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 85633622c94d0..4ac3b834cce90 100644 --- a/net/socket.c +++ b/net/socket.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -2522,6 +2523,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; + call = array_index_nospec(call, SYS_SENDMMSG + 1); len = nargs[call]; if (len > sizeof(a)) -- cgit 1.2.3-korg From e978de7a6d382ec378830ca2cf38e902df0b6d84 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 27 Jul 2018 22:43:02 +0000 Subject: net: socket: Fix potential spectre v1 gadget in sock_is_registered 'family' can be a user-controlled value, so sanitize it after the bounds check to avoid speculative out-of-bounds access. Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 4ac3b834cce90..8c24d5dc4bc8f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2690,7 +2690,8 @@ EXPORT_SYMBOL(sock_unregister); bool sock_is_registered(int family) { - return family < NPROTO && rcu_access_pointer(net_families[family]); + return family < NPROTO && + rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]); } static int __init sock_init(void) -- cgit 1.2.3-korg From 383d470936c05554219094a4d364d964cb324827 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Jul 2018 17:19:12 -0400 Subject: tcp_bbr: fix bw probing to raise in-flight data for very small BDPs For some very small BDPs (with just a few packets) there was a quantization effect where the target number of packets in flight during the super-unity-gain (1.25x) phase of gain cycling was implicitly truncated to a number of packets no larger than the normal unity-gain (1.0x) phase of gain cycling. 
This meant that in multi-flow scenarios some flows could get stuck with a lower bandwidth, because they did not push enough packets inflight to discover that there was more bandwidth available. This was really only an issue in multi-flow LAN scenarios, where RTTs and BDPs are low enough for this to be an issue. This fix ensures that gain cycling can raise inflight for small BDPs by ensuring that in PROBE_BW mode target inflight values with a super-unity gain are always greater than inflight values with a gain <= 1. Importantly, this applies whether the inflight value is calculated for use as a cwnd value, or as a target inflight value for the end of the super-unity phase in bbr_is_next_cycle_phase() (both need to be bigger to ensure we can probe with more packets in flight reliably). This is a candidate fix for stable releases. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 58e2f479ffb4d..4bfff3c87e8e2 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -354,6 +354,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; + /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ + if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT) + cwnd += 2; + return cwnd; } -- cgit 1.2.3-korg From 460a53106ac39b74625d1104d9a113e0f3716c18 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Sat, 28 Jul 2018 14:05:19 -0700 Subject: ACPICA: AML Parser: ignore control method status in module-level code Previous change in the AML parser code blindly set all non-successful dispatcher statuses to AE_OK. 
That approach is incorrect, though, because successful control method invocations from module-level return AE_CTRL_TRANSFER. Overwriting AE_OK to this status causes the AML parser to think that there was no return value from the control method invocation. Fixes: 92c0f4af386 (ACPICA: AML Parser: ignore dispatcher error status during table load) Reported-by: Linus Torvalds Tested-by: Linus Torvalds Tested-by: Oleksandr Natalenko Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/psloop.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c index ee840be150b5e..44f35ab3347d1 100644 --- a/drivers/acpi/acpica/psloop.c +++ b/drivers/acpi/acpica/psloop.c @@ -709,15 +709,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state) } else if ((walk_state-> parse_flags & ACPI_PARSE_MODULE_LEVEL) + && status != AE_CTRL_TRANSFER && ACPI_FAILURE(status)) { /* - * ACPI_PARSE_MODULE_LEVEL means that we are loading a table by - * executing it as a control method. However, if we encounter - * an error while loading the table, we need to keep trying to - * load the table rather than aborting the table load. Set the - * status to AE_OK to proceed with the table load. If we get a - * failure at this point, it means that the dispatcher got an - * error while processing Op (most likely an AML operand error. + * ACPI_PARSE_MODULE_LEVEL flag means that we are currently + * loading a table by executing it as a control method. + * However, if we encounter an error while loading the table, + * we need to keep trying to load the table rather than + * aborting the table load (setting the status to AE_OK + * continues the table load). 
If we get a failure at this + * point, it means that the dispatcher got an error while + * processing Op (most likely an AML operand error) or a + * control method was called from module level and the + * dispatcher returned AE_CTRL_TRANSFER. In the latter case, + * leave the status alone, there's nothing wrong with it. */ status = AE_OK; } -- cgit 1.2.3-korg From 9939a46d90c6c76f4533d534dbadfa7b39dc6acc Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 15:05:37 +0300 Subject: NET: stmmac: align DMA stuff to largest cache line length As for today STMMAC_ALIGN macro (which is used to align DMA stuff) relies on L1 line length (L1_CACHE_BYTES). This isn't correct in case of system with several cache levels which might have L1 cache line length smaller than L2 line. This can lead to sharing one cache line between DMA buffer and other data, so we can lose this data while invalidate DMA buffer before DMA transaction. Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for aligning. Signed-off-by: Eugeniy Paltsev Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 60f59abab009e..ef6a8d39db2f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -53,7 +53,7 @@ #include "dwmac1000.h" #include "hwif.h" -#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) +#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) /* Module parameters */ -- cgit 1.2.3-korg From 7acf9d4237c46894e0fa0492dd96314a41742e84 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 27 Jul 2018 16:54:44 +0100 Subject: netlink: Do not subscribe to non-existent groups Make ABI more strict about subscribing to group > ngroups. Code doesn't check for that and it looks bogus. 
(one can subscribe to non-existing group) Still, it's possible to bind() to all possible groups with (-1) Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 393573a99a5a3..ac805caed2e2c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1008,6 +1008,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- cgit 1.2.3-korg From 25432eba9cd8f2ef5afef55be811b010a004b5fa Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Sat, 28 Jul 2018 15:26:01 -0700 Subject: openvswitch: meter: Fix setting meter id for new entries The meter code would create an entry for each new meter. However, it would not set the meter id in the new entry, so every meter would appear to have a meter id of zero. This commit properly sets the meter id when adding the entry. Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Signed-off-by: Justin Pettit Cc: Andy Zhou Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index b891a91577f80..c038e021a5916 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -211,6 +211,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) if (!meter) return ERR_PTR(-ENOMEM); + meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); meter->used = div_u64(ktime_get_ns(), 1000 * 1000); meter->kbps = a[OVS_METER_ATTR_KBPS] ? 
1 : 0; meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; @@ -280,6 +281,10 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) u32 meter_id; bool failed; + if (!a[OVS_METER_ATTR_ID]) { + return -ENODEV; + } + meter = dp_meter_create(a); if (IS_ERR_OR_NULL(meter)) return PTR_ERR(meter); @@ -298,11 +303,6 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!a[OVS_METER_ATTR_ID]) { - err = -ENODEV; - goto exit_unlock; - } - meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); /* Cannot fail after this. */ -- cgit 1.2.3-korg From 72c05f32f4a5055c9c8fe889bb6903ec959c0aad Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:50:42 +0300 Subject: can: ems_usb: Fix memory leak on ems_usb_disconnect() ems_usb_probe() allocates memory for dev->tx_msg_buffer, but there is no its deallocation in ems_usb_disconnect(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 12ff0020ecd60..b7dfd4109d24e 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1072,6 +1072,7 @@ static void ems_usb_disconnect(struct usb_interface *intf) usb_free_urb(dev->intr_urb); kfree(dev->intr_in_buffer); + kfree(dev->tx_msg_buffer); } } -- cgit 1.2.3-korg From afc9f65e01cd114cb2cedf544d22239116ce0cc6 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 13 Jul 2018 11:12:22 +0100 Subject: ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. 
This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 : 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 : 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 106a1466518d0..746565a876dcd 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -48,6 +48,7 @@ saved_pc .req lr * from those features make this path too inefficient. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts @@ -78,6 +79,7 @@ fast_work_pending: * call. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! 
@ save returned r0 @@ -255,7 +257,7 @@ local_restart: tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace - invoke_syscall tbl, scno, r10, ret_fast_syscall + invoke_syscall tbl, scno, r10, __ret_fast_syscall add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) -- cgit 1.2.3-korg From f5dbee6e3881b1dbfdcc36008d48bd29549ab2f4 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 28 Jul 2018 05:11:15 -0400 Subject: media: rc: read out of bounds if bpf reports high protocol number The repeat period is read from a static array. If a keydown event is reported from bpf with a high protocol number, we read out of bounds. This is unlikely to end up with a reasonable repeat period at the best of times, in which case no timely key up event is generated. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2e222d9ee01f5..ca68e1d2b2f98 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -679,6 +679,14 @@ static void ir_timer_repeat(struct timer_list *t) spin_unlock_irqrestore(&dev->keylock, flags); } +static unsigned int repeat_period(int protocol) +{ + if (protocol >= ARRAY_SIZE(protocols)) + return 100; + + return protocols[protocol].repeat_period; +} + /** * rc_repeat() - signals that a key is still pressed * @dev: the struct rc_dev descriptor of the device @@ -691,7 +699,7 @@ void rc_repeat(struct rc_dev *dev) { unsigned long flags; unsigned int timeout = nsecs_to_jiffies(dev->timeout) + - msecs_to_jiffies(protocols[dev->last_protocol].repeat_period); + msecs_to_jiffies(repeat_period(dev->last_protocol)); struct lirc_scancode sc = { .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, .keycode = dev->keypressed ? 
dev->last_keycode : KEY_RESERVED, @@ -803,7 +811,7 @@ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u32 scancode, if (dev->keypressed) { dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) + - msecs_to_jiffies(protocols[protocol].repeat_period); + msecs_to_jiffies(repeat_period(protocol)); mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } spin_unlock_irqrestore(&dev->keylock, flags); -- cgit 1.2.3-korg From 8eb0e6421958e9777db98448a4030d8ae940c9a0 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 27 Jul 2018 13:19:45 -0400 Subject: media: v4l: vsp1: Fix deadlock in VSPDL DRM pipelines The VSP uses a lock to protect the BRU and BRS assignment when configuring pipelines. The lock is taken in vsp1_du_atomic_begin() and released in vsp1_du_atomic_flush(), as well as taken and released in vsp1_du_setup_lif(). This guards against multiple pipelines trying to assign the same BRU and BRS at the same time. The DRM framework calls the .atomic_begin() operations in a loop over all CRTCs included in an atomic commit. On a VSPDL (the only VSP type where this matters), a single VSP instance handles two CRTCs, with a single lock. This results in a deadlock when the .atomic_begin() operation is called on the second CRTC. The DRM framework serializes atomic commits that affect the same CRTCs, but doesn't know about two CRTCs sharing the same VSPDL. Two commits affecting the VSPDL LIF0 and LIF1 respectively can thus race each other, hence the need for a lock. This could be fixed on the DRM side by forcing serialization of commits affecting CRTCs backed by the same VSPDL, but that would negatively affect performances, as the locking is only needed when the BRU and BRS need to be reassigned, which is an uncommon case. The lock protects the whole .atomic_begin() to .atomic_flush() sequence. The only operation that can occur in-between is vsp1_du_atomic_update(), which doesn't touch the BRU and BRS, and thus doesn't need to be protected by the lock. 
We can thus only take the lock around the pipeline setup calls in vsp1_du_atomic_flush(), which fixes the deadlock. Fixes: f81f9adc4ee1 ("media: v4l: vsp1: Assign BRU and BRS to pipelines dynamically") Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/vsp1/vsp1_drm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index edb35a5c57ea0..a99fc0ced7a7a 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -728,9 +728,6 @@ EXPORT_SYMBOL_GPL(vsp1_du_setup_lif); */ void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index) { - struct vsp1_device *vsp1 = dev_get_drvdata(dev); - - mutex_lock(&vsp1->drm->lock); } EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin); @@ -846,6 +843,7 @@ void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index, drm_pipe->crc = cfg->crc; + mutex_lock(&vsp1->drm->lock); vsp1_du_pipeline_setup_inputs(vsp1, pipe); vsp1_du_pipeline_configure(pipe); mutex_unlock(&vsp1->drm->lock); -- cgit 1.2.3-korg From 89da619bc18d79bca5304724c11d4ba3b67ce2c6 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Jul 2018 10:29:28 +0800 Subject: virtio_balloon: fix another race between migration and ballooning Kernel panic when with high memory pressure, calltrace looks like, PID: 21439 TASK: ffff881be3afedd0 CPU: 16 COMMAND: "java" #0 [ffff881ec7ed7630] machine_kexec at ffffffff81059beb #1 [ffff881ec7ed7690] __crash_kexec at ffffffff81105942 #2 [ffff881ec7ed7760] crash_kexec at ffffffff81105a30 #3 [ffff881ec7ed7778] oops_end at ffffffff816902c8 #4 [ffff881ec7ed77a0] no_context at ffffffff8167ff46 #5 [ffff881ec7ed77f0] __bad_area_nosemaphore at ffffffff8167ffdc #6 [ffff881ec7ed7838] __node_set at ffffffff81680300 #7 [ffff881ec7ed7860] __do_page_fault at ffffffff8169320f #8 [ffff881ec7ed78c0] do_page_fault at ffffffff816932b5 #9 
[ffff881ec7ed78f0] page_fault at ffffffff8168f4c8 [exception RIP: _raw_spin_lock_irqsave+47] RIP: ffffffff8168edef RSP: ffff881ec7ed79a8 RFLAGS: 00010046 RAX: 0000000000000246 RBX: ffffea0019740d00 RCX: ffff881ec7ed7fd8 RDX: 0000000000020000 RSI: 0000000000000016 RDI: 0000000000000008 RBP: ffff881ec7ed79a8 R8: 0000000000000246 R9: 000000000001a098 R10: ffff88107ffda000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000008 R14: ffff881ec7ed7a80 R15: ffff881be3afedd0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 It happens in the pagefault and results in double pagefault during compacting pages when memory allocation fails. Analysed the vmcore, the page that leads to the second pagefault is corrupted with _mapcount=-256, but private=0. It's caused by the race between migration and ballooning, and a missing lock in virtballoon_migratepage() of virtio_balloon driver. This patch fixes the bug. Fixes: e22504296d4f64f ("virtio_balloon: introduce migration primitives to balloon pages") Cc: stable@vger.kernel.org Signed-off-by: Jiang Biao Signed-off-by: Huang Chong Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6b237e3f49830..3988c09143221 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -513,7 +513,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, tell_host(vb, vb->inflate_vq); /* balloon's page migration 2nd step -- deflate "page" */ + spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_delete(page); + spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb, vb->pfns, page); tell_host(vb, vb->deflate_vq); -- cgit 1.2.3-korg From 2c3ee0e1779d2e08bc08734bc8475daaf94d0ba4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:41:56 -0300 Subject: tools headers uapi: Update tools's copy of linux/perf_event.h To get the changes in: 6cbc304f2f36 ("perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)") That do not imply any changes in the tooling side, the (ab)use of sample_type is entirely done in kernel space, nothing for userspace to witness here. 
This cures the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Prashant Bhole Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-o64mjoy35s9gd1gitunw1zg4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b8e288a1f7409..eeb787b1c53c7 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,6 +143,8 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, }; /* -- cgit 1.2.3-korg From 7def16d1d2668a4a3663291c9ace307b81934704 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:48:19 -0300 Subject: tools headers powerpc: Update asm/unistd.h copy to pick new The new 'io_pgetevents' syscall was wired up in PowerPC in the following cset: b2f82565f2ca ("powerpc: Wire up io_pgetevents") Update tools/arch/powerpc/ copy of the asm/unistd.h file so that 'perf trace' on PowerPC gets it in its syscall table. 
This eliminated the following perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/unistd.h' differs from latest version at 'arch/powerpc/include/uapi/asm/unistd.h' Cc: Alexander Shishkin Cc: Breno Leitao Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: linuxppc-dev@lists.ozlabs.org Cc: Michael Ellerman Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Link: https://lkml.kernel.org/n/tip-9uvu7tz4ud3bxxfyxwryuz47@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h index ac5ba55066dd7..985534d0b448b 100644 --- a/tools/arch/powerpc/include/uapi/asm/unistd.h +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -399,5 +399,6 @@ #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 #define __NR_rseq 387 +#define __NR_io_pgetevents 388 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit 1.2.3-korg From fc73bfd6005c7fe5c3a2f04d4db7fa5d37cd3ebd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:56:13 -0300 Subject: tools headers uapi: Refresh linux/bpf.h copy To get the changes in: 4c79579b44b1 ("bpf: Change bpf_fib_lookup to return lookup status") That do not entail changes in tools/perf/ use of it, eliminating the following perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yei494y6b3mn6bjzz9g0ws12@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59b19b6a40d73..b7db3261c62d1 100644 --- 
a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1857,7 +1857,8 @@ union bpf_attr { * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only). + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the @@ -1873,9 +1874,10 @@ union bpf_attr { * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. * Return - * Egress device index on success, 0 if packet needs to continue - * up the stack for further processing or a negative error in case - * of failure. + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args { #define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_OUTPUT BIT(1) +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + struct bpf_fib_lookup { 
/* input: network family for lookup (AF_INET, AF_INET6) * output: network family of egress nexthop @@ -2625,7 +2639,11 @@ struct bpf_fib_lookup { /* total length of packet from network header - used for MTU check */ __u16 tot_len; - __u32 ifindex; /* L3 device index for lookup */ + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; union { /* inputs to lookup */ -- cgit 1.2.3-korg From 1f27a050fc679d16e68a40e0bb575364a89fad66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 12:26:54 -0300 Subject: tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To cope with the changes in: 12c89130a56a ("x86/asm/memcpy_mcsafe: Add write-protection-fault handling") 60622d68227d ("x86/asm/memcpy_mcsafe: Return bytes remaining") bd131544aa7e ("x86/asm/memcpy_mcsafe: Add labels for __memcpy_mcsafe() write fault handling") da7bc9c57eb0 ("x86/asm/memcpy_mcsafe: Remove loop unrolling") This needed introducing a file with a copy of the mcsafe_handle_tail() function, that is used in the new memcpy_64.S file, as well as a dummy mcsafe_test.h header. Testing it: $ nm ~/bin/perf | grep mcsafe 0000000000484130 T mcsafe_handle_tail 0000000000484300 T __memcpy_mcsafe $ $ perf bench mem memcpy # Running 'mem/memcpy' benchmark: # function 'default' (Default memcpy() provided by glibc) # Copying 1MB bytes ... 44.389205 GB/sec # function 'x86-64-unrolled' (unrolled memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 22.710756 GB/sec # function 'x86-64-movsq' (movsq-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 42.459239 GB/sec # function 'x86-64-movsb' (movsb-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 
42.459239 GB/sec $ This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' Cc: Adrian Hunter Cc: Dan Williams Cc: David Ahern Cc: Jiri Olsa Cc: Mika Penttilä Cc: Namhyung Kim Cc: Tony Luck Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-igdpciheradk3gb3qqal52d0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/mcsafe_test.h | 13 ++++ tools/arch/x86/lib/memcpy_64.S | 112 +++++++++++++++---------------- tools/perf/bench/Build | 1 + tools/perf/bench/mem-memcpy-x86-64-asm.S | 1 + tools/perf/bench/mem-memcpy-x86-64-lib.c | 24 +++++++ 5 files changed, 93 insertions(+), 58 deletions(-) create mode 100644 tools/arch/x86/include/asm/mcsafe_test.h create mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 0000000000000..2ccd588fbad45 --- /dev/null +++ b/tools/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* _MCSAFE_TEST_H_ */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 9a53a06e5a3ef..298ef1479240b 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,12 +184,15 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. 
*/ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_read_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words +.L_write_words: + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_read_words /* Any trailing bytes? 
*/ .L_no_whole_words: @@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. Return zero */ .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. 
+ */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp mcsafe_handle_tail + .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf119430479..eafce1a130a17 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index b43f8d2a34ec1..9ad015a1e2024 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -6,6 +6,7 @@ #define altinstr_replacement text #define globl p2align 4; .globl #define _ASM_EXTABLE_FAULT(x, y) +#define _ASM_EXTABLE(x, y) #include "../../arch/x86/lib/memcpy_64.S" /* diff --git 
a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c new file mode 100644 index 0000000000000..4130734dde84b --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-lib.c @@ -0,0 +1,24 @@ +/* + * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy + * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' + * happy. + */ +#include + +unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); + +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. + */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} -- cgit 1.2.3-korg From 44fe619b1418ff4e9d2f9518a940fbe2fb686a08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 13:15:03 -0300 Subject: perf tools: Fix the build on the alpine:edge distro The UAPI file byteorder/little_endian.h uses the __always_inline define without including the header where it is defined, linux/stddef.h, this ends up working in all the other distros because that file gets included seemingly by luck from one of the files included from little_endian.h. But not on Alpine:edge, that fails for all files where perf_event.h is included but linux/stddef.h isn't included before that. Adding the missing linux/stddef.h file where it breaks on Alpine:edge to fix that, in all other distros, that is just a very small header anyway.
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-9r1pifftxvuxms8l7ir73p5l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/pmu.c | 1 + tools/perf/arch/x86/util/tsc.c | 1 + tools/perf/perf.h | 1 + tools/perf/util/header.h | 1 + tools/perf/util/namespaces.h | 1 + 5 files changed, 5 insertions(+) diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index 63a74c32ddc5d..e33ef5bc31c57 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include <linux/stddef.h> #include #include "../../util/intel-pt.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 06bae7023a510..950539f9a4f77 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include #include +#include <linux/stddef.h> #include #include "../../perf.h" diff --git a/tools/perf/perf.h b/tools/perf/perf.h index a1a97956136f9..d215714f48df5 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,6 +5,7 @@ #include #include #include +#include <linux/stddef.h> #include extern bool test_attr__enabled; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 90d4577a92dc6..6d7fe44aadc0d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -2,6 +2,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H +#include <linux/stddef.h> #include #include #include diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 760558dcfd181..cae1a9a397222 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -10,6 +10,7 @@ #define __PERF_NAMESPACES_H #include +#include <linux/stddef.h> #include #include #include -- cgit 1.2.3-korg From 822fb18a82abaf4ee7058793d95d340f5dab7bfc Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Fri, 27 Jul 2018 17:56:08 +0800 Subject: xen-netfront: wait xenbus state change when load module manually When loading module
manually, after calling xenbus_switch_state() to initialize the state of the netfront device, the driver state does not change quickly enough, which may lead to no device being created in the latest kernel. This patch adds a wait to make sure xenbus knows the driver is not in the closed/unknown state. Current state: [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Cannot get device settings: No such device Cannot get wake-on-lan settings: No such device Cannot get message level: No such device Cannot get link status: No such device No data available With the patch installed. [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Link detected: yes Signed-off-by: Xiao Liang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a57daecf1d574..2d8812dd1534a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,6 +87,7 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_load_q); static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); struct netfront_stats { @@ -1330,6 +1331,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); xenbus_switch_state(dev, XenbusStateInitialising); + wait_event(module_load_q, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown); return netdev; exit: -- cgit 1.2.3-korg From df18b50448fab1dff093731dfd0e25e77e1afcd1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 30 Jul 2018 16:23:10 +0200 Subject: net/ipv6: fix metrics leak Since commit d4ead6b34b67 ("net/ipv6: move metrics from dst to
rt6_info"), ipv6 metrics are shared and refcounted. rt6_set_from() assigns the rt->from pointer and increases the refcount on from's metrics. This reference is never released. Introduce the fib6_metrics_release() helper and use it to release the metrics. Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d212738e9d100..211a2d437b565 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -167,11 +167,22 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) return f6i; } +static void fib6_metrics_release(struct fib6_info *f6i) +{ + struct dst_metrics *m; + + if (!f6i) + return; + + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); +} + void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; - struct dst_metrics *m; WARN_ON(f6i->fib6_node); @@ -201,9 +212,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head) if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); - m = f6i->fib6_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + fib6_metrics_release(f6i); kfree(f6i); } @@ -887,6 +896,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, from = rcu_dereference_protected(pcpu_rt->from, lockdep_is_held(&table->tb6_lock)); + fib6_metrics_release(from); rcu_assign_pointer(pcpu_rt->from, NULL); fib6_info_release(from); } -- cgit 1.2.3-korg From 386177da9e601ed176d54c04324d9ebf44c70620 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:44 +0300 Subject: ARC: add SMP_CACHE_BYTES value validate Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger or equal to any cache line length by comparing it with 
values previously read from ARC cache BCR registers. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9dbe645ee127e..b95365e1253a2 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1246,6 +1246,16 @@ void __init arc_cache_init_master(void) } } + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. + */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + /* Note that SLC disable not formally supported till HS 3.0 */ if (is_isa_arcv2() && l2_line_sz && !slc_enable) arc_slc_disable(); -- cgit 1.2.3-korg From 05b466bf846d2e8d2f0baf8dfd81a42cc933e237 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sat, 28 Jul 2018 10:54:41 +0300 Subject: ARC: [plat-eznps] Add missing struct nps_host_reg_aux_dpc Fixing compilation issue caused by missing struct nps_host_reg_aux_dpc definition. 
Fixes: 3f9cd874dcc87 ("ARC: [plat-eznps] avoid toggling of DPC register") Reported-by: Randy Dunlap Signed-off-by: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 0c7d11022d0f8..bd34b96bc5915 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -143,6 +143,15 @@ struct nps_host_reg_gim_p_int_dst { }; /* AUX registers definition */ +struct nps_host_reg_aux_dpc { + union { + struct { + u32 ien:1, men:1, hen:1, reserved:29; + }; + u32 value; + }; +}; + struct nps_host_reg_aux_udmc { union { struct { -- cgit 1.2.3-korg From b1f32ce1c3d2c11959b7e6a2c58dc5197c581966 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Jul 2018 11:10:51 -0700 Subject: arc: [plat-eznps] fix data type errors in platform headers Add to fix build errors. Both ctop.h and use u32 types and cause many errors. 
Examples: ../include/soc/nps/common.h:71:4: error: unknown type name 'u32' u32 __reserved:20, cluster:4, core:4, thread:4; ../include/soc/nps/common.h:76:3: error: unknown type name 'u32' u32 value; ../include/soc/nps/common.h:124:4: error: unknown type name 'u32' u32 base:8, cl_x:4, cl_y:4, ../include/soc/nps/common.h:127:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:83:4: error: unknown type name 'u32' u32 gen:1, gdis:1, clk_gate_dis:1, asb:1, ../arch/arc/plat-eznps/include/plat/ctop.h:86:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:93:4: error: unknown type name 'u32' u32 csa:22, dmsid:6, __reserved:3, cs:1; ../arch/arc/plat-eznps/include/plat/ctop.h:95:3: error: unknown type name 'u32' u32 value; Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index bd34b96bc5915..4f6a1673b3a6e 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -21,6 +21,7 @@ #error "Incorrect ctop.h include" #endif +#include #include /* core auxiliary registers */ -- cgit 1.2.3-korg From 9e2ea405543d9ddfe05b351f1679e53bd9c11f80 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: [plat-eznps] fix printk warning in arc/plat-eznps/mtm.c Fix printk format warning in arch/arc/plat-eznps/mtm.c: In file included from ../include/linux/printk.h:7, from ../include/linux/kernel.h:14, from ../include/linux/list.h:9, from ../include/linux/smp.h:12, from ../arch/arc/plat-eznps/mtm.c:17: ../arch/arc/plat-eznps/mtm.c: In function 'set_mtm_hs_ctr': ../include/linux/kern_levels.h:5:18: warning: format '%d' expects argument of type 'int', but argument 2 has type 'long int' 
[-Wformat=] #define KERN_SOH "\001" /* ASCII Start Of Header */ ^~~~~~ ../include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH' #define KERN_ERR KERN_SOH "3" /* error conditions */ ^~~~~~~~ ../include/linux/printk.h:308:9: note: in expansion of macro 'KERN_ERR' printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~ ../arch/arc/plat-eznps/mtm.c:166:3: note: in expansion of macro 'pr_err' pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ^~~~~~ ../arch/arc/plat-eznps/mtm.c:166:40: note: format string is defined here pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ~^ %ld The hs_ctr variable can just be int instead of long, so also change kstrtol() to kstrtoint() and leave the format string as %d. Also add 2 header files since they are used in mtm.c and we prefer not to depend on accidental/indirect #includes. Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/mtm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c index 2388de3d09ef9..ed0077ef666eb 100644 --- a/arch/arc/plat-eznps/mtm.c +++ b/arch/arc/plat-eznps/mtm.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include @@ -157,10 +159,10 @@ void mtm_enable_core(unsigned int cpu) /* Verify and set the value of the mtm hs counter */ static int __init set_mtm_hs_ctr(char *ctr_str) { - long hs_ctr; + int hs_ctr; int ret; - ret = kstrtol(ctr_str, 0, &hs_ctr); + ret = kstrtoint(ctr_str, 0, &hs_ctr); if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", -- cgit 1.2.3-korg From 2423665ec53f2a29191b35382075e9834288a975 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix build errors in arc/include/asm/delay.h Fix build errors in 
arch/arc/'s delay.h: - add "extern unsigned long loops_per_jiffy;" - add for "u64" In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~ In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/include/asm/delay.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index d5da2115d78a6..03d6bb0f4e13a 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -17,8 +17,11 @@ #ifndef __ASM_ARC_UDELAY_H #define __ASM_ARC_UDELAY_H +#include #include /* HZ */ +extern unsigned long loops_per_jiffy; + static inline void __delay(unsigned long loops) { __asm__ __volatile__( -- cgit 1.2.3-korg From ec837d620c750c0d4996a907c8c4f7febe1bbeee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix type warnings in arc/mm/cache.c Fix type warnings in arch/arc/mm/cache.c. 
../arch/arc/mm/cache.c: In function 'flush_anon_page': ../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion] __flush_dcache_page((phys_addr_t)page_address(page), page_address(page)); ^~~~~~~~~~~~~~~~~~ ../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *' void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) ~~~~~~~~~~~~~~^~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b95365e1253a2..25c631942500f 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1038,7 +1038,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, unsigned long pfn) { - unsigned int paddr = pfn << PAGE_SHIFT; + phys_addr_t paddr = pfn << PAGE_SHIFT; u_vaddr &= PAGE_MASK; @@ -1058,8 +1058,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long u_vaddr) { /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); } -- cgit 1.2.3-korg From 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 30 Jul 2018 18:32:36 +0100 Subject: netlink: Don't shift with UB on nlk->ngroups On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in hang during boot. Check for 0 ngroups and use (unsigned long long) as a type to shift. 
Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups"). Reported-by: kernel test robot Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ac805caed2e2c..7d860a22e5fb8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1008,7 +1008,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } - groups &= (1UL << nlk->ngroups) - 1; + + if (nlk->ngroups == 0) + groups = 0; + else + groups &= (1ULL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- cgit 1.2.3-korg From 6f57ed681ed817a4ec444e83f3aa2ad695d5ef34 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Jun 2018 10:11:10 -0400 Subject: sparc/time: Add missing __init to init_tick_ops() Code that was added to force gcc not to inline any function that isn't explicitly declared as inline uncovered that init_tick_ops() isn't marked as "__init". It is only called by __init functions and more importantly it too calls an __init function which would require it to be __init as well. Link: http://lkml.kernel.org/r/201806060444.hdHcKOBy%fengguang.wu@intel.com Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: David S. 
Miller --- arch/sparc/kernel/time_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 2ef8cfa9677ed..f0eba72aa1ad6 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -814,7 +814,7 @@ static void __init get_tick_patch(void) } } -static void init_tick_ops(struct sparc64_tick_ops *ops) +static void __init init_tick_ops(struct sparc64_tick_ops *ops) { unsigned long freq, quotient, tick; -- cgit 1.2.3-korg From f0afc6b18d3953fb96f836e4d1483eb9855a36b0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 24 Jul 2018 13:53:04 +0200 Subject: sparc: move MSI related definitions to where they are used The definitions in arch/sparc/include/asm/msi.h are only used in arch/sparc/mm/srmmu.c, so it makes sense to have them in the C file directly. In addition, having a custom arch/sparc/include/asm/msi.h prevents from using the asm-generic version of this header, which is necessary to be able to include when CONFIG_GENERIC_MSI_IRQ_DOMAIN is enabled. Signed-off-by: Thomas Petazzoni Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/msi.h | 32 -------------------------------- arch/sparc/mm/srmmu.c | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 33 deletions(-) delete mode 100644 arch/sparc/include/asm/msi.h diff --git a/arch/sparc/include/asm/msi.h b/arch/sparc/include/asm/msi.h deleted file mode 100644 index 3c17c10744313..0000000000000 --- a/arch/sparc/include/asm/msi.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * msi.h: Defines specific to the MBus - Sbus - Interface. - * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) - */ - -#ifndef _SPARC_MSI_H -#define _SPARC_MSI_H - -/* - * Locations of MSI Registers. - */ -#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */ - -/* - * Useful bits in the MSI Registers. 
- */ -#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */ - - -static inline void msi_set_sync(void) -{ - __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t" - "andn %%g3, %2, %%g3\n\t" - "sta %%g3, [%0] %1\n\t" : : - "r" (MSI_MBUS_ARBEN), - "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3"); -} - -#endif /* !(_SPARC_MSI_H) */ diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 1d70c3f6d9868..be9cb00651792 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -116,6 +115,25 @@ static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp) set_pte((pte_t *)ctxp, pte); } +/* + * Locations of MSI Registers. + */ +#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */ + +/* + * Useful bits in the MSI Registers. + */ +#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */ + +static void msi_set_sync(void) +{ + __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t" + "andn %%g3, %2, %%g3\n\t" + "sta %%g3, [%0] %1\n\t" : : + "r" (MSI_MBUS_ARBEN), + "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3"); +} + void pmd_set(pmd_t *pmdp, pte_t *ptep) { unsigned long ptp; /* Physical address, shifted right by 4 */ -- cgit 1.2.3-korg From 12be1036c536f849ad6f9bba73cffa708aa965c3 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 24 Jul 2018 13:53:05 +0200 Subject: sparc: use asm-generic version of msi.h This is necessary to be able to include when CONFIG_GENERIC_MSI_IRQ_DOMAIN is enabled. Without this, a build with CONFIG_GENERIC_MSI_IRQ_DOMAIN fails with: In file included from drivers//ata/ahci.c:45:0: >> include/linux/msi.h:226:10: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:230:9: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? 
msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:239:12: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:240:22: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*msi_finish)(msi_alloc_info_t *arg, int retval); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:241:20: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*set_desc)(msi_alloc_info_t *arg, ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:316:18: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:318:29: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int virq, int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index ac67828da2010..410b263ef5c84 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h +generic-y += msi.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h -- cgit 1.2.3-korg From b305f7ed0f4f494ad6f3ef5667501535d5a8fa31 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 25 Jul 2018 10:26:19 +0800 Subject: audit: fix potential null dereference 'context->module.name' The variable 'context->module.name' may be null pointer when kmalloc return null, so it's better to check it before using to avoid null dereference. Another one more thing this patch does is using kstrdup instead of (kmalloc + strcpy), and signal a lost record via audit_log_lost. 
Cc: stable@vger.kernel.org # 4.11 Signed-off-by: Yi Wang Reviewed-by: Jiang Biao Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditsc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ceb1c4596c511..80d672a110888 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1279,8 +1279,12 @@ static void show_special(struct audit_context *context, int *call_panic) break; case AUDIT_KERN_MODULE: audit_log_format(ab, "name="); - audit_log_untrustedstring(ab, context->module.name); - kfree(context->module.name); + if (context->module.name) { + audit_log_untrustedstring(ab, context->module.name); + kfree(context->module.name); + } else + audit_log_format(ab, "(null)"); + break; } audit_log_end(ab); @@ -2411,8 +2415,9 @@ void __audit_log_kern_module(char *name) { struct audit_context *context = audit_context(); - context->module.name = kmalloc(strlen(name) + 1, GFP_KERNEL); - strcpy(context->module.name, name); + context->module.name = kstrdup(name, GFP_KERNEL); + if (!context->module.name) + audit_log_lost("out of memory in __audit_log_kern_module"); context->type = AUDIT_KERN_MODULE; } -- cgit 1.2.3-korg From 573b3aa6940661dc50c383213d428c27df78be7c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 30 Jul 2018 08:49:03 -0700 Subject: tools/bpftool: fix a percpu_array map dump problem I hit the following problem when I tried to use bpftool to dump a percpu array. $ sudo ./bpftool map show 61: percpu_array name stub flags 0x0 key 4B value 4B max_entries 1 memlock 4096B ... $ sudo ./bpftool map dump id 61 bpftool: malloc.c:2406: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || \ ((unsigned long) (old_size) >= MINSIZE && \ prev_inuse (old_top) && \ ((unsigned long) old_end & (pagesize - 1)) == 0)' failed. Aborted Further debugging revealed that this is due to miscommunication between bpftool and kernel. 
For example, for the above percpu_array with value size of 4B. The map info returned to user space has value size of 4B. In bpftool, the values array for lookup is allocated like: info->value_size * get_possible_cpus() = 4 * get_possible_cpus() In kernel (kernel/bpf/syscall.c), the values array size is rounded up to multiple of 8. round_up(map->value_size, 8) * num_possible_cpus() = 8 * num_possible_cpus() So when kernel copies the values to user buffer, the kernel will overwrite beyond user buffer boundary. This patch fixed the issue by allocating and stepping through percpu map value array properly in bpftool. Fixes: 71bb428fe2c19 ("tools: bpf: add bpftool") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 097b1a5e046b2..f74a8bcbda874 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,8 @@ static bool map_is_map_of_progs(__u32 type) static void *alloc_value(struct bpf_map_info *info) { if (map_is_per_cpu(info->type)) - return malloc(info->value_size * get_possible_cpus()); + return malloc(round_up(info->value_size, 8) * + get_possible_cpus()); else return malloc(info->value_size); } @@ -161,9 +163,10 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_name(json_wtr, "value"); print_hex_data_json(value, info->value_size); } else { - unsigned int i, n; + unsigned int i, n, step; n = get_possible_cpus(); + step = round_up(info->value_size, 8); jsonw_name(json_wtr, "key"); print_hex_data_json(key, info->key_size); @@ -176,7 +179,7 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_int_field(json_wtr, "cpu", i); jsonw_name(json_wtr, "value"); - print_hex_data_json(value + i * info->value_size, + print_hex_data_json(value 
+ i * step, info->value_size); jsonw_end_object(json_wtr); @@ -207,9 +210,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, printf("\n"); } else { - unsigned int i, n; + unsigned int i, n, step; n = get_possible_cpus(); + step = round_up(info->value_size, 8); printf("key:\n"); fprint_hex(stdout, key, info->key_size, " "); @@ -217,7 +221,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? '\n' : ' '); - fprint_hex(stdout, value + i * info->value_size, + fprint_hex(stdout, value + i * step, info->value_size, " "); printf("\n"); } -- cgit 1.2.3-korg From 2d55d614fcf58187e2937dba11643b9471cd64d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Jul 2018 20:20:08 -0700 Subject: net: xsk: don't return frames via the allocator on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_return_buff() is used when frame has been successfully handled (transmitted) or if an error occurred during delayed processing and there is no way to report it back to xdp_do_redirect(). In case of __xsk_rcv_zc() error is propagated all the way back to the driver, so there is no need to call xdp_return_buff(). Driver will recycle the frame anyway after seeing that error happened. 
Fixes: 173d3adb6f43 ("xsk: add zero-copy support for Rx") Signed-off-by: Jakub Kicinski Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72335c2e81089..4e937cd7c17dc 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -84,10 +84,8 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); - if (err) { - xdp_return_buff(xdp); + if (err) xs->rx_dropped++; - } return err; } -- cgit 1.2.3-korg From d512584780d3e6a7cacb2f482834849453d444a1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Jul 2018 14:27:15 -0700 Subject: squashfs: more metadata hardening Anatoly reports another squashfs fuzzing issue, where the decompression parameters themselves are in a compressed block. This causes squashfs_read_data() to be called in order to read the decompression options before the decompression stream has been set up, making squashfs go sideways.
Reported-by: Anatoly Trosinenko Acked-by: Phillip Lougher Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/squashfs/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2751476e6b6e8..f098b9f1c3963 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) -- cgit 1.2.3-korg From 156c8b58ef5cfd97245928c95669fd4cb0f9c388 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Jul 2018 08:28:08 -0400 Subject: perf/x86/intel/uncore: Fix hardcoded index of Broadwell extra PCI devices Masayoshi Mizuma reported that a warning message is shown while a CPU is hot-removed on Broadwell servers: WARNING: CPU: 126 PID: 6 at arch/x86/events/intel/uncore.c:988 uncore_pci_remove+0x10b/0x150 Call Trace: pci_device_remove+0x42/0xd0 device_release_driver_internal+0x148/0x220 pci_stop_bus_device+0x76/0xa0 pci_stop_root_bus+0x44/0x60 acpi_pci_root_remove+0x1f/0x80 acpi_bus_trim+0x57/0x90 acpi_bus_trim+0x2e/0x90 acpi_device_hotplug+0x2bc/0x4b0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x174/0x3a0 worker_thread+0x4c/0x3d0 kthread+0xf8/0x130 This bug was introduced by: commit 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") The index of "QPI Port 2 filter" was hardcoded to 2, but this conflicts with the index of "PCU.3", which is "HSWEP_PCI_PCU_3" and also equals 2. To fix the conflict, the hardcoded index needs to be cleaned up: - introduce a new enumerator "BDX_PCI_QPI_PORT2_FILTER" for "QPI Port 2 filter" on Broadwell, - increase UNCORE_EXTRA_PCI_DEV_MAX by one, - clean up the hardcoded index.
Debugged-by: Masayoshi Mizuma Suggested-by: Ingo Molnar Reported-by: Masayoshi Mizuma Tested-by: Masayoshi Mizuma Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: msys.mizuma@gmail.com Cc: stable@vger.kernel.org Fixes: 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") Link: http://lkml.kernel.org/r/1532953688-15008-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.h | 2 +- arch/x86/events/intel/uncore_snbep.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index c9e1e0bef3c36..e17ab885b1e92 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -28,7 +28,7 @@ #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff -#define UNCORE_EXTRA_PCI_DEV_MAX 3 +#define UNCORE_EXTRA_PCI_DEV_MAX 4 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 87dc0263a2e1e..51d7c117e3c70 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void) enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, + BDX_PCI_QPI_PORT2_FILTER, HSWEP_PCI_PCU_3, }; @@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { }, { /* QPI Port 0 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), }, { /* QPI Port 1 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96), - .driver_data = 
UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), }, { /* QPI Port 2 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + BDX_PCI_QPI_PORT2_FILTER), }, { /* PCU.3 (for Capability registers) */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), -- cgit 1.2.3-korg From 01e61a42a5d345a4c0205889498f0c9a0fb9ee8c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 30 Jul 2018 15:00:29 -0700 Subject: cpufreq: intel_pstate: Limit the scope of HWP dynamic boost platforms Dynamic boosting of HWP performance on IO wake showed significant improvement to IO workloads. This series was intended for Skylake Xeon platforms only and the feature was enabled by default based on CPU model number. But some Xeon platforms reused the Skylake desktop CPU model number. This caused some undesirable side effects to some graphics workloads. Since they are heavily IO bound, the increase in CPU performance decreased the power available for GPU to do its computing and hence decreased graphics benchmark performance. For example on a Skylake desktop, GpuTest benchmark showed average FPS reduction from 529 to 506. This change makes sure that HWP boost feature is only enabled for Skylake server platforms by using ACPI FADT preferred PM Profile. If some desktop users want to get the benefit of boost, they can still enable boost from intel_pstate sysfs attribute "hwp_dynamic_boost". Fixes: 41ab43c9c89e (cpufreq: intel_pstate: enable boost for Skylake Xeon) Link: https://bugs.freedesktop.org/show_bug.cgi?id=107410 Reported-by: Eero Tamminen Signed-off-by: Srinivas Pandruvada Reviewed-by: Francisco Jerez Acked-by: Mel Gorman Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/intel_pstate.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3c39712561303..d4ed0022b0dd2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -311,12 +311,20 @@ static DEFINE_MUTEX(intel_pstate_limits_lock); #ifdef CONFIG_ACPI -static bool intel_pstate_get_ppc_enable_status(void) +static bool intel_pstate_acpi_pm_profile_server(void) { if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) return true; + return false; +} + +static bool intel_pstate_get_ppc_enable_status(void) +{ + if (intel_pstate_acpi_pm_profile_server()) + return true; + return acpi_ppc; } @@ -459,6 +467,11 @@ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *pol static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) { } + +static inline bool intel_pstate_acpi_pm_profile_server(void) +{ + return false; +} #endif static inline void update_turbo_state(void) @@ -1841,7 +1854,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_hwp_enable(cpu); id = x86_match_cpu(intel_pstate_hwp_boost_ids); - if (id) + if (id && intel_pstate_acpi_pm_profile_server()) hwp_boost = true; } -- cgit 1.2.3-korg From c7513c2a2714204d3588ecaa170ae628fd0d217e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 27 Jul 2018 14:59:15 +0200 Subject: crypto/arm64: aes-ce-gcm - add missing kernel_neon_begin/end pair Calling pmull_gcm_encrypt_block() requires kernel_neon_begin() and kernel_neon_end() to be used since the routine touches the NEON register file. Add the missing calls. Also, since NEON register contents are not preserved outside of a kernel mode NEON region, pass the key schedule array again. 
Fixes: 7c50136a8aba ("crypto: arm64/aes-ghash - yield NEON after every ...") Acked-by: Herbert Xu Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/crypto/ghash-ce-glue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 7cf0b1aa6ea80..8a10f1d7199ae 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -488,9 +488,13 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes) - pmull_gcm_encrypt_block(iv, iv, NULL, + if (walk.nbytes) { + kernel_neon_begin(); + pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc, num_rounds(&ctx->aes_key)); + kernel_neon_end(); + } + } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); -- cgit 1.2.3-korg From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Tue, 3 Jul 2018 14:35:41 +0530 Subject: PCI: Fix is_added/is_busmaster race condition When a PCI device is detected, pdev->is_added is set to 1 and proc and sysfs entries are created. When the device is removed, pdev->is_added is checked for one and then device is detached with clearing of proc and sys entries and at end, pdev->is_added is set to 0. is_added and is_busmaster are bit fields in pci_dev structure sharing same memory location. A strange issue was observed with multiple removal and rescan of a PCIe NVMe device using sysfs commands where is_added flag was observed as zero instead of one while removing device and proc,sys entries are not cleared. This causes issue in later device addition with warning message "proc_dir_entry" already registered. Debugging revealed a race condition between the PCI core setting the is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue setting the is_busmaster bit in pci_set_master(). 
As these fields are not handled atomically, that clears the is_added bit. Move the is_added bit to a separate private flag variable and use atomic functions to set and retrieve the device addition state. This avoids the race because is_added no longer shares a memory location with is_busmaster. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283 Signed-off-by: Hari Vyas Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Acked-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 4 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- arch/powerpc/platforms/pseries/setup.c | 3 ++- drivers/pci/bus.c | 6 +++--- drivers/pci/hotplug/acpiphp_glue.c | 2 +- drivers/pci/pci.h | 11 +++++++++++ drivers/pci/probe.c | 4 ++-- drivers/pci/remove.c | 5 +++-- include/linux/pci.h | 1 - 9 files changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index fe9733ffffaa4..471aac313b899 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -42,6 +42,8 @@ #include #include +#include "../../../drivers/pci/pci.h" + /* hose_spinlock protects accesses to the the phb_bitmap. 
*/ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); @@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus) /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pcibios_setup_device(dev); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bcb..70b2e1e0f23c2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -46,6 +46,7 @@ #include "powernv.h" #include "pci.h" +#include "../../../../drivers/pci/pci.h" #define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ @@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; pdn = pci_get_pdn(pdev); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 139f0af6c3d91..8a4868a3964ba 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include #include "pseries.h" +#include "../../../../drivers/pci/pci.h" int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) const int *indexes; struct device_node *dn = pci_device_to_OF_node(pdev); - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 35b7fc87eac50..5cb40b2518f93 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev) return; } - 
dev->is_added = 1; + pci_dev_assign_added(dev, true); } EXPORT_SYMBOL_GPL(pci_bus_add_device); @@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip already-added devices */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pci_bus_add_device(dev); } list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip if device attach failed */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) continue; child = dev->subordinate; if (child) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 3a17b290df5dd..ef0b1b6ba86f8 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot) list_for_each_entry(dev, &bus->devices, bus_list) { /* Assume that newly added devices are powered on already. */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) dev->current_state = PCI_D0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 882f1f9596dff..08817253c8a2f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -288,6 +288,7 @@ struct pci_sriov { /* pci_dev priv_flags */ #define PCI_DEV_DISCONNECTED 0 +#define PCI_DEV_ADDED 1 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { @@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags); } +static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) +{ + assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); +} + +static inline bool pci_dev_is_added(const struct pci_dev *dev) +{ + return test_bit(PCI_DEV_ADDED, &dev->priv_flags); +} + #ifdef CONFIG_PCI_ATS void pci_restore_ats_state(struct pci_dev *dev); #else diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ac876e32de4b0..611adcd9c1698 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2433,13 
+2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) dev = pci_scan_single_device(bus, devfn); if (!dev) return 0; - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { dev = pci_scan_single_device(bus, devfn + fn); if (dev) { - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; dev->multifunction = 1; } diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 6f072eae4f7a5..5e3d0dced2b8d 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev) { pci_pme_active(dev, false); - if (dev->is_added) { + if (pci_dev_is_added(dev)) { device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); - dev->is_added = 0; + + pci_dev_assign_added(dev, false); } if (dev->bus->self) diff --git a/include/linux/pci.h b/include/linux/pci.h index abd5d5e17aeed..c133ccfa002e1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -368,7 +368,6 @@ struct pci_dev { unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int multifunction:1; /* Multi-function device */ - unsigned int is_added:1; unsigned int is_busmaster:1; /* Is busmaster */ unsigned int no_msi:1; /* May not use MSI */ unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ -- cgit 1.2.3-korg From 6751e7c66cb8689491b89fe02c71d1d44394412b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 31 Jul 2018 19:19:50 +0200 Subject: net: dsa: mv88e6xxx: Fix SERDES support on 88E6141/6341 Version 1 of the patch adding SERDES support to the 88E6141/6341 correctly added the ops to the 88E6141/6341. However, by the time version 3 was committed, the ops had moved to the 88E6085/6175. Put them back where they belong. Fixes: 5bafeb6e7e87 ("net: dsa: mv88e6xxx: 88E6141/6341 SERDES support") Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9ef07a06aceb6..bb28c701381a6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2617,7 +2617,6 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .rmu_disable = mv88e6085_g1_rmu_disable, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .serdes_power = mv88e6341_serdes_power, }; static const struct mv88e6xxx_ops mv88e6095_ops = { @@ -2783,6 +2782,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .serdes_power = mv88e6341_serdes_power, .gpio_ops = &mv88e6352_gpio_ops, }; @@ -2960,7 +2960,6 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, - .serdes_power = mv88e6341_serdes_power, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -3336,6 +3335,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .serdes_power = mv88e6341_serdes_power, .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6390_avb_ops, }; -- cgit 1.2.3-korg From 5f5991f36dce1e69dd8bd7495763eec2e28f08e7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 16 Jul 2018 11:49:27 +0300 Subject: net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager Execute mlx5_eswitch_init() only if we have MLX5_ESWITCH_MANAGER capabilities. Do the same for mlx5_eswitch_cleanup(). 
Fixes: a9f7705ffd66 ("net/mlx5: Unify vport manager capability check") Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index dd01ad4c0b547..40dba9e8af926 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1696,7 +1696,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int vport_num; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; esw_info(dev, @@ -1765,7 +1765,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); -- cgit 1.2.3-korg From 2e8e70d249e8c5c79bf88bbb36bb68154ab15471 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 19 Jul 2018 16:17:00 +0000 Subject: net/mlx5e: Set port trust mode to PCP as default The hairpin offload code has dependency on the trust mode being PCP. Hence we should set PCP as the default for handling cases where we are disallowed to read the trust mode from the FW, or failed to initialize it. 
Fixes: 106be53b6b0a ('net/mlx5e: Set per priority hairpin pairs') Signed-off-by: Or Gerlitz Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 86bc9ac99586e..e33afa8d24179 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1172,6 +1172,8 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; int err; + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + if (!MLX5_DSCP_SUPPORTED(mdev)) return 0; -- cgit 1.2.3-korg From eacecf2760e321bf1c99b981d44a14bd05721f15 Mon Sep 17 00:00:00 2001 From: Adi Nissim Date: Tue, 5 Jun 2018 11:32:11 +0300 Subject: net/mlx5e: Fix null pointer access when setting MTU of vport representor MTU helper function is used by both conventional mlx5e instances (PF/VF) and the eswitch representors. The representor shouldn't change the nic vport context MTU, the VF is responsible for that. Therefore set_mtu_cb has a null value when changing the representor MTU. 
Fixes: 250a42b6a764 ("net/mlx5e: Support configurable MTU for vport representors") Signed-off-by: Adi Nissim Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dae4156a710dd..c592678ab5f14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3712,7 +3712,8 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (!reset) { params->sw_mtu = new_mtu; - set_mtu_cb(priv); + if (set_mtu_cb) + set_mtu_cb(priv); netdev->mtu = params->sw_mtu; goto out; } -- cgit 1.2.3-korg From 8e1d162d8e81838119de18b4ca1e302ce906f2a6 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Sun, 15 Jul 2018 13:59:36 +0300 Subject: net/mlx5e: IPoIB, Set the netdevice sw mtu in ipoib enhanced flow After introduction of the cited commit, mlx5e_build_nic_params receives the netdevice mtu in order to set the sw_mtu of mlx5e_params. For enhanced IPoIB, the netdevice mtu is not set in this stage, therefore, the initial sw_mtu equals zero. As a result, the hw_mtu of the receive queue will be calculated incorrectly causing traffic issues. To fix this issue, query for port mtu before building the nic params. 
Fixes: 472a1e44b349 ("net/mlx5e: Save MTU in channels params") Signed-off-by: Feras Daoud Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index af3bb2f7a5048..b7c21eb21a218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -76,6 +76,7 @@ void mlx5i_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + u16 max_mtu; /* priv init */ priv->mdev = mdev; @@ -84,6 +85,9 @@ void mlx5i_init(struct mlx5_core_dev *mdev, priv->ppriv = ppriv; mutex_init(&priv->state_lock); + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->mtu = max_mtu; + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); -- cgit 1.2.3-korg From 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 31 Jul 2018 18:13:58 +0200 Subject: nohz: Fix local_timer_softirq_pending() local_timer_softirq_pending() checks whether the timer softirq is pending with: local_softirq_pending() & TIMER_SOFTIRQ. This is wrong because TIMER_SOFTIRQ is the softirq number and not a bitmask. So the test checks for the wrong bit. Use BIT(TIMER_SOFTIRQ) instead. Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. 
McKenney Reviewed-by: Daniel Bristot de Oliveira Acked-by: Frederic Weisbecker Cc: bigeasy@linutronix.de Cc: peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da9455a6b42ba..5b33e2f5c0ed3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -642,7 +642,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & TIMER_SOFTIRQ; + return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) -- cgit 1.2.3-korg From 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2018 20:09:11 -0700 Subject: inet: frag: enforce memory limits earlier We currently check current frags memory usage only when a new frag queue is created. This allows attackers to first consume the memory budget (default : 4 MB) creating thousands of frag queues, then sending tiny skbs to exceed high_thresh limit by 2 to 3 order of magnitude. Note that before commit 648700f76b03 ("inet: frags: use rhashtables for reassembly units"), work queue could be starved under DOS, getting no cpu cycles. After commit 648700f76b03, only the per frag queue timer can eventually remove an incomplete frag queue and its skbs. Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Signed-off-by: Eric Dumazet Reported-by: Jann Horn Cc: Florian Westphal Cc: Peter Oskolkov Cc: Paolo Abeni Acked-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/ipv4/inet_fragment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1e4cf3ab560fa..0d70608cc2e18 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -157,9 +157,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) - return NULL; - q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -204,6 +201,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { struct inet_frag_queue *fq; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) + return NULL; + rcu_read_lock(); fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); -- cgit 1.2.3-korg From 4672694bd4f1aebdab0ad763ae4716e89cb15221 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2018 21:50:29 -0700 Subject: ipv4: frags: handle possible skb truesize change ip_frag_queue() might call pskb_pull() on one skb that is already in the fragment queue. We need to take care of possible truesize change, or we might have an imbalance of the netns frags memory usage. IPv6 is immune to this bug, because RFC5722, Section 4, amended by Errata ID 3089 states : When reassembling an IPv6 datagram, if one or more its constituent fragments is determined to be an overlapping fragment, the entire datagram (and any constituent fragments) MUST be silently discarded. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_fragment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8e9528ebaa8e1..d14d741fb05e5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -383,11 +383,16 @@ found: int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ if (i < next->len) { + int delta = -next->truesize; + /* Eat head of the next overlapped fragment * and leave the loop. The next ones cannot overlap. */ if (!pskb_pull(next, i)) goto err; + delta += next->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); next->ip_defrag_offset += i; qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) -- cgit 1.2.3-korg From cb5c6568867325f9905e80c96531d963bec8e5ea Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Mon, 30 Jul 2018 09:56:54 -0700 Subject: enic: do not call enic_change_mtu in enic_probe In commit ab123fe071c9 ("enic: handle mtu change for vf properly") ASSERT_RTNL() is added to _enic_change_mtu() to prevent it from being called without rtnl held. enic_probe() calls enic_change_mtu() without rtnl held. At this point netdev is not registered yet. Remove call to enic_change_mtu and assign the mtu to netdev->mtu. Fixes: ab123fe071c9 ("enic: handle mtu change for vf properly") Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 6b0376123cdeb..60641e2025341 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2892,7 +2892,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ enic->port_mtu = enic->config.mtu; - (void)enic_change_mtu(netdev, enic->port_mtu); err = enic_set_mac_addr(netdev, enic->mac_addr); if (err) { @@ -2982,6 +2981,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* MTU range: 68 - 9000 */ netdev->min_mtu = ENIC_MIN_MTU; netdev->max_mtu = ENIC_MAX_MTU; + netdev->mtu = enic->port_mtu; err = register_netdev(netdev); if (err) { -- cgit 1.2.3-korg From cca19f0b684f4ed6aabf6ad07ae3e15e77bfd78a Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 31 Jul 2018 15:24:52 +0200 Subject: powerpc/64s/radix: Fix missing global invalidations when removing copro With the optimizations for TLB invalidation from commit 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask"), the scope of a TLBI (global vs. local) can now be influenced by the value of the 'copros' counter of the memory context. When calling mm_context_remove_copro(), the 'copros' counter is decremented first before flushing. It may have the unintended side effect of sending local TLBIs when we explicitly need global invalidations in this case. Thus breaking any nMMU user in a bad and unpredictable way. Fix it by flushing first, before updating the 'copros' counter, so that invalidations will be global. 
Fixes: 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Frederic Barrat Reviewed-by: Nicholas Piggin Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 79d570cbf3325..b2f89b621b159 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) { int c; - c = atomic_dec_if_positive(&mm->context.copros); - - /* Detect imbalance between add and remove */ - WARN_ON(c < 0); - /* - * Need to broadcast a global flush of the full mm before - * decrementing active_cpus count, as the next TLBI may be - * local and the nMMU and/or PSL need to be cleaned up. - * Should be rare enough so that it's acceptable. + * When removing the last copro, we need to broadcast a global + * flush of the full mm, as the next TLBI may be local and the + * nMMU and/or PSL need to be cleaned up. + * + * Both the 'copros' and 'active_cpus' counts are looked at in + * flush_all_mm() to determine the scope (local/global) of the + * TLBIs, so we need to flush first before decrementing + * 'copros'. If this API is used by several callers for the + * same context, it can lead to over-flushing. It's hopefully + * not common enough to be a problem. * * Skip on hash, as we don't know how to do the proper flush * for the time being. Invalidations will remain global if - * used on hash. + * used on hash. Note that we can't drop 'copros' either, as + * it could make some invalidations local with no flush + * in-between. 
*/ - if (c == 0 && radix_enabled()) { + if (radix_enabled()) { flush_all_mm(mm); - dec_mm_active_cpus(mm); + + c = atomic_dec_if_positive(&mm->context.copros); + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + + if (c == 0) + dec_mm_active_cpus(mm); } } #else -- cgit 1.2.3-korg From 7e2556e40026a1b0c16f37446ab398d5a5a892e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2018 06:30:54 -0700 Subject: bonding: avoid lockdep confusion in bond_get_stats() syzbot found that the following sequence produces a LOCKDEP splat [1] ip link add bond10 type bond ip link add bond11 type bond ip link set bond11 master bond10 To fix this, we can use the already provided nest_level. This patch also provides correct nesting for dev->addr_list_lock [1] WARNING: possible recursive locking detected 4.18.0-rc6+ #167 Not tainted -------------------------------------------- syz-executor751/4439 is trying to acquire lock: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 but task is already holding lock: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&bond->stats_lock)->rlock); lock(&(&bond->stats_lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by syz-executor751/4439: #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 
drivers/net/bonding/bond_main.c:3426 #2: (____ptrval____) (rcu_read_lock){....}, at: bond_get_stats+0x0/0x560 include/linux/compiler.h:215 stack backtrace: CPU: 0 PID: 4439 Comm: syz-executor751 Not tainted 4.18.0-rc6+ #167 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] check_deadlock kernel/locking/lockdep.c:1809 [inline] validate_chain kernel/locking/lockdep.c:2405 [inline] __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:310 [inline] bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 dev_get_stats+0x10f/0x470 net/core/dev.c:8316 bond_get_stats+0x232/0x560 drivers/net/bonding/bond_main.c:3432 dev_get_stats+0x10f/0x470 net/core/dev.c:8316 rtnl_fill_stats+0x4d/0xac0 net/core/rtnetlink.c:1169 rtnl_fill_ifinfo+0x1aa6/0x3fb0 net/core/rtnetlink.c:1611 rtmsg_ifinfo_build_skb+0xc8/0x190 net/core/rtnetlink.c:3268 rtmsg_ifinfo_event.part.30+0x45/0xe0 net/core/rtnetlink.c:3300 rtmsg_ifinfo_event net/core/rtnetlink.c:3297 [inline] rtnetlink_event+0x144/0x170 net/core/rtnetlink.c:4716 notifier_call_chain+0x180/0x390 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 call_netdevice_notifiers net/core/dev.c:1753 [inline] netdev_features_change net/core/dev.c:1321 [inline] netdev_change_features+0xb3/0x110 net/core/dev.c:7759 bond_compute_features.isra.47+0x585/0xa50 drivers/net/bonding/bond_main.c:1120 bond_enslave+0x1b25/0x5da0 drivers/net/bonding/bond_main.c:1755 bond_do_ioctl+0x7cb/0xae0 
drivers/net/bonding/bond_main.c:3528 dev_ifsioc+0x43c/0xb30 net/core/dev_ioctl.c:327 dev_ioctl+0x1b5/0xcc0 net/core/dev_ioctl.c:493 sock_do_ioctl+0x1d3/0x3e0 net/socket.c:992 sock_ioctl+0x30d/0x680 net/socket.c:1093 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:500 [inline] do_vfs_ioctl+0x1de/0x1720 fs/ioctl.c:684 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701 __do_sys_ioctl fs/ioctl.c:708 [inline] __se_sys_ioctl fs/ioctl.c:706 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440859 Code: e8 2c af 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc51a92878 EFLAGS: 00000213 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440859 RDX: 0000000020000040 RSI: 0000000000008990 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000022d5880 R11: 0000000000000213 R12: 0000000000007390 R13: 0000000000401db0 R14: 0000000000000000 R15: 0000000000000000 Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 63e3844c5becf..217b790d22edc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1717,6 +1717,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_upper_unlink; } + bond->nest_level = dev_get_nest_level(bond_dev) + 1; + /* If the mode uses primary, then the following is handled by * bond_change_active_slave(). 
*/ @@ -1764,7 +1766,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (bond_mode_can_use_xmit_hash(bond)) bond_update_slave_arr(bond, NULL); - bond->nest_level = dev_get_nest_level(bond_dev); netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", slave_dev->name, @@ -3415,6 +3416,13 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +static int bond_get_nest_level(struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + + return bond->nest_level; +} + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3423,7 +3431,7 @@ static void bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; - spin_lock(&bond->stats_lock); + spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev)); memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); @@ -4227,6 +4235,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, + .ndo_get_lock_subclass = bond_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = bond_netpoll_setup, .ndo_netpoll_cleanup = bond_netpoll_cleanup, @@ -4725,6 +4734,7 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; + bond->nest_level = SINGLE_DEPTH_NESTING; netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); -- cgit 1.2.3-korg From b7d0f08e9129c45ed41bc0cfa8e77067881e45fd Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 31 Jul 2018 15:08:20 +0100 Subject: net: stmmac: Fix WoL for PCI-based setups WoL won't work in PCI-based setups because we are not saving the PCI EP state before entering suspend state and not allowing D3 wake. Fix this by using a wrapper around stmmac_{suspend/resume} which correctly sets the PCI EP state. 
Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 40 ++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 8d375e51a5265..6a393b16a1fca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -257,7 +257,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pcim_enable_device(pdev); + ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -300,9 +300,45 @@ static int stmmac_pci_probe(struct pci_dev *pdev, static void stmmac_pci_remove(struct pci_dev *pdev) { stmmac_dvr_remove(&pdev->dev); + pci_disable_device(pdev); } -static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_suspend, stmmac_resume); +static int stmmac_pci_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int stmmac_pci_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume); /* synthetic ID, no official vendor */ #define PCI_VENDOR_ID_STMMAC 0x700 -- cgit 1.2.3-korg From e02ee9819a03c5d6439636c8fc152b4cc1b48304 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 31 Jul 2018 10:45:53 -0500 Subject: Documentation: dpaa2: Use correct heading 
adornment Add overline heading adornment to document title in order to comply with kernel doc requirements. Fixes: 60b9131 staging: fsl-mc: Convert documentation to rst format Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- Documentation/networking/dpaa2/overview.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/dpaa2/overview.rst b/Documentation/networking/dpaa2/overview.rst index 79fede4447d61..d638b5a8aadd4 100644 --- a/Documentation/networking/dpaa2/overview.rst +++ b/Documentation/networking/dpaa2/overview.rst @@ -1,5 +1,6 @@ .. include:: +========================================================= DPAA2 (Data Path Acceleration Architecture Gen2) Overview ========================================================= -- cgit 1.2.3-korg From bc5b6c0b62b932626a135f516a41838c510c6eba Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 21:13:16 +0000 Subject: netlink: Fix spectre v1 gadget in netlink_create() 'protocol' is a user-controlled value, so sanitize it after the bounds check to avoid using it for speculative out-of-bounds access to arrays indexed by it. This addresses the following accesses detected with the help of smatch: * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_keys' [w] * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_key_strings' [w] * net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre issue 'nl_table' [w] (local cap) Cc: Josh Poimboeuf Signed-off-by: Jeremy Cline Reviewed-by: Josh Poimboeuf Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7d860a22e5fb8..c09d16870f74a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -679,6 +680,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; + protocol = array_index_nospec(protocol, MAX_LINKS); netlink_lock_table(); #ifdef CONFIG_MODULES -- cgit 1.2.3-korg From a94c689e6c9e72e722f28339e12dff191ee5a265 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 31 Jul 2018 17:12:52 -0700 Subject: net: dsa: Do not suspend/resume closed slave_dev If a DSA slave network device was previously disabled, there is no need to suspend or resume it. Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e3b6a6d8a40d..732369c80644a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1248,6 +1248,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_detach(slave_dev); rtnl_lock(); @@ -1261,6 +1264,9 @@ int dsa_slave_resume(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_attach(slave_dev); rtnl_lock(); -- cgit 1.2.3-korg From ebad825cdd4e6b327eaf0dd72439408957049cea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 09:57:50 -0700 Subject: ia64: mark special ia64 memory areas anonymous Commit bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives") made newly allocated vma's have a dummy vm_ops field so that they wouldn't be mistaken for anonymous mappings, and if you wanted an anonymous vma you had to explicitly say so by calling "vma_set_anonymous()" on it. However, it missed the two special vmas that ia64 processes have: the register backing store and the NaT page. So they wouldn't actually act like anonymous ranges, and page faults on them caused a SIGBUS rather than the creation of a new anon page in them. That obviously will make any ia64 binary very unhappy indeed, and the boot fails early. 
Fixes: bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives") Reported-by: Tony Luck Cc: Kirill Shutemov Cc: Andrew Morton Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: John Stultz Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index e6c6dfd98de29..3b85c3ecac38d 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -116,6 +116,7 @@ ia64_init_addr_space (void) */ vma = vm_area_alloc(current->mm); if (vma) { + vma_set_anonymous(vma); vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; @@ -133,6 +134,7 @@ ia64_init_addr_space (void) if (!(current->personality & MMAP_PAGE_ZERO)) { vma = vm_area_alloc(current->mm); if (vma) { + vma_set_anonymous(vma); vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | -- cgit 1.2.3-korg From 44960f2a7b63e224b1091b3e1d6f60e0cdf4be0c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 31 Jul 2018 10:17:04 -0700 Subject: staging: ashmem: Fix SIGBUS crash when traversing mmaped ashmem pages Amit Pundir and Youling in parallel reported crashes with recent mainline kernels running Android: F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** F DEBUG : Build fingerprint: 'Android/db410c32_only/db410c32_only:Q/OC-MR1/102:userdebug/test-key F DEBUG : Revision: '0' F DEBUG : ABI: 'arm' F DEBUG : pid: 2261, tid: 2261, name: zygote >>> zygote <<< F DEBUG : signal 7 (SIGBUS), code 2 (BUS_ADRERR), fault addr 0xec00008 ... ... 
F DEBUG : backtrace: F DEBUG : #00 pc 00001c04 /system/lib/libc.so (memset+48) F DEBUG : #01 pc 0010c513 /system/lib/libart.so (create_mspace_with_base+82) F DEBUG : #02 pc 0015c601 /system/lib/libart.so (art::gc::space::DlMallocSpace::CreateMspace(void*, unsigned int, unsigned int)+40) F DEBUG : #03 pc 0015c3ed /system/lib/libart.so (art::gc::space::DlMallocSpace::CreateFromMemMap(art::MemMap*, std::__1::basic_string, std::__1::allocator> const&, unsigned int, unsigned int, unsigned int, unsigned int, bool)+36) ... This was bisected back to commit bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives"). create_mspace_with_base() in the trace above, utilizes ashmem, and with ashmem, for shared mappings we use shmem_zero_setup(), which sets the vma->vm_ops to &shmem_vm_ops. But for private ashmem mappings nothing sets the vma->vm_ops. Looking at the problematic patch, it seems to add a requirement that one call vma_set_anonymous() on a vma, otherwise the dummy_vm_ops will be used. Using the dummy_vm_ops seems to trigger SIGBUS when traversing unmapped pages. Thus, this patch adds a call to vma_set_anonymous() for ashmem private mappings and seems to avoid the reported problem. 
Fixes: bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives") Cc: Kirill Shutemov Cc: Andrew Morton Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Joel Fernandes Cc: Colin Cross Cc: Matthew Wilcox Reported-by: Amit Pundir Reported-by: Youling 257 Signed-off-by: John Stultz Signed-off-by: Linus Torvalds --- drivers/staging/android/ashmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index a1a0025b59e0e..d5d33e12e9529 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -402,6 +402,8 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) fput(asma->file); goto out; } + } else { + vma_set_anonymous(vma); } if (vma->vm_file) -- cgit 1.2.3-korg From cdbb65c4c7ead680ebe54f4f0d486e2847a500ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 10:38:43 -0700 Subject: squashfs metadata 2: electric boogaloo Anatoly continues to find issues with fuzzed squashfs images. This time, corrupt, missing, or undersized data for the page filling wasn't checked for, because the squashfs_{copy,read}_cache() functions did the squashfs_copy_data() call without checking the resulting data size. Which could result in the page cache pages being incompletely filled in, and no error indication to the user space reading garbage data. So make a helper function for the "fill in pages" case, because the exact same incomplete sequence existed in two places. [ I should have made a squashfs branch for these things, but I didn't intend to start doing them in the first place. My historical connection through cramfs is why I got into looking at these issues at all, and every time I (continue to) think it's a one-off. Because _this_ time is always the last time. Right? 
- Linus ] Reported-by: Anatoly Trosinenko Tested-by: Willy Tarreau Cc: Al Viro Cc: Phillip Lougher Signed-off-by: Linus Torvalds --- fs/squashfs/file.c | 25 ++++++++++++++++++------- fs/squashfs/file_direct.c | 8 +------- fs/squashfs/squashfs.h | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index fcff2e0487fef..cce3060650aeb 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -374,13 +374,29 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return squashfs_block_size(size); } +void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +{ + int copied; + void *pageaddr; + + pageaddr = kmap_atomic(page); + copied = squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + copied, 0, PAGE_SIZE - copied); + kunmap_atomic(pageaddr); + + flush_dcache_page(page); + if (copied == avail) + SetPageUptodate(page); + else + SetPageError(page); +} + /* Copy data into page cache */ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - void *pageaddr; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; @@ -406,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(push_page); - SetPageUptodate(push_page); + squashfs_fill_page(push_page, buffer, offset, avail); skip_page: unlock_page(push_page); if (i != page->index) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index cb485d8e0e91b..096990254a2ea 100644 --- 
a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -144,7 +144,6 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, @@ -159,12 +158,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, if (page[n] == NULL) continue; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); + squashfs_fill_page(page[n], buffer, offset, avail); unlock_page(page[n]); if (page[n] != target_page) put_page(page[n]); diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080a..d8d43724cf2ad 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -67,6 +67,7 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ +void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -- cgit 1.2.3-korg From c01f6c9b3207e52fc9973a066a856ddf7a0538d8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 1 Aug 2018 13:27:23 +0100 Subject: rxrpc: Fix user call ID check in rxrpc_service_prealloc_one There just check the user call ID isn't already in use, hence should compare user_call_ID with xcall->user_call_ID, which is current node's user_call_ID. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Suggested-by: David Howells Signed-off-by: YueHaibing Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/call_accept.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a9a9be5519b9a..9d1e298b784c8 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -116,9 +116,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < call->user_call_ID) + if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) + else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; -- cgit 1.2.3-korg From 53406ed1bcfdabe4b5bc35e6d17946c6f9f563e2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 1 Aug 2018 11:31:52 -0700 Subject: mm: delete historical BUG from zap_pmd_range() Delete the old VM_BUG_ON_VMA() from zap_pmd_range(), which asserted that mmap_sem must be held when splitting an "anonymous" vma there. Whether that's still strictly true nowadays is not entirely clear, but the danger of sometimes crashing on the BUG is now fairly clear. Even with the new stricter rules for anonymous vma marking, the condition it checks for can possibly trigger. Commit 44960f2a7b63 ("staging: ashmem: Fix SIGBUS crash when traversing mmaped ashmem pages") is good, and originally I thought it was safe from that VM_BUG_ON_VMA(), because the /dev/ashmem fd exposed to the user is disconnected from the vm_file in the vma, and madvise(,,MADV_REMOVE) insists on VM_SHARED. But after I read John's earlier mail, drawing attention to the vfs_fallocate() in there: I may be wrong, and I don't know if Android has THP in the config anyway, but it looks to me like an unmap_mapping_range() from ashmem's vfs_fallocate() could hit precisely the VM_BUG_ON_VMA(), once it's vma_is_anonymous(). 
Signed-off-by: Hugh Dickins Cc: John Stultz Cc: Kirill Shutemov Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 7206a634270be..dab1511294add 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1417,11 +1417,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { - if (next - addr != HPAGE_PMD_SIZE) { - VM_BUG_ON_VMA(vma_is_anonymous(vma) && - !rwsem_is_locked(&tlb->mm->mmap_sem), vma); + if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); - } else if (zap_huge_pmd(tlb, vma, pmd, addr)) + else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ } -- cgit 1.2.3-korg From 8b11ec1b5ffb54f71cb5a5e5c8c4d36e5d113085 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 13:43:38 -0700 Subject: mm: do not initialize TLB stack vma's with vma_init() Commit 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments") tried to initialize various left-over ad-hoc vma's "properly", but actually made things worse for the temporary vma's used for TLB flushing. vma_init() doesn't actually initialize all of the vma, just a few fields, so doing something like - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + struct vm_area_struct vma; + + vma_init(&vma, tlb->mm); was actually very bad: instead of having a nicely initialized vma with every field but "vm_mm" zeroed, you'd have an entirely uninitialized vma with only a couple of fields initialized. And they weren't even fields that the code in question mostly cared about. 
The flush_tlb_range() function takes a "struct vma" rather than a "struct mm_struct", because a few architectures actually care about what kind of range it is - being able to only do an ITLB flush if it's a range that doesn't have data accesses enabled, for example. And all the normal users already have the vma for doing the range invalidation. But a few people want to call flush_tlb_range() with a range they just made up, so they also end up using a made-up vma. x86 just has a special "flush_tlb_mm_range()" function for this, but other architectures (arm and ia64) do the "use fake vma" thing instead, and thus got caught up in the vma_init() changes. At the same time, the TLB flushing code really doesn't care about most other fields in the vma, so vma_init() is just unnecessary and pointless. This fixes things by having an explicit "this is just an initializer for the TLB flush" initializer macro, which is used by the arm/arm64/ia64 people who mis-use this interface with just a dummy vma. 
Fixes: 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments") Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Kirill Shutemov Cc: Andrew Morton Cc: John Stultz Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- arch/arm/mach-rpc/ecard.c | 5 +---- arch/arm64/include/asm/tlb.h | 4 +--- arch/arm64/mm/hugetlbpage.c | 10 ++++------ arch/ia64/include/asm/tlb.h | 7 +++---- include/linux/mm.h | 3 +++ 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 8db62cc54a6ac..04b2f22c2739a 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex); */ static void ecard_init_pgtables(struct mm_struct *mm) { - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC); /* We want to set up the page tables for the following mapping: * Virtual Physical @@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm) memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); - vma_init(&vma, mm); - vma.vm_flags = VM_EXEC; - flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); } diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index d87f2d646caaa..0ad1cf233470d 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,9 +37,7 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - struct vm_area_struct vma; - - vma_init(&vma, tlb->mm); + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); /* * The ASID allocator will either invalidate the ASID or mark diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 1854e49aa18a7..192b3ba070755 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -108,13 +108,10 @@ static pte_t get_clear_flush(struct mm_struct *mm, 
unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma; pte_t orig_pte = huge_ptep_get(ptep); bool valid = pte_valid(orig_pte); unsigned long i, saddr = addr; - vma_init(&vma, mm); - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { pte_t pte = ptep_get_and_clear(mm, addr, ptep); @@ -127,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm, orig_pte = pte_mkdirty(orig_pte); } - if (valid) + if (valid) { + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); flush_tlb_range(&vma, saddr, addr); + } return orig_pte; } @@ -147,10 +146,9 @@ static void clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); unsigned long i, saddr = addr; - vma_init(&vma, mm); for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) pte_clear(mm, addr, ptep); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index db89e73060818..516355a774bfe 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -115,12 +115,11 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned flush_tlb_all(); } else { /* - * XXX fix me: flush_tlb_range() should take an mm pointer instead of a - * vma pointer. + * flush_tlb_range() takes a vma instead of a mm pointer because + * some architectures want the vm_flags for ITLB/DTLB flush. */ - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); - vma_init(&vma, tlb->mm); /* flush the address range from the tlb: */ flush_tlb_range(&vma, start, end); /* now flush the virt. 
page-table area mapping the address range: */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ba6d356d18fb..68a5121694ef5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -466,6 +466,9 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } +/* flush_tlb_range() takes a vma, not a mm, and can care about flags */ +#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } + struct mmu_gather; struct inode; -- cgit 1.2.3-korg From 6ea76bf51339506e9fb00d6caebf5d6b42a571e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Jul 2018 22:21:22 -0400 Subject: NFSv4: Fix _nfs4_do_setlk() The patch to fix the case where a lock request was interrupted ended up changing default handling of errors such as NFS4ERR_DENIED and caused the client to immediately resend the lock request. Let's do a partial revert of that request so that the default is now to exit, but change the way we handle resends to take into account the fact that the user may have interrupted the request. 
Reported-by: Kenneth Johansson Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted wait..") Cc: Benjamin Coddington Cc: Jeff Layton Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/nfs4proc.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dd146885da99..f6c4ccd693f49 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) - break; + goto out_restart; } - if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - goto out_done; - } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) - goto out_done; - + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (nfs4_stateid_match(&data->arg.open_stateid, + if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - goto out_done; - } else if (nfs4_stateid_match(&data->arg.lock_stateid, + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - goto out_done; + goto out_restart; } - if (!data->cancelled) - rpc_restart_call_prepare(task); out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) @@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: begin!\n", __func__); 
nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); -- cgit 1.2.3-korg From e6aed040eafb4ce1881bbc59a225f6b27d250396 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Aug 2018 21:32:30 -0700 Subject: Revert "net/ipv6: fix metrics leak" This reverts commit df18b50448fab1dff093731dfd0e25e77e1afcd1. This change causes other problems and use-after-free situations as found by syzbot. Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 211a2d437b565..d212738e9d100 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -167,22 +167,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) return f6i; } -static void fib6_metrics_release(struct fib6_info *f6i) -{ - struct dst_metrics *m; - - if (!f6i) - return; - - m = f6i->fib6_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); -} - void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; + struct dst_metrics *m; WARN_ON(f6i->fib6_node); @@ -212,7 +201,9 @@ void fib6_info_destroy_rcu(struct rcu_head *head) if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); - fib6_metrics_release(f6i); + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); kfree(f6i); } @@ -896,7 +887,6 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, from = rcu_dereference_protected(pcpu_rt->from, lockdep_is_held(&table->tb6_lock)); - fib6_metrics_release(from); rcu_assign_pointer(pcpu_rt->from, NULL); fib6_info_release(from); } -- cgit 1.2.3-korg From 1b3a62643660020cdc68e6139a010c06e8fc96c7 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Wed, 1 Aug 2018 16:32:25 +0300 Subject: x86/boot/compressed/64: Validate trampoline placement against E820 There were two reports of boot failure caused by the trampoline being placed into a reserved memory region. It can happen on machines that don't report EBDA correctly. Fix the problem by re-validating the found address against the E820 table. If the address is in a reserved area, find the next usable region below the initial address. Fixes: 3548e131ec6a ("x86/boot/compressed/64: Find a place for 32-bit trampoline") Reported-by: Dmitry Malkin Reported-by: youling 257 Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180801133225.38121-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/pgtable_64.c | 73 ++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 8c51075452519..9e21573714910 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,3 +1,4 @@ +#include #include #include "pgtable.h" #include "../string.h" @@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data); extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); +static unsigned long find_trampoline_placement(void) +{ + unsigned long bios_start, ebda_start; + unsigned long trampoline_start; + struct boot_e820_entry *entry; + int i; + + /* + * Find a suitable spot for the trampoline. + * This code is based on reserve_bios_regions(). 
+ */ + + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; + + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; + + if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; + + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Find the first usable memory region under bios_start. */ + for (i = boot_params->e820_entries - 1; i >= 0; i--) { + entry = &boot_params->e820_table[i]; + + /* Skip all entries above bios_start. */ + if (bios_start <= entry->addr) + continue; + + /* Skip non-RAM entries. */ + if (entry->type != E820_TYPE_RAM) + continue; + + /* Adjust bios_start to the end of the entry if needed. */ + if (bios_start > entry->addr + entry->size) + bios_start = entry->addr + entry->size; + + /* Keep bios_start page-aligned. */ + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Skip the entry if it's too small. */ + if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr) + continue; + + break; + } + + /* Place the trampoline just below the end of low memory */ + return bios_start - TRAMPOLINE_32BIT_SIZE; +} + struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; - unsigned long bios_start, ebda_start; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ boot_params = rmode; @@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode) paging_config.l5_required = 1; } - /* - * Find a suitable spot for the trampoline. - * This code is based on reserve_bios_regions(). 
- */ - - ebda_start = *(unsigned short *)0x40e << 4; - bios_start = *(unsigned short *)0x413 << 10; - - if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) - bios_start = BIOS_START_MAX; - - if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) - bios_start = ebda_start; - - /* Place the trampoline just below the end of low memory, aligned to 4k */ - paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE; - paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE); + paging_config.trampoline_start = find_trampoline_placement(); trampoline_32bit = (unsigned long *)paging_config.trampoline_start; -- cgit 1.2.3-korg From 71755ee5350b63fb1f283de8561cdb61b47f4d1d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Aug 2018 08:43:35 -0700 Subject: squashfs: more metadata hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The squashfs fragment reading code doesn't actually verify that the fragment is inside the fragment table. The end result _is_ verified to be inside the image when actually reading the fragment data, but before that is done, we may end up taking a page fault because the fragment table itself might not even exist. Another report from Anatoly and his endless squashfs image fuzzing. 
Reported-by: Анатолий Тросиненко Acked-by:: Phillip Lougher , Cc: Willy Tarreau Signed-off-by: Linus Torvalds --- fs/squashfs/fragment.c | 13 +++++++++---- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/super.c | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 86ad9a4b8c364..0681feab4a849 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if (fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3d..ef69c31947bf8 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 8a73b97217c8a..40e657386fa52 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); 
flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -272,7 +273,7 @@ allocate_id_index_table: sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; -- cgit 1.2.3-korg From a3f94cb99a854fa381fe7fadd97c4f61633717a5 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 2 Aug 2018 16:45:15 +0100 Subject: Squashfs: Compute expected length from inode size rather than block length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously in squashfs_readpage() when copying data into the page cache, it used the length of the datablock read from the filesystem (after decompression). However, if the filesystem has been corrupted this data block may be short, which will leave pages unfilled. The fix for this is to compute the expected number of bytes to copy from the inode size, and use this to detect if the block is short. 
Signed-off-by: Phillip Lougher Tested-by: Willy Tarreau Cc: Анатолий Тросиненко Signed-off-by: Linus Torvalds --- fs/squashfs/file.c | 25 ++++++++++--------------- fs/squashfs/file_cache.c | 4 ++-- fs/squashfs/file_direct.c | 16 +++++++++++----- fs/squashfs/squashfs.h | 2 +- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index cce3060650aeb..f1c1430ae7213 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -431,10 +431,9 @@ skip_page: } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) +static int squashfs_readpage_fragment(struct page *page, int expected) { struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -445,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, i_size_read(inode) & - (msblk->block_size - 1), + squashfs_copy_cache(page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +static int squashfs_readpage_sparse(struct page *page, int expected) { - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? 
- (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - - squashfs_copy_cache(page, NULL, bytes, 0); + squashfs_copy_cache(page, NULL, expected, 0); return 0; } @@ -471,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page) struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; + int expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; int res; void *pageaddr; @@ -489,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page) goto error_out; if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize); + res = squashfs_readpage_block(page, block, bsize, expected); } else - res = squashfs_readpage_fragment(page); + res = squashfs_readpage_fragment(page, expected); if (!res) return 0; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index f2310d2a20195..a9ba8d96776ac 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -20,7 +20,7 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) { struct inode *i = page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, @@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize) ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, buffer->length, 0); + squashfs_copy_cache(page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 096990254a2ea..80db1b86a27c6 
100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -21,10 +21,11 @@ #include "page_actor.h" static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); + int pages, struct page **page, int bytes); /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) { struct inode *inode = target_page->mapping->host; @@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * using an intermediate buffer. */ res = squashfs_read_cache(target_page, block, bsize, pages, - page); + page, expected); if (res < 0) goto mark_errored; @@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (res < 0) goto mark_errored; + if (res != expected) { + res = -EIO; + goto mark_errored; + } + /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { @@ -138,12 +144,12 @@ out: static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) + int pages, struct page **page, int bytes) { struct inode *i = target_page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; + int res = buffer->error, n, offset = 0; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d8d43724cf2ad..f89f8a74c6cea 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -72,7 +72,7 @@ void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +extern int squashfs_readpage_block(struct page *, u64, int, int); /* 
id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); -- cgit 1.2.3-korg From 258fe208f2829d75ac837c17dbdc697ef653a395 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 2 Aug 2018 15:27:27 +0530 Subject: selftest/net: fix protocol family to work for IPv4. Use the actual protocol family passed by the user rather than the hardcoded AF_INET6 to create sockets. The current code does not work for IPv4. Signed-off-by: Maninder Singh Signed-off-by: Vaneet Narang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 77f762780199f..e8c5dff448eb0 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -402,7 +402,7 @@ int main(int argc, char *argv[]) exit(1); } - fd = socket(AF_INET6, SOCK_STREAM, 0); + fd = socket(cfg_family, SOCK_STREAM, 0); if (fd == -1) { perror("socket"); exit(1); -- cgit 1.2.3-korg From 79b3dbe4adb3420e74cf755b4beb5d2b43d5928d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Aug 2018 13:09:27 -0700 Subject: fs: fix iomap_bmap position calculation The position calculation in iomap_bmap() shifts bno the wrong way, so we don't progress properly and end up re-mapping block zero over and over, yielding an unchanging physical block range as the logical block advances: # filefrag -Be file ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 0: 21.. 21: 1: merged 1: 1.. 1: 21.. 21: 1: 22: merged Discontinuity: Block 1 is at 21 (was 22) 2: 2.. 2: 21.. 21: 1: 22: merged Discontinuity: Block 2 is at 21 (was 22) 3: 3.. 3: 21.. 21: 1: 22: merged This breaks the FIBMAP interface for anyone using it (XFS), which in turn breaks LILO, zipl, etc. Bug-actually-spotted-by: Darrick J. 
Wong Fixes: 89eb1906a953 ("iomap: add an iomap-based bmap implementation") Cc: stable@vger.kernel.org Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 77397b5a96ef9..0d0bd88455867 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops) { struct inode *inode = mapping->host; - loff_t pos = bno >> inode->i_blkbits; + loff_t pos = bno << inode->i_blkbits; unsigned blocksize = i_blocksize(inode); if (filemap_write_and_wait(mapping)) -- cgit 1.2.3-korg From 2d5ba0e2de24ec87636244a01d4e78d095cc1b20 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 3 Aug 2018 01:49:37 +0800 Subject: blk-mq: fix blk_mq_tagset_busy_iter Commit d250bf4e776ff09d5("blk-mq: only iterate over inflight requests in blk_mq_tagset_busy_iter") uses 'blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT' to replace 'blk_mq_request_started(req)', this way is wrong, and causes lots of test system hang during booting. Fix the issue by using blk_mq_request_started(req) inside bt_tags_iter(). Fixes: d250bf4e776ff09d5 ("blk-mq: only iterate over inflight requests in blk_mq_tagset_busy_iter") Cc: Josef Bacik Cc: Christoph Hellwig Cc: Guenter Roeck Cc: Mark Brown Cc: Matt Hart Cc: Johannes Thumshirn Cc: John Garry Cc: Hannes Reinecke , Cc: "Martin K. 
Petersen" , Cc: James Bottomley Cc: linux-scsi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Bart Van Assche Tested-by: Guenter Roeck Reported-by: Mark Brown Reported-by: Guenter Roeck Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 09b2ee6694fb1..3de0836163c2c 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -271,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * test and set the bit before assining ->rqs[]. */ rq = tags->rqs[bitnr]; - if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) + if (rq && blk_mq_request_started(rq)) iter_data->fn(rq, iter_data->data, reserved); return true; -- cgit 1.2.3-korg From afb41bb039656f0cecb54eeb8b2e2088201295f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Aug 2018 18:22:41 +0100 Subject: drivers: net: lmc: fix case value for target abort error Current value for a target abort error is 0x010, however, this value should in fact be 0x002. As it stands, the range of error is 0..7 so it is currently never being detected. This bug has been in the driver since the early 2.6.12 days (or before). Detected by CoverityScan, CID#744290 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 90a4ad9a2d081..b3a1b6f5c4064 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1362,7 +1362,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ case 0x001: printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); break; - case 0x010: + case 0x002: printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); break; default: -- cgit 1.2.3-korg From 7e97de0b033bcac4fa9a35cef72e0c06e6a22c67 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 2 Aug 2018 15:36:01 -0700 Subject: memcg: remove memcg_cgroup::id from IDR on mem_cgroup_css_alloc() failure In case of memcg_online_kmem() failure, memcg_cgroup::id remains hashed in mem_cgroup_idr even after memcg memory is freed. This leads to leak of ID in mem_cgroup_idr. This patch adds removal into mem_cgroup_css_alloc(), which fixes the problem. For better readability, it adds a generic helper which is used in mem_cgroup_alloc() and mem_cgroup_id_put_many() as well. 
Link: http://lkml.kernel.org/r/152354470916.22460.14397070748001974638.stgit@localhost.localdomain Fixes: 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Kirill Tkhai Acked-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c0280b3143ee..b2173f7e5164e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4037,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = { static DEFINE_IDR(mem_cgroup_idr); +static void mem_cgroup_id_remove(struct mem_cgroup *memcg) +{ + if (memcg->id.id > 0) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + } +} + static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); @@ -4047,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) < n); if (atomic_sub_and_test(n, &memcg->id.ref)) { - idr_remove(&mem_cgroup_idr, memcg->id.id); - memcg->id.id = 0; + mem_cgroup_id_remove(memcg); /* Memcg ID pins CSS */ css_put(&memcg->css); @@ -4185,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: - if (memcg->id.id > 0) - idr_remove(&mem_cgroup_idr, memcg->id.id); + mem_cgroup_id_remove(memcg); __mem_cgroup_free(memcg); return NULL; } @@ -4245,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; fail: + mem_cgroup_id_remove(memcg); mem_cgroup_free(memcg); return ERR_PTR(-ENOMEM); } -- cgit 1.2.3-korg From eec3636ad198d4ac61e574cb122cb67e9bef5492 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Thu, 2 Aug 2018 15:36:05 -0700 Subject: ipc/shm.c add ->pagesize function to shm_vm_ops MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 05ea88608d4e ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") adds a new ->pagesize() function to hugetlb_vm_ops, intended to cover all hugetlbfs backed files. With System V shared memory model, if "huge page" is specified, the "shared memory" is backed by hugetlbfs files, but the mappings initiated via shmget/shmat have their original vm_ops overwritten with shm_vm_ops, so we need to add a ->pagesize function to shm_vm_ops. Otherwise, vma_kernel_pagesize() returns PAGE_SIZE given a hugetlbfs backed vma, result in below BUG: fs/hugetlbfs/inode.c 443 if (unlikely(page_mapped(page))) { 444 BUG_ON(truncate_op); resulting in hugetlbfs: oracle (4592): Using mlock ulimits for SHM_HUGETLB is deprecated ------------[ cut here ]------------ kernel BUG at fs/hugetlbfs/inode.c:444! Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 ... CPU: 35 PID: 5583 Comm: oracle_5583_sbt Not tainted 4.14.35-1829.el7uek.x86_64 #2 RIP: 0010:remove_inode_hugepages+0x3db/0x3e2 .... 
Call Trace: hugetlbfs_evict_inode+0x1e/0x3e evict+0xdb/0x1af iput+0x1a2/0x1f7 dentry_unlink_inode+0xc6/0xf0 __dentry_kill+0xd8/0x18d dput+0x1b5/0x1ed __fput+0x18b/0x216 ____fput+0xe/0x10 task_work_run+0x90/0xa7 exit_to_usermode_loop+0xdd/0x116 do_syscall_64+0x187/0x1ae entry_SYSCALL_64_after_hwframe+0x150/0x0 [jane.chu@oracle.com: relocate comment] Link: http://lkml.kernel.org/r/20180731044831.26036-1-jane.chu@oracle.com Link: http://lkml.kernel.org/r/20180727211727.5020-1-jane.chu@oracle.com Fixes: 05ea88608d4e13 ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") Signed-off-by: Jane Chu Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Acked-by: Davidlohr Bueso Acked-by: Michal Hocko Cc: Dan Williams Cc: Jan Kara Cc: Jérôme Glisse Cc: Manfred Spraul Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 12 ++++++++++++ mm/hugetlb.c | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df9..fefa00d310fb5 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -427,6 +427,17 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr) return 0; } +static unsigned long shm_pagesize(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + if (sfd->vm_ops->pagesize) + return sfd->vm_ops->pagesize(vma); + + return PAGE_SIZE; +} + #ifdef CONFIG_NUMA static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { @@ -554,6 +565,7 @@ static const struct vm_operations_struct shm_vm_ops = { .close = shm_close, /* callback for when the vm-area is released */ .fault = shm_fault, .split = shm_split, + .pagesize = shm_pagesize, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 039ddbc574e92..3103099f64fd8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3167,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf) return 
0; } +/* + * When a new function is introduced to vm_operations_struct and added + * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops. + * This is because under System V memory model, mappings created via + * shmget/shmat with "huge page" specified are backed by hugetlbfs files, + * their original vm_ops are overwritten with shm_vm_ops. + */ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, -- cgit 1.2.3-korg From 31e810aa1033a7db50a2746cd34a2432237f6420 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Aug 2018 15:36:09 -0700 Subject: userfaultfd: remove uffd flags from vma->vm_flags if UFFD_EVENT_FORK fails The fix in commit 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") cleared the vma->vm_userfaultfd_ctx but kept userfaultfd flags in vma->vm_flags that were copied from the parent process VMA. As the result, there is an inconsistency between the values of vma->vm_userfaultfd_ctx.ctx and vma->vm_flags which triggers BUG_ON in userfaultfd_release(). Clearing the uffd flags from vma->vm_flags in case of UFFD_EVENT_FORK failure resolves the issue. 
Link: http://lkml.kernel.org/r/1532931975-25473-1-git-send-email-rppt@linux.vnet.ibm.com Fixes: 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") Signed-off-by: Mike Rapoport Reported-by: syzbot+121be635a7a35ddb7dcb@syzkaller.appspotmail.com Cc: Andrea Arcangeli Cc: Eric Biggers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 594d192b23317..bad9cea37f12b 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); -- cgit 1.2.3-korg From 8c85cbdf371f9ddf256ecc5d9548b26ee8fcfe2f Mon Sep 17 00:00:00 2001 From: Mathieu Xhonneux Date: Wed, 1 Aug 2018 15:34:54 +0000 Subject: selftests/bpf: update test_lwt_seg6local.sh according to iproute2 The shell file for test_lwt_seg6local contains an early iproute2 syntax for installing a seg6local End.BPF route. iproute2 support for this feature has recently been upstreamed, but with an additional keyword required. 
This patch updates test_lwt_seg6local.sh to the definitive iproute2 syntax Signed-off-by: Mathieu Xhonneux Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_lwt_seg6local.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 270fa8f495732..785eabf2a5931 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -115,14 +115,14 @@ ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 -ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 ip netns exec ns4 ip -6 addr add fc42::1 dev lo ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo -- cgit 1.2.3-korg From d1f0301b3333eef5efbfa1fe0f0edbea01863d5d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Aug 2018 14:44:59 +0200 Subject: 
genirq: Make force irq threading setup more robust The support of force threading interrupts which are set up with both a primary and a threaded handler wrecked the setup of regular requested threaded interrupts (primary handler == NULL). The reason is that it does not check whether the primary handler is set to the default handler which wakes the handler thread. Instead it replaces the thread handler with the primary handler as it would do with force threaded interrupts which have been requested via request_irq(). So both the primary and the thread handler become the same which then triggers the WARN_ON that the thread handler tries to wake up a not configured secondary thread. Fortunately this only happens when the driver omits the IRQF_ONESHOT flag when requesting the threaded interrupt, which is normally caught by the sanity checks when force irq threading is disabled. Fix it by skipping the force threading setup when a regular threaded interrupt is requested. As a consequence the interrupt request which lacks the IRQF_ONESHOT flag is rejected correctly instead of silently wrecking it.
Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler") Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Tested-by: Kurt Kanzenbach Cc: stable@vger.kernel.org --- kernel/irq/manage.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index daeabd791d589..9a8b7ba9aa88d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1068,6 +1068,13 @@ static int irq_setup_forced_threading(struct irqaction *new) if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; + new->flags |= IRQF_ONESHOT; /* @@ -1075,7 +1082,7 @@ static int irq_setup_forced_threading(struct irqaction *new) * thread handler. We force thread them as well by creating a * secondary action. */ - if (new->handler != irq_default_primary_handler && new->thread_fn) { + if (new->handler && new->thread_fn) { /* Allocate the secondary action */ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!new->secondary) -- cgit 1.2.3-korg From 0a0e0829f990120cef165bbb804237f400953ec2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 Aug 2018 15:31:34 +0200 Subject: nohz: Fix missing tick reprogram when interrupting an inline softirq The full nohz tick is reprogrammed in irq_exit() only if the exit is not in a nesting interrupt. This stands as an optimization: whether a hardirq or a softirq is interrupted, the tick is going to be reprogrammed when necessary at the end of the inner interrupt, with even potential new updates on the timer queue. When soft interrupts are interrupted, it's assumed that they are executing on the tail of an interrupt return. In that case tick_nohz_irq_exit() is called after softirq processing to take care of the tick reprogramming. 
But the assumption is wrong: softirqs can be processed inline as well, ie: outside of an interrupt, like in a call to local_bh_enable() or from ksoftirqd. Inline softirqs don't reprogram the tick once they are done, as opposed to interrupt tail softirq processing. So if a tick interrupts an inline softirq processing, the next timer will neither be reprogrammed from the interrupting tick's irq_exit() nor after the interrupted softirq processing. This situation may leave the tick unprogrammed while timers are armed. To fix this, simply keep reprogramming the tick even if a softirq has been interrupted. That can be optimized further, but for now correctness is more important. Note that new timers enqueued in nohz_full mode after a softirq gets interrupted will still be handled just fine through self-IPIs triggered by the timer code. Reported-by: Anna-Maria Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Anna-Maria Gleixner Cc: stable@vger.kernel.org # 4.14+ Link: https://lkml.kernel.org/r/1533303094-15855-1-git-send-email-frederic@kernel.org --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 75ffc1d1a2e06..6f584861d329b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -390,7 +390,7 @@ static inline void tick_irq_exit(void) /* Make sure that timer wheel updates are propagated */ if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { - if (!in_interrupt()) + if (!in_irq()) tick_nohz_irq_exit(); } #endif -- cgit 1.2.3-korg From 3757b255bf20ae3c941abae7624ff215bfd9ef05 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:41 +0300 Subject: mlxsw: core_acl_flex_actions: Return error for conflicting actions Spectrum switch ACL action set is built in groups of three actions which may point to additional actions. 
A group holds a single record which can be set as goto record for pointing at a following group or can be set to mark the termination of the lookup. This is perfectly adequate for handling a series of actions to be executed on a packet. While the SW model allows configuration of conflicting actions where it is clear that some actions will never execute, the mlxsw driver must block such configurations as it creates a conflict over the single terminate/goto record value. For a conflicting actions configuration such as: # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 \ action goto chain 100 \ action mirred egress mirror dev swp4 Where it is clear that the last action will never execute, the mlxsw driver was issuing a warning instead of returning an error. Therefore replace that warning with an error for this specific case. Fixes: 4cda7d8d7098 ("mlxsw: core: Introduce flexible actions support") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 3c0d882ba1838..ce280680258e8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -626,8 +626,8 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, char *oneact; char *actions; - if (WARN_ON(block->finished)) - return NULL; + if (block->finished) + return ERR_PTR(-EINVAL); if (block->cur_act_index + action_size > block->afa->max_acts_per_set) { struct mlxsw_afa_set *set; @@ -637,7 +637,7 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, */ set = mlxsw_afa_set_create(false); if (!set) - return NULL; + return ERR_PTR(-ENOBUFS); set->prev = block->cur_set; block->cur_act_index = 0; block->cur_set->next = set; @@ -724,8 +724,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, MLXSW_AFA_VLAN_CODE, MLXSW_AFA_VLAN_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, @@ -806,8 +806,8 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); return 0; @@ -820,8 +820,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, 
MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, trap_id); @@ -836,8 +836,8 @@ int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, trap_id); @@ -908,8 +908,8 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); @@ -996,8 +996,8 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, MLXSW_AFA_FORWARD_SIZE); - if (!act) { - err = -ENOBUFS; + if (IS_ERR(act)) { + err = PTR_ERR(act); goto err_append_action; } mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, @@ -1052,8 +1052,8 @@ int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, { char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, MLXSW_AFA_POLCNT_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, counter_index); return 0; @@ -1123,8 +1123,8 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_VIRFWD_CODE, MLXSW_AFA_VIRFWD_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); return 0; } @@ -1193,8 +1193,8 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, char *act = 
mlxsw_afa_block_append_action(block, MLXSW_AFA_MCROUTER_CODE, MLXSW_AFA_MCROUTER_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, expected_irif, min_mtu, rmid_valid, kvdl_index); return 0; -- cgit 1.2.3-korg From dda0a3a3fb92451d4a922e56365ee1f73c8a9586 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:42 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant resource destruction Some ACL actions require the allocation of a separate resource prior to applying the action itself. When facing an error condition during the setup phase of the action, resource should be destroyed. For such actions the destruction was done twice which is dangerous and lead to a potential crash. The destruction took place first upon error on action setup phase and then as the rule was destroyed. The following sequence generated a crash: # tc qdisc add dev swp49 ingress # tc filter add dev swp49 parent ffff: \ protocol ip chain 100 pref 10 \ flower skip_sw dst_ip 192.168.101.1 action drop # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ action mirred egress mirror dev swp4 Therefore add mlxsw_afa_resource_del() as a complement of mlxsw_afa_resource_add() to add symmetry to resource_list membership handling. Call this from mlxsw_afa_fwd_entry_ref_destroy() to make the _fwd_entry_ref_create() and _fwd_entry_ref_destroy() pair of calls a NOP. Fixes: 140ce421217e ("mlxsw: core: Convert fwd_entry_ref list to be generic per-block resource list") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index ce280680258e8..d664cc0289c27 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -327,12 +327,16 @@ static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block, list_add(&resource->list, &block->resource_list); } +static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource) +{ + list_del(&resource->list); +} + static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) { struct mlxsw_afa_resource *resource, *tmp; list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { - list_del(&resource->list); resource->destructor(block, resource); } } @@ -530,6 +534,7 @@ static void mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) { + mlxsw_afa_resource_del(&fwd_entry_ref->resource); mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); kfree(fwd_entry_ref); } -- cgit 1.2.3-korg From 7cc6169493990dec488eda0a3f6612729ca25e81 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:43 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant counter destruction Each tc flower rule uses a hidden count action. As counter resource may not be available due to limited HW resources, update _counter_create() and _counter_destroy() pair to follow previously introduced symmetric error condition handling, add a call to mlxsw_afa_resource_del() as part of the counter resource destruction. Fixes: c18c1e186ba8 ("mlxsw: core: Make counter index allocated inside the action append") Signed-off-by: Nir Dotan Reviewed-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index d664cc0289c27..a54f23f00a5fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -584,6 +584,7 @@ static void mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_counter *counter) { + mlxsw_afa_resource_del(&counter->resource); block->afa->ops->counter_index_put(block->afa->ops_priv, counter->counter_index); kfree(counter); -- cgit 1.2.3-korg From caebd1b389708bf3d0465be829480fc706a68720 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:44 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant mirror resource destruction In previous patch mlxsw_afa_resource_del() was added to avoid a duplicate resource destruction scenario. For mirror actions, such duplicate destruction leads to a crash as in: # tc qdisc add dev swp49 ingress # tc filter add dev swp49 parent ffff: \ protocol ip chain 100 pref 10 \ flower skip_sw dst_ip 192.168.101.1 action drop # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ action mirred egress mirror dev swp4 Therefore add a call to mlxsw_afa_resource_del() in mlxsw_afa_mirror_destroy() in order to clear that resource from rule's resources. Fixes: d0d13c1858a1 ("mlxsw: spectrum_acl: Add support for mirror action") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index a54f23f00a5fd..f6f6a568d66a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -862,6 +862,7 @@ static void mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_mirror *mirror) { + mlxsw_afa_resource_del(&mirror->resource); block->afa->ops->mirror_del(block->afa->ops_priv, mirror->local_in_port, mirror->span_id, -- cgit 1.2.3-korg From f664e37dcc525768280cb94321424a09beb1c992 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 3 Aug 2018 17:00:11 +0200 Subject: l2tp: fix missing refcount drop in pppol2tp_tunnel_ioctl() If 'session' is not NULL and is not a PPP pseudo-wire, then we fail to drop the reference taken by l2tp_session_get(). Fixes: ecd012e45ab5 ("l2tp: filter out non-PPP sessions in pppol2tp_tunnel_ioctl()") Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_ppp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e398797878a97..cf6cca260e7b5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1201,13 +1201,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, l2tp_session_get(sock_net(sk), tunnel, stats.session_id); - if (session && session->pwtype == L2TP_PWTYPE_PPP) { - err = pppol2tp_session_ioctl(session, cmd, - arg); + if (!session) { + err = -EBADR; + break; + } + if (session->pwtype != L2TP_PWTYPE_PPP) { l2tp_session_dec_refcount(session); - } else { err = -EBADR; + break; } + + err = pppol2tp_session_ioctl(session, cmd, arg); + l2tp_session_dec_refcount(session); break; } #ifdef CONFIG_XFRM -- cgit 1.2.3-korg From 961b33c244e5ba1543ae26270a1ba29f29c2db83 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Aug 2018 12:52:58 -0700 Subject: jfs: Fix usercopy whitelist for inline inode data Bart Massey reported what turned out to be a usercopy whitelist false positive in JFS when symlink contents exceeded 128 bytes. The inline inode data (i_inline) is actually designed to overflow into the "extended area" following it (i_inline_ea) when needed. So the whitelist needed to be expanded to include both i_inline and i_inline_ea (the whole size of which is calculated internally using IDATASIZE, 256, instead of sizeof(i_inline), 128). $ cd /mnt/jfs $ touch $(perl -e 'print "B" x 250') $ ln -s B* b $ ls -l >/dev/null [ 249.436410] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'jfs_ip' (offset 616, size 250)! 
Reported-by: Bart Massey Fixes: 8d2704d382a9 ("jfs: Define usercopy region in jfs_ip slab cache") Cc: Dave Kleikamp Cc: jfs-discussion@lists.sourceforge.net Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- fs/jfs/jfs_dinode.h | 7 +++++++ fs/jfs/jfs_incore.h | 1 + fs/jfs/super.c | 3 +-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 395c4c0d0f066..1682a87c00b25 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -115,6 +115,13 @@ struct dinode { dxd_t _dxd; /* 16: */ union { __le32 _rdev; /* 4: */ + /* + * The fast symlink area + * is expected to overflow + * into _inlineea when + * needed (which will clear + * INLINEEA). + */ u8 _fastsymlink[128]; } _u; u8 _inlineea[128]; diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1f26d1910409a..9940a1e04cbfb 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -87,6 +87,7 @@ struct jfs_inode_info { struct { unchar _unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ + /* _inline may overflow into _inline_ea when needed */ unchar _inline[128]; /* 128: inline symlink */ /* _inline_ea may overlay the last part of * file._xtroot if maxentry = XTROOTINITSLOT diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b68..f08571433aba2 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -967,8 +967,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - offsetof(struct jfs_inode_info, i_inline), - sizeof_field(struct jfs_inode_info, i_inline), + offsetof(struct jfs_inode_info, i_inline), IDATASIZE, init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; -- cgit 1.2.3-korg From 5607016cd1bbec538050b495669c3c8c5a2cee80 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 3 Aug 2018 10:38:33 +0200 Subject: net/smc: no cursor update send in state SMC_INIT If a writer blocked condition is received without data, 
the current consumer cursor is immediately sent. Servers could already receive this condition in state SMC_INIT without finished tx-setup. This patch avoids sending a consumer cursor update in this case. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7e8d63fc8aeb..9bde1e4ca288c 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -233,7 +233,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* force immediate tx of current consumer cursor, but * under send_lock to guarantee arrival in seqno-order */ - smc_tx_sndbuf_nonempty(conn); + if (smc->sk.sk_state != SMC_INIT) + smc_tx_sndbuf_nonempty(conn); } } -- cgit 1.2.3-korg From 91874ecf32e41b5d86a4cb9d60e0bee50d828058 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sun, 5 Aug 2018 01:35:53 +0100 Subject: netlink: Don't shift on 64 for ngroups It's legal to have 64 groups for netlink_sock. As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe only to first 32 groups. The check for correctness of .bind() userspace supplied parameter is done by applying mask made from ngroups shift. Which broke Android as they have 64 groups and the shift for mask resulted in an overflow. Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups") Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Reported-and-Tested-by: Nathan Chancellor Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c09d16870f74a..56704d95f82d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1013,8 +1013,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups == 0) groups = 0; - else - groups &= (1ULL << nlk->ngroups) - 1; + else if (nlk->ngroups < 8*sizeof(groups)) + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- cgit 1.2.3-korg From a32e236eb93e62a0f692e79b7c3c9636689559b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 3 Aug 2018 12:22:09 -0700 Subject: Partially revert "block: fail op_is_write() requests to read-only partitions" It turns out that commit 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions"), while obviously correct, causes problems for some older lvm2 installations. The reason is that the lvm snapshotting will continue to write to the snapshot COW volume, even after the volume has been marked read-only. End result: snapshot failure. This has actually been fixed in newer versions of the lvm2 tool, but the old tools still exist, and the breakage was reported both in the kernel bugzilla and in the Debian bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200439 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=900442 The lvm2 fix is here https://sourceware.org/git/?p=lvm2.git;a=commit;h=a6fdb9d9d70f51c49ad11a87ab4243344e6701a3 but until everybody has updated to recent versions, we'll have to weaken the "never write to read-only partitions" check.
It now allows the write to happen, but causes a warning, something like this: generic_make_request: Trying to write to read-only block-device dm-3 (partno X) Modules linked in: nf_tables xt_cgroup xt_owner kvm_intel iwlmvm kvm irqbypass iwlwifi CPU: 1 PID: 77 Comm: kworker/1:1 Not tainted 4.17.9-gentoo #3 Hardware name: LENOVO 20B6A019RT/20B6A019RT, BIOS GJET91WW (2.41 ) 09/21/2016 Workqueue: ksnaphd do_metadata RIP: 0010:generic_make_request_checks+0x4ac/0x600 ... Call Trace: generic_make_request+0x64/0x400 submit_bio+0x6c/0x140 dispatch_io+0x287/0x430 sync_io+0xc3/0x120 dm_io+0x1f8/0x220 do_metadata+0x1d/0x30 process_one_work+0x1b9/0x3e0 worker_thread+0x2b/0x3c0 kthread+0x113/0x130 ret_from_fork+0x35/0x40 Note that this is a "revert" in behavior only. I'm leaving alone the actual code cleanups in commit 721c7fc701c7, but letting the previously uncaught request go through with a warning instead of stopping it. Fixes: 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions") Reported-and-tested-by: WGH Acked-by: Mike Snitzer Cc: Sagi Grimberg Cc: Ilya Dryomov Cc: Jens Axboe Cc: Zdenek Kabelac Signed-off-by: Linus Torvalds --- block/blk-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f84a9b7b6f5aa..ee33590f54eb4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2155,11 +2155,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) if (part->policy && op_is_write(bio_op(bio))) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR + WARN_ONCE(1, "generic_make_request: Trying to write " "to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); - return true; + /* Older lvm-tools actually trigger this */ + return false; } return false; -- cgit 1.2.3-korg From 1ffaddd029c867d134a1dde39f540dcc8c52e274 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Aug 2018 12:37:41 -0700 Subject: Linux 4.18-rc8 --- Makefile | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 85f3481a56d6c..7a3c4548162b7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Merciless Moray # *DOCUMENTATION* -- cgit 1.2.3-korg