From 9c86336c15db1c48cbaddff56caf2be0a930e991 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 20 Aug 2018 10:51:05 +0800 Subject: ip6_vti: fix a null pointer deference when destroy vti6 tunnel If load ip6_vti module and create a network namespace when set fb_tunnels_only_for_init_net to 1, then exit the namespace will cause following crash: [ 6601.677036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 6601.679057] PGD 8000000425eca067 P4D 8000000425eca067 PUD 424292067 PMD 0 [ 6601.680483] Oops: 0000 [#1] SMP PTI [ 6601.681223] CPU: 7 PID: 93 Comm: kworker/u16:1 Kdump: loaded Tainted: G E 4.18.0+ #3 [ 6601.683153] Hardware name: Fedora Project OpenStack Nova, BIOS seabios-1.7.5-11.el7 04/01/2014 [ 6601.684919] Workqueue: netns cleanup_net [ 6601.685742] RIP: 0010:vti6_exit_batch_net+0x87/0xd0 [ip6_vti] [ 6601.686932] Code: 7b 08 48 89 e6 e8 b9 ea d3 dd 48 8b 1b 48 85 db 75 ec 48 83 c5 08 48 81 fd 00 01 00 00 75 d5 49 8b 84 24 08 01 00 00 48 89 e6 <48> 8b 78 08 e8 90 ea d3 dd 49 8b 45 28 49 39 c6 4c 8d 68 d8 75 a1 [ 6601.690735] RSP: 0018:ffffa897c2737de0 EFLAGS: 00010246 [ 6601.691846] RAX: 0000000000000000 RBX: 0000000000000000 RCX: dead000000000200 [ 6601.693324] RDX: 0000000000000015 RSI: ffffa897c2737de0 RDI: ffffffff9f2ea9e0 [ 6601.694824] RBP: 0000000000000100 R08: 0000000000000000 R09: 0000000000000000 [ 6601.696314] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8dc323c07e00 [ 6601.697812] R13: ffff8dc324a63100 R14: ffffa897c2737e30 R15: ffffa897c2737e30 [ 6601.699345] FS: 0000000000000000(0000) GS:ffff8dc33fdc0000(0000) knlGS:0000000000000000 [ 6601.701068] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6601.702282] CR2: 0000000000000008 CR3: 0000000424966002 CR4: 00000000001606e0 [ 6601.703791] Call Trace: [ 6601.704329] cleanup_net+0x1b4/0x2c0 [ 6601.705268] process_one_work+0x16c/0x370 [ 6601.706145] worker_thread+0x49/0x3e0 [ 6601.706942] kthread+0xf8/0x130 [ 6601.707626] ? rescuer_thread+0x340/0x340 [ 6601.708476] ? kthread_bind+0x10/0x10 [ 6601.709266] ret_from_fork+0x35/0x40 Reproduce: modprobe ip6_vti echo 1 > /proc/sys/net/core/fb_tunnels_only_for_init_net unshare -n exit This because ip6n->tnls_wc[0] point to fallback device in default, but in non-default namespace, ip6n->tnls_wc[0] will be NULL, so add the NULL check comparatively. Fixes: e2948e5af8ee ("ip6_vti: fix creating fallback tunnel device for vti6") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 38dec9da90d338..5095367c720492 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, list); + if (t) + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) -- cgit 1.2.3-korg From d3bc0fa8411c35194f99046157e2e26fe60e1d91 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 20 Aug 2018 13:55:45 +0200 Subject: fsnotify: fix false positive warning on inode delete When inode is getting deleted and someone else holds reference to a mark attached to the inode, we just detach the connector from the inode. In that case fsnotify_put_mark() called from fsnotify_destroy_marks() will decide to recalculate mask for the inode and __fsnotify_recalc_mask() will WARN about invalid connector type: WARNING: CPU: 1 PID: 12015 at fs/notify/mark.c:139 __fsnotify_recalc_mask+0x2d7/0x350 fs/notify/mark.c:139 Actually there's no reason to warn about detached connector in __fsnotify_recalc_mask() so just silently skip updating the mask in such case. Reported-by: syzbot+c34692a51b9a6ca93540@syzkaller.appspotmail.com Fixes: 3ac70bfcde81 ("fsnotify: add helper to get mask from connector") Signed-off-by: Jan Kara --- fs/notify/mark.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 05506d60131c69..59cdb27826defe 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -132,13 +132,13 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); + /* We can get detached connector here when inode is getting unlinked. */ + if (!fsnotify_valid_obj_type(conn->type)) + return; hlist_for_each_entry(mark, &conn->list, obj_list) { if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) new_mask |= mark->mask; } - if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) - return; - *fsnotify_conn_mask_p(conn) = new_mask; } -- cgit 1.2.3-korg From f1506a69e3e72196c7c5ce4fd420d5e1a6965ed3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:49 -0300 Subject: thermal: qoriq: Use devm_thermal_zone_of_sensor_register() By using the managed devm_thermal_zone_of_sensor_register() we can drop the explicit call to thermal_zone_of_sensor_unregister() in the qoriq_tmu_remove() function, which simplifies the code a bit. So switch to devm_thermal_zone_of_sensor_register(). Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index c866cc16596067..e32d6ac79145fa 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -233,8 +233,9 @@ static int qoriq_tmu_probe(struct platform_device *pdev) if (ret < 0) goto err_tmu; - data->tz = thermal_zone_of_sensor_register(&pdev->dev, data->sensor_id, - data, &tmu_tz_ops); + data->tz = devm_thermal_zone_of_sensor_register(&pdev->dev, + data->sensor_id, + data, &tmu_tz_ops); if (IS_ERR(data->tz)) { ret = PTR_ERR(data->tz); dev_err(&pdev->dev, @@ -261,8 +262,6 @@ static int qoriq_tmu_remove(struct platform_device *pdev) { struct qoriq_tmu_data *data = platform_get_drvdata(pdev); - thermal_zone_of_sensor_unregister(&pdev->dev, data->tz); - /* Disable monitoring */ tmu_write(data, TMR_DISABLE, &data->regs->tmr); -- cgit 1.2.3-korg From 1a893a5a198eff228ddc1a364830f8928b8f9ac5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:50 -0300 Subject: thermal: qoriq: Simplify the 'site' variable assignment There is no need to assign zero to the variable 'site' and then perform a compound bitwise OR operation afterwards. Make it simpler by assigning the final 'site' value directly. Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index e32d6ac79145fa..f807e4d1f72e30 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -197,7 +197,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) int ret; struct qoriq_tmu_data *data; struct device_node *np = pdev->dev.of_node; - u32 site = 0; + u32 site; if (!np) { dev_err(&pdev->dev, "Device OF-Node is NULL"); @@ -244,7 +244,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) } /* Enable monitoring */ - site |= 0x1 << (15 - data->sensor_id); + site = 0x1 << (15 - data->sensor_id); tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr); return 0; -- cgit 1.2.3-korg From 2dfef650217c0e24754cd4c3abbb43e98131a7cf Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:51 -0300 Subject: thermal: qoriq: Switch to SPDX identifier Adopt the SPDX license identifier headers to ease license compliance management. Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Acked-by: Philippe Ombredanne Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index f807e4d1f72e30..450ed66edf582b 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -1,16 +1,6 @@ -/* - * Copyright 2016 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright 2016 Freescale Semiconductor, Inc. #include #include -- cgit 1.2.3-korg From f00d25f3154b676fcea4502a25b94bd7f142ca74 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:42 +0300 Subject: qed: Wait for ready indication before rereading the shmem The MFW might be reset and re-update its shared memory. Upon the detection of such a reset the driver rereads this memory, but it has to wait till the data is valid. This patch adds the missing wait for a data ready indication. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 50 +++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index d89a0e22f6e430..bdcacb31d88b26 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } +/* Maximum of 1 sec to wait for the SHMEM ready indication */ +#define QED_MCP_SHMEM_RDY_MAX_RETRIES 20 +#define QED_MCP_SHMEM_RDY_ITER_MS 50 + static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; + u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES; + u8 msec = QED_MCP_SHMEM_RDY_ITER_MS; u32 drv_mb_offsize, mfw_mb_offsize; u32 mcp_pf_id = MCP_PF_ID(p_hwfn); p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR); - if (!p_info->public_base) - return 0; + if (!p_info->public_base) { + DP_NOTICE(p_hwfn, + "The address of the MCP scratch-pad is not configured\n"); + return -EINVAL; + } p_info->public_base |= GRCBASE_MCP; + /* Get the MFW MB address and number of supported messages */ + mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, + SECTION_OFFSIZE_ADDR(p_info->public_base, + PUBLIC_MFW_MB)); + p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); + p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, + sup_msgs)); + + /* The driver can notify that there was an MCP reset, and might read the + * SHMEM values before the MFW has completed initializing them. + * To avoid this, the "sup_msgs" field in the MFW mailbox is used as a + * data ready indication. + */ + while (!p_info->mfw_mb_length && --cnt) { + msleep(msec); + p_info->mfw_mb_length = + (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, sup_msgs)); + } + + if (!cnt) { + DP_NOTICE(p_hwfn, + "Failed to get the SHMEM ready notification after %d msec\n", + QED_MCP_SHMEM_RDY_MAX_RETRIES * msec); + return -EBUSY; + } + /* Calculate the driver and MFW mailbox address */ drv_mb_offsize = qed_rd(p_hwfn, p_ptt, SECTION_OFFSIZE_ADDR(p_info->public_base, @@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n", drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id); - /* Set the MFW MB address */ - mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, - SECTION_OFFSIZE_ADDR(p_info->public_base, - PUBLIC_MFW_MB)); - p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); - p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr); - /* Get the current driver mailbox sequence before sending * the first command */ -- cgit 1.2.3-korg From 76271809f49056f079e202bf6513d17b0d6dd34d Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:43 +0300 Subject: qed: Wait for MCP halt and resume commands to take place Successive iterations of halting and resuming the management chip (MCP) might fail, since currently the driver doesn't wait for these operations to actually take place. This patch prevents the driver from moving forward before the operations are reflected in the state register. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 46 +++++++++++++++++++++----- drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bdcacb31d88b26..5f3dbdc7ff1da0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -2109,31 +2109,61 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } +/* A maximal 100 msec waiting time for the MCP to halt */ +#define QED_MCP_HALT_SLEEP_MS 10 +#define QED_MCP_HALT_MAX_RETRIES 10 + int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 resp = 0, param = 0; + u32 resp = 0, param = 0, cpu_state, cnt = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, ¶m); - if (rc) + if (rc) { DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + return rc; + } - return rc; + do { + msleep(QED_MCP_HALT_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) + break; + } while (++cnt < QED_MCP_HALT_MAX_RETRIES); + + if (cnt == QED_MCP_HALT_MAX_RETRIES) { + DP_NOTICE(p_hwfn, + "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state); + return -EBUSY; + } + + return 0; } +#define QED_MCP_RESUME_SLEEP_MS 10 + int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 value, cpu_mode; + u32 cpu_mode, cpu_state; qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); - value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); - value &= ~MCP_REG_CPU_MODE_SOFT_HALT; - qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode); + msleep(QED_MCP_RESUME_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); - return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) { + DP_NOTICE(p_hwfn, + "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + cpu_mode, cpu_state); + return -EBUSY; + } + + return 0; } int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index d8ad2dcad8d5ef..2279965f8f8a48 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -562,6 +562,7 @@ 0 #define MCP_REG_CPU_STATE \ 0xe05004UL +#define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL #define PGLUE_B_REG_PF_BAR0_SIZE \ -- cgit 1.2.3-korg From eaa50fc59e5841910987e90b0438b2643041f508 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:44 +0300 Subject: qed: Prevent a possible deadlock during driver load and unload The MFW manages an internal lock to prevent concurrent hardware (de)initialization of different PFs. This, together with the busy-waiting for the MFW's responses for commands, might lead to a deadlock during concurrent load or unload of PFs. This patch adds the option to sleep within the busy-waiting, and uses it for the (un)load requests (which are not sent from an interrupt context) to prevent the possible deadlock. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 43 ++++++++++++++++++++++--------- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 21 +++++++++------ 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 5f3dbdc7ff1da0..b7279e625db3b4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -48,7 +48,7 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define CHIP_MCP_RESP_ITER_US 10 +#define QED_MCP_RESP_ITER_US 10 #define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */ #define QED_MCP_RESET_RETRIES (50 * 1000) /* Account for 500 msec */ @@ -317,7 +317,7 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0; + u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; /* Ensure that only a single thread is accessing the mailbox */ @@ -449,10 +449,10 @@ static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_mb_params *p_mb_params, - u32 max_retries, u32 delay) + u32 max_retries, u32 usecs) { + u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000); struct qed_mcp_cmd_elem *p_cmd_elem; - u32 cnt = 0; u16 seq_num; int rc = 0; @@ -475,7 +475,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, goto err; spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); - udelay(delay); + + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); } while (++cnt < max_retries); if (cnt >= max_retries) { @@ -504,7 +508,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, * The spinlock stays locked until the list element is removed. */ - udelay(delay); + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); if (p_cmd_elem->b_is_completed) @@ -539,7 +547,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n", p_mb_params->mcp_resp, p_mb_params->mcp_param, - (cnt * delay) / 1000, (cnt * delay) % 1000); + (cnt * usecs) / 1000, (cnt * usecs) % 1000); /* Clear the sequence number from the MFW response */ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK; @@ -557,7 +565,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, { size_t union_data_size = sizeof(union drv_union_data); u32 max_retries = QED_DRV_MB_MAX_RETRIES; - u32 delay = CHIP_MCP_RESP_ITER_US; + u32 usecs = QED_MCP_RESP_ITER_US; /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { @@ -574,8 +582,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) { + max_retries = DIV_ROUND_UP(max_retries, 1000); + usecs *= 1000; + } + return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries, - delay); + usecs); } int qed_mcp_cmd(struct qed_hwfn *p_hwfn, @@ -793,6 +806,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -1014,7 +1028,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 wol_param, mcp_resp, mcp_param; + struct qed_mcp_mb_params mb_params; + u32 wol_param; switch (p_hwfn->cdev->wol_config) { case QED_OV_WOL_DISABLED: @@ -1032,8 +1047,12 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; } - return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param, - &mcp_resp, &mcp_param); + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; + mb_params.param = wol_param; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + + return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 047976d5c6e962..b9d3ecf7aad6ec 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -660,14 +660,19 @@ struct qed_mcp_info { }; struct qed_mcp_mb_params { - u32 cmd; - u32 param; - void *p_data_src; - u8 data_src_size; - void *p_data_dst; - u8 data_dst_size; - u32 mcp_resp; - u32 mcp_param; + u32 cmd; + u32 param; + void *p_data_src; + void *p_data_dst; + u8 data_src_size; + u8 data_dst_size; + u32 mcp_resp; + u32 mcp_param; + u32 flags; +#define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAGS_IS_SET(params, flag) \ + ({ typeof(params) __params = (params); \ + (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) }; struct qed_drv_tlv_hdr { -- cgit 1.2.3-korg From b310974e041913231b6e3d5d475d4df55c312301 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:45 +0300 Subject: qed: Avoid sending mailbox commands when MFW is not responsive Keep sending mailbox commands to the MFW when it is not responsive ends up with a redundant amount of timeout expiries. This patch prints the MCP status on the first command which is not responded, and blocks the following commands. Since the (un)load request commands might be not responded due to other PFs, the patch also adds the option to skip the blocking upon a failure. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 52 +++++++++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 6 ++- drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b7279e625db3b4..5d37ec7e9b0b7b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -320,6 +320,12 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n"); + return -EBUSY; + } + /* Ensure that only a single thread is accessing the mailbox */ spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -445,6 +451,33 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, (p_mb_params->cmd | seq_num), p_mb_params->param); } +static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd) +{ + p_hwfn->mcp_info->b_block_cmd = block_cmd; + + DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n", + block_cmd ? "Block" : "Unblock"); +} + +static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2; + u32 delay = QED_MCP_RESP_ITER_US; + + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + + DP_NOTICE(p_hwfn, + "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n", + cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2); +} + static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -531,11 +564,15 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "The MFW failed to respond to command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); + qed_mcp_print_cpu_info(p_hwfn, p_ptt); spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) + qed_mcp_cmd_set_blocking(p_hwfn, true); + return -EAGAIN; } @@ -573,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EBUSY; } + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n", + p_mb_params->cmd, p_mb_params->param); + return -EBUSY; + } + if (p_mb_params->data_src_size > union_data_size || p_mb_params->data_dst_size > union_data_size) { DP_ERR(p_hwfn, @@ -806,7 +850,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -1050,7 +1094,7 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) memset(&mb_params, 0, sizeof(mb_params)); mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; mb_params.param = wol_param; - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } @@ -2158,6 +2202,8 @@ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, true); + return 0; } @@ -2182,6 +2228,8 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, false); + return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index b9d3ecf7aad6ec..85e6b3989e7a91 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -635,11 +635,14 @@ struct qed_mcp_info { */ spinlock_t cmd_lock; + /* Flag to indicate whether sending a MFW mailbox command is blocked */ + bool b_block_cmd; + /* Spinlock used for syncing SW link-changes and link-changes * originating from attention context. */ spinlock_t link_lock; - bool block_mb_sending; + u32 public_base; u32 drv_mb_addr; u32 mfw_mb_addr; @@ -670,6 +673,7 @@ struct qed_mcp_mb_params { u32 mcp_param; u32 flags; #define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAG_AVOID_BLOCK (0x1 << 1) #define QED_MB_FLAGS_IS_SET(params, flag) \ ({ typeof(params) __params = (params); \ (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 2279965f8f8a48..f736f70956fd37 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -565,6 +565,7 @@ #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL +#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL #define PGLUE_B_REG_PF_BAR0_SIZE \ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ -- cgit 1.2.3-korg From c954579087f4c0185206cfa777e697874b1e7d13 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 30 Jul 2018 07:56:06 +0000 Subject: thermal: rcar_thermal: convert to SPDX identifiers As original license mentioned, it is GPL-2.0 in SPDX. Then, MODULE_LICENSE() should be "GPL v2" instead of "GPL". See ${LINUX}/include/linux/module.h "GPL" [GNU Public License v2 or later] "GPL v2" [GNU Public License v2] Signed-off-by: Kuninori Morimoto Reviewed-by: Daniel Lezcano Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Eduardo Valentin --- drivers/thermal/rcar_thermal.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index e77e63070e998e..78f932822d381c 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car THS/TSC thermal sensor driver * * Copyright (C) 2012 Renesas Solutions Corp. * Kuninori Morimoto - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include #include @@ -660,6 +648,6 @@ static struct platform_driver rcar_thermal_driver = { }; module_platform_driver(rcar_thermal_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("R-Car THS/TSC thermal sensor driver"); MODULE_AUTHOR("Kuninori Morimoto "); -- cgit 1.2.3-korg From d316522d06e9894429ace4b5f99e181bde4e7bd7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 20 Aug 2018 11:42:35 -0700 Subject: thermal: rcar_gen3_thermal: convert to SPDX identifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Reviewed-by: Daniel Lezcano Reviewed-by: Niklas Söderlund Reviewed-by: Simon Horman Signed-off-by: Eduardo Valentin --- drivers/thermal/rcar_gen3_thermal.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index 766521eb70715a..7aed5337bdd35b 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -1,19 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car Gen3 THS thermal sensor driver * Based on rcar_thermal.c and work from Hien Dang and Khiem Nguyen. * * Copyright (C) 2016 Renesas Electronics Corporation. * Copyright (C) 2016 Sang Engineering - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * */ #include #include -- cgit 1.2.3-korg From 152395fd03d4ce1e535a75cdbf58105e50587611 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 31 Jul 2018 00:56:49 +0800 Subject: thermal: of-thermal: disable passive polling when thermal zone is disabled When thermal zone is in passive mode, disabling its mode from sysfs is NOT taking effect at all, it is still polling the temperature of the disabled thermal zone and handling all thermal trips, it makes user confused. The disabling operation should disable the thermal zone behavior completely, for both active and passive mode, this patch clears the passive_delay when thermal zone is disabled and restores it when it is enabled. Signed-off-by: Anson Huang Signed-off-by: Eduardo Valentin --- drivers/thermal/of-thermal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 977a8307fbb1a4..4f28165592056d 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -260,10 +260,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_lock(&tz->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED) { tz->polling_delay = data->polling_delay; - else + tz->passive_delay = data->passive_delay; + } else { tz->polling_delay = 0; + tz->passive_delay = 0; + } mutex_unlock(&tz->lock); -- cgit 1.2.3-korg From 176eb614b118c96e7797f5ddefd10708c316f621 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 20 Aug 2018 12:43:51 +0800 Subject: r8152: disable RX aggregation on new Dell TB16 dock There's a new Dell TB16 dock with a different iSerialNumber. Apply the same fix from commit 0b1655143df0 ("r8152: disable RX aggregation on Dell TB16 dock") to this model. BugLink: https://bugs.launchpad.net/bugs/1785780 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 97742708460bc8..2cd71bdb6484c7 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5217,8 +5217,8 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && - udev->serial && !strcmp(udev->serial, "000001000000")) { + if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && + (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) { dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); } -- cgit 1.2.3-korg From 80f1a0f4e0cd4bfc8a74fc1c39843a6e7b206b95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Aug 2018 13:02:41 -0700 Subject: net/ipv6: Put lwtstate when destroying fib6_info Prior to the introduction of fib6_info lwtstate was managed by the dst code. With fib6_info releasing lwtstate needs to be done when the struct is freed. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d212738e9d100d..c861a6d4671d3f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -198,6 +198,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) } } + lwtstate_put(f6i->fib6_nh.nh_lwtstate); + if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); -- cgit 1.2.3-korg From ab08dcd724543896303eae7de6288242bbaff458 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 01:41:56 +0000 Subject: rhashtable: remove duplicated include from rhashtable.c Remove duplicated include. Signed-off-by: Yue Haibing Signed-off-by: David S. Miller --- lib/rhashtable.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ae4223e0f5bcb6..672eecda874a8f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,7 +28,6 @@ #include #include #include -#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -- cgit 1.2.3-korg From 09a4e0be5826aa66c4ce9954841f110ffe63ef4f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 16 Aug 2018 21:44:02 -0500 Subject: isofs: reject hardware sector size > 2048 bytes The largest block size supported by isofs is ISOFS_BLOCK_SIZE (2048), but isofs_fill_super calls sb_min_blocksize and sets the blocksize to the device's logical block size if it's larger than what we ended up with after option parsing. If for some reason we try to mount a hard 4k device as an isofs filesystem, we'll set opt.blocksize to 4096, and when we try to read the superblock we found via: block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits) with s_blocksize_bits greater than ISOFS_BLOCK_BITS, we'll have a negative shift and the bread will fail somewhat cryptically: isofs_fill_super: bread failed, dev=sda, iso_blknum=17, block=-2147483648 It seems best to just catch and clearly reject mounts of such a device. Reported-by: Bryan Gurney Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/isofs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index ec3fba7d492f48..488a9e7f8f6602 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "isofs.h" #include "zisofs.h" @@ -653,6 +654,12 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) /* * What if bugger tells us to go beyond page size? */ + if (bdev_logical_block_size(s->s_bdev) > 2048) { + printk(KERN_WARNING + "ISOFS: unsupported/invalid hardware sector size %d\n", + bdev_logical_block_size(s->s_bdev)); + goto out_freesbi; + } opt.blocksize = sb_min_blocksize(s, opt.blocksize); sbi->s_high_sierra = 0; /* default is iso9660 */ -- cgit 1.2.3-korg From 19f5e9e015675fcdbf2c20e804b2e84e80201454 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 20 Aug 2018 10:54:44 +0200 Subject: mmc: atmel-mci: fix bad logic of sg_copy_{from,to}_buffer conversion The conversion to sg_copy_{from,to}_buffer has been done in the wrong way. sg_copy_to_buffer is a copy from an SG list to a linear buffer so it can't replace memcpy(buf + offset, &value, remaining) where buf is the virtual address of the SG. Same for sg_copy_to_buffer but in the opposite way. Signed-off-by: Ludovic Desroches Suggested-by: Douglas Gilbert Fixes: 5b4277814e3f ("mmc: atmel-mci: use sg_copy_{from,to}_buffer") Signed-off-by: Ulf Hansson --- drivers/mmc/host/atmel-mci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 5aa2c9404e926d..be53044086c76f 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1976,7 +1976,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) do { value = atmci_readl(host, ATMCI_RDR); if (likely(offset + 4 <= sg->length)) { - sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); offset += 4; nbytes += 4; @@ -1993,7 +1993,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) } else { unsigned int remaining = sg->length - offset; - sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; flush_dcache_page(sg_page(sg)); @@ -2003,7 +2003,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) goto done; offset = 4 - remaining; - sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); nbytes += offset; } @@ -2042,7 +2042,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) do { if (likely(offset + 4 <= sg->length)) { - sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); atmci_writel(host, ATMCI_TDR, value); offset += 4; @@ -2059,7 +2059,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) unsigned int remaining = sg->length - offset; value = 0; - sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; host->sg = sg = sg_next(sg); @@ -2070,7 +2070,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) } offset = 4 - remaining; - sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); atmci_writel(host, ATMCI_TDR, value); nbytes += offset; -- cgit 1.2.3-korg From 17e96d8516e31c3cb52cb8e2ee79d1d2e6948c11 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 20 Aug 2018 10:54:45 +0200 Subject: mmc: android-goldfish: fix bad logic of sg_copy_{from,to}_buffer conversion The conversion to sg_copy_{from,to}_buffer has been done in the wrong way. sg_copy_to_buffer is a copy from an SG list to a linear buffer so it can't replace memcpy(dest, host->virt_base, data->sg->length) where dest is the virtual address of the SG. Same for sg_copy_from_buffer but in the opposite way. Signed-off-by: Ludovic Desroches Suggested-by: Douglas Gilbert Fixes: 53d7e098ba08 ("mmc: android-goldfish: use sg_copy_{from,to}_buffer") Signed-off-by: Ulf Hansson --- drivers/mmc/host/android-goldfish.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index 294de177632c6e..61e4e2a213c963 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -217,7 +217,7 @@ static void goldfish_mmc_xfer_done(struct goldfish_mmc_host *host, * We don't really have DMA, so we need * to copy from our platform driver buffer */ - sg_copy_to_buffer(data->sg, 1, host->virt_base, + sg_copy_from_buffer(data->sg, 1, host->virt_base, data->sg->length); } host->data->bytes_xfered += data->sg->length; @@ -393,7 +393,7 @@ static void goldfish_mmc_prepare_data(struct goldfish_mmc_host *host, * We don't really have DMA, so we need to copy to our * platform driver buffer */ - sg_copy_from_buffer(data->sg, 1, host->virt_base, + sg_copy_to_buffer(data->sg, 1, host->virt_base, data->sg->length); } } -- cgit 1.2.3-korg From 26caddf274cf1e89fd4ce44ab2b8dbc7a7f97681 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Aug 2018 15:05:55 +0300 Subject: mmc: block: Fix unsupported parallel dispatch of requests The mmc block driver does not support parallel dispatch of requests. In normal circumstances, all requests are anyway funneled through a single work item, so parallel dispatch never happens. However it can happen if there is no elevator. Fix that by detecting if a dispatch is in progress and returning busy (BLK_STS_RESOURCE) in that case Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 12 +++++++----- drivers/mmc/core/queue.h | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 648eb6743ed588..6edffeed995349 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -238,10 +238,6 @@ static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req, mmc_exit_request(mq->queue, req); } -/* - * We use BLK_MQ_F_BLOCKING and have only 1 hardware queue, which means requests - * will not be dispatched in parallel. - */ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -264,7 +260,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(q->queue_lock); - if (mq->recovery_needed) { + if (mq->recovery_needed || mq->busy) { spin_unlock_irq(q->queue_lock); return BLK_STS_RESOURCE; } @@ -291,6 +287,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, break; } + /* Parallel dispatch of requests is not supported at the moment */ + mq->busy = true; + mq->in_flight[issue_type] += 1; get_card = (mmc_tot_in_flight(mq) == 1); cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); @@ -333,9 +332,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, mq->in_flight[issue_type] -= 1; if (mmc_tot_in_flight(mq) == 0) put_card = true; + mq->busy = false; spin_unlock_irq(q->queue_lock); if (put_card) mmc_put_card(card, &mq->ctx); + } else { + WRITE_ONCE(mq->busy, false); } return ret; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 17e59d50b49602..9bf3c924507552 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -81,6 +81,7 @@ struct mmc_queue { unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) #define MMC_CQE_QUEUE_FULL BIT(1) + bool busy; bool use_cqe; bool recovery_needed; bool in_recovery; -- cgit 1.2.3-korg From 51474eff2bc2777061ab3658e014a37dc9d7a775 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Aug 2018 11:57:07 +0200 Subject: Bluetooth: Make BT_HCIUART_RTL configuration option depend on ACPI At the moment we only support ACPI enumeration for serial port attached RTL bluetooth controllers. This commit adds a dependency on ACPI to the BT_HCIUART_RTL configuration option, fixing the following warning when ACPI is not enabled: drivers/bluetooth/hci_h5.c:920:22: warning: 'rtl_vnd' defined but not used Cc: Arnd Bergmann Reported-by: Arnd Bergmann Signed-off-by: Hans de Goede Acked-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2df11cc08a460a..845b0314ce3a7f 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -200,6 +200,7 @@ config BT_HCIUART_RTL depends on BT_HCIUART depends on BT_HCIUART_SERDEV depends on GPIOLIB + depends on ACPI select BT_HCIUART_3WIRE select BT_RTL help -- cgit 1.2.3-korg From addb3ffbca66954fb1d1791d2db2153c403f81af Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 14 Aug 2018 10:10:31 -0500 Subject: Bluetooth: mediatek: Fix memory leak In case memory resources for *fw* were allocated, release them before return. Addresses-Coverity-ID: 1472611 ("Resource leak") Fixes: 7237c4c9ec92 ("Bluetooth: mediatek: Add protocol support for MediaTek serial devices") Signed-off-by: Gustavo A. R. Silva Acked-by: Sean Wang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btmtkuart.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index ed2a5c7cb77fa0..4593baff2bc944 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -144,8 +144,10 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_size = fw->size; /* The size of patch header is 30 bytes, should be skip */ - if (fw_size < 30) - return -EINVAL; + if (fw_size < 30) { + err = -EINVAL; + goto free_fw; + } fw_size -= 30; fw_ptr += 30; @@ -172,8 +174,8 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_ptr += dlen; } +free_fw: release_firmware(fw); - return err; } -- cgit 1.2.3-korg From 093dee661d6004738c4cbcbf48835c1e6c6ebae3 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 13:58:08 +0000 Subject: sch_cake: Remove unused including Remove including that don't need it. Signed-off-by: Yue Haibing Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 35fc7252187c1f..4d26b0823cdfc2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include #include -- cgit 1.2.3-korg From c27f1e2e9f29563cb093e96261e87c1ef83aeb98 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 14:05:42 +0000 Subject: rds: tcp: remove duplicated include from tcp.c Remove duplicated include. Signed-off-by: Yue Haibing Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2c7b7c352d3e8c..b9bbcf3d6c6397 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "rds.h" -- cgit 1.2.3-korg From b93c1b5ac8643cc08bb74fa8ae21d6c63dfcb23d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 21 Aug 2018 10:40:38 -0700 Subject: hv_netvsc: ignore devices that are not PCI Registering another device with same MAC address (such as TAP, VPN or DPDK KNI) will confuse the VF autobinding logic. Restrict the search to only run if the device is known to be a PCI attached VF. Fixes: e8ff40d4bff1 ("hv_netvsc: improve VF device matching") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 507f68190cb1b4..1121a1ec407cd0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -2039,12 +2040,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct net_device_context *net_device_ctx; + struct device *pdev = vf_netdev->dev.parent; struct netvsc_device *netvsc_dev; int ret; if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; + if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching -- cgit 1.2.3-korg From edfaf94fa705181eeb2fe0c36c0b902dedbd40f1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:05 -0700 Subject: net_sched: improve and refactor tcf_action_put_many() tcf_action_put_many() is mostly called to clean up actions on failure path, but tcf_action_put_many(&actions[acts_deleted]) is used in the ugliest way: it passes a slice of the array and uses an additional NULL at the end to avoid out-of-bound access. acts_deleted is completely unnecessary since we can teach tcf_action_put_many() scan the whole array and checks against NULL pointer. Which also means tcf_action_delete() should set deleted action pointers to NULL to avoid double free. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 229d63c99be23b..cd69a6afcf880e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -686,14 +686,18 @@ static int tcf_action_put(struct tc_action *p) return __tcf_action_put(p, false); } +/* Put all actions in this array, skip those NULL's. */ static void tcf_action_put_many(struct tc_action *actions[]) { int i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { struct tc_action *a = actions[i]; - const struct tc_action_ops *ops = a->ops; + const struct tc_action_ops *ops; + if (!a) + continue; + ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } @@ -1176,7 +1180,7 @@ err_out: } static int tcf_action_delete(struct net *net, struct tc_action *actions[], - int *acts_deleted, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { u32 act_index; int ret, i; @@ -1196,20 +1200,17 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[], } else { /* now do the delete */ ret = ops->delete(net, act_index); - if (ret < 0) { - *acts_deleted = i + 1; + if (ret < 0) return ret; - } } + actions[i] = NULL; } - *acts_deleted = i; return 0; } static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - int *acts_deleted, u32 portid, size_t attr_size, - struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; @@ -1227,7 +1228,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, acts_deleted, extack); + ret = tcf_action_delete(net, actions, extack); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); @@ -1249,8 +1250,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t attr_size = 0; - struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {}; - int acts_deleted = 0; + struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) @@ -1280,14 +1280,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, actions, &acts_deleted, portid, - attr_size, extack); + ret = tcf_del_notify(net, n, actions, portid, attr_size, extack); if (ret) goto err; - return ret; + return 0; } err: - tcf_action_put_many(&actions[acts_deleted]); + tcf_action_put_many(actions); return ret; } -- cgit 1.2.3-korg From 97a3f84f2c84f81b859aedd2c186df09c2ee21a6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:06 -0700 Subject: net_sched: remove unnecessary ops->delete() All ops->delete() wants is getting the tn->idrinfo, but we already have tc_action before calling ops->delete(), and tc_action has a pointer ->idrinfo. More importantly, each type of action does the same thing, that is, just calling tcf_idr_delete_index(). So it can be just removed. Fixes: b409074e6693 ("net: sched: add 'delete' function to action ops") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 15 +++++++-------- net/sched/act_bpf.c | 8 -------- net/sched/act_connmark.c | 8 -------- net/sched/act_csum.c | 8 -------- net/sched/act_gact.c | 8 -------- net/sched/act_ife.c | 8 -------- net/sched/act_ipt.c | 16 ---------------- net/sched/act_mirred.c | 8 -------- net/sched/act_nat.c | 8 -------- net/sched/act_pedit.c | 8 -------- net/sched/act_police.c | 8 -------- net/sched/act_sample.c | 8 -------- net/sched/act_simple.c | 8 -------- net/sched/act_skbedit.c | 8 -------- net/sched/act_skbmod.c | 8 -------- net/sched/act_tunnel_key.c | 8 -------- net/sched/act_vlan.c | 8 -------- 18 files changed, 7 insertions(+), 146 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 1ad5b19e83a95d..e32708491d8359 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -102,7 +102,6 @@ struct tc_action_ops { size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); - int (*delete)(struct net *net, u32 index); }; struct tc_action_net { @@ -158,7 +157,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index); int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); static inline int tcf_idr_release(struct tc_action *a, bool bind) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cd69a6afcf880e..00bf7d2b0bdd1f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -337,9 +337,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } EXPORT_SYMBOL(tcf_idr_check); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) +static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { - struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret = 0; @@ -370,7 +369,6 @@ int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) spin_unlock(&idrinfo->lock); return ret; } -EXPORT_SYMBOL(tcf_idr_delete_index); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, @@ -1182,24 +1180,25 @@ err_out: static int tcf_action_delete(struct net *net, struct tc_action *actions[], struct netlink_ext_ack *extack) { - u32 act_index; - int ret, i; + int i; for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { struct tc_action *a = actions[i]; const struct tc_action_ops *ops = a->ops; - /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. */ - act_index = a->tcfa_index; + struct tcf_idrinfo *idrinfo = a->idrinfo; + u32 act_index = a->tcfa_index; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); } else { + int ret; + /* now do the delete */ - ret = ops->delete(net, act_index); + ret = tcf_idr_delete_index(idrinfo, act_index); if (ret < 0) return ret; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index d30b23e4243643..0c68bc9cf0b4df 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -395,13 +395,6 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_bpf_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .type = TCA_ACT_BPF, @@ -412,7 +405,6 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .init = tcf_bpf_init, .walk = tcf_bpf_walker, .lookup = tcf_bpf_search, - .delete = tcf_bpf_delete, .size = sizeof(struct tcf_bpf), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 54c0bf54f2acf7..6f0f273f1139f8 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -198,13 +198,6 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_connmark_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .type = TCA_ACT_CONNMARK, @@ -214,7 +207,6 @@ static struct tc_action_ops act_connmark_ops = { .init = tcf_connmark_init, .walk = tcf_connmark_walker, .lookup = tcf_connmark_search, - .delete = tcf_connmark_delete, .size = sizeof(struct tcf_connmark_info), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e698d3fe2080d1..b8a67ae3105ad1 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -659,13 +659,6 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_csum)); } -static int tcf_csum_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, @@ -677,7 +670,6 @@ static struct tc_action_ops act_csum_ops = { .walk = tcf_csum_walker, .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, - .delete = tcf_csum_delete, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 6a3f25a8ffb30b..cd1d9bd32ef9af 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -243,13 +243,6 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) return sz; } -static int tcf_gact_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -261,7 +254,6 @@ static struct tc_action_ops act_gact_ops = { .walk = tcf_gact_walker, .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, - .delete = tcf_gact_delete, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index d1081bdf1bdb55..92fcf8ba5bca14 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -853,13 +853,6 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ife_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ife_ops = { .kind = "ife", .type = TCA_ACT_IFE, @@ -870,7 +863,6 @@ static struct tc_action_ops act_ife_ops = { .init = tcf_ife_init, .walk = tcf_ife_walker, .lookup = tcf_ife_search, - .delete = tcf_ife_delete, .size = sizeof(struct tcf_ife_info), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 51f235bbeb5bcc..23273b5303fd9d 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -337,13 +337,6 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ipt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .type = TCA_ACT_IPT, @@ -354,7 +347,6 @@ static struct tc_action_ops act_ipt_ops = { .init = tcf_ipt_init, .walk = tcf_ipt_walker, .lookup = tcf_ipt_search, - .delete = tcf_ipt_delete, .size = sizeof(struct tcf_ipt), }; @@ -395,13 +387,6 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_xt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_xt_ops = { .kind = "xt", .type = TCA_ACT_XT, @@ -412,7 +397,6 @@ static struct tc_action_ops act_xt_ops = { .init = tcf_xt_init, .walk = tcf_xt_walker, .lookup = tcf_xt_search, - .delete = tcf_xt_delete, .size = sizeof(struct tcf_ipt), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 38fd20f10f6796..8bf66d0a680000 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -395,13 +395,6 @@ static void tcf_mirred_put_dev(struct net_device *dev) dev_put(dev); } -static int tcf_mirred_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .type = TCA_ACT_MIRRED, @@ -416,7 +409,6 @@ static struct tc_action_ops act_mirred_ops = { .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, .put_dev = tcf_mirred_put_dev, - .delete = tcf_mirred_delete, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 822e903bfc25f0..4313aa102440e9 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -300,13 +300,6 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_nat_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_nat_ops = { .kind = "nat", .type = TCA_ACT_NAT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .init = tcf_nat_init, .walk = tcf_nat_walker, .lookup = tcf_nat_search, - .delete = tcf_nat_delete, .size = sizeof(struct tcf_nat), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8a7a7cb94e8308..10703407001972 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -460,13 +460,6 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_pedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .type = TCA_ACT_PEDIT, @@ -477,7 +470,6 @@ static struct tc_action_ops act_pedit_ops = { .init = tcf_pedit_init, .walk = tcf_pedit_walker, .lookup = tcf_pedit_search, - .delete = tcf_pedit_delete, .size = sizeof(struct tcf_pedit), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 06f0742db59316..5d8bfa878477e8 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -320,13 +320,6 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_police_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_idr_delete_index(tn, index); -} - MODULE_AUTHOR("Alexey Kuznetsov"); MODULE_DESCRIPTION("Policing actions"); MODULE_LICENSE("GPL"); @@ -340,7 +333,6 @@ static struct tc_action_ops act_police_ops = { .init = tcf_police_init, .walk = tcf_police_walker, .lookup = tcf_police_search, - .delete = tcf_police_delete, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 207b4132d1b066..44e9c00657bc1e 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -232,13 +232,6 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_sample_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_sample_ops = { .kind = "sample", .type = TCA_ACT_SAMPLE, @@ -249,7 +242,6 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, - .delete = tcf_sample_delete, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e616523ba3c15f..52400d49f81f23 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -196,13 +196,6 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_simp_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_simp_ops = { .kind = "simple", .type = TCA_ACT_SIMP, @@ -213,7 +206,6 @@ static struct tc_action_ops act_simp_ops = { .init = tcf_simp_init, .walk = tcf_simp_walker, .lookup = tcf_simp_search, - .delete = tcf_simp_delete, .size = sizeof(struct tcf_defact), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 926d7bc4a89d9d..73e44ce2a8837d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -299,13 +299,6 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .type = TCA_ACT_SKBEDIT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_skbedit_ops = { .cleanup = tcf_skbedit_cleanup, .walk = tcf_skbedit_walker, .lookup = tcf_skbedit_search, - .delete = tcf_skbedit_delete, .size = sizeof(struct tcf_skbedit), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d6a1af0c41712c..588077fafd6cc5 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -259,13 +259,6 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbmod_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .type = TCA_ACT_SKBMOD, @@ -276,7 +269,6 @@ static struct tc_action_ops act_skbmod_ops = { .cleanup = tcf_skbmod_cleanup, .walk = tcf_skbmod_walker, .lookup = tcf_skbmod_search, - .delete = tcf_skbmod_delete, .size = sizeof(struct tcf_skbmod), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8f09cf08d8fe12..420759153d5f44 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -548,13 +548,6 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tunnel_key_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .type = TCA_ACT_TUNNEL_KEY, @@ -565,7 +558,6 @@ static struct tc_action_ops act_tunnel_key_ops = { .cleanup = tunnel_key_release, .walk = tunnel_key_walker, .lookup = tunnel_key_search, - .delete = tunnel_key_delete, .size = sizeof(struct tcf_tunnel_key), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 209e70ad2c09a0..033d273afe5023 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -296,13 +296,6 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_vlan_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .type = TCA_ACT_VLAN, @@ -313,7 +306,6 @@ static struct tc_action_ops act_vlan_ops = { .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, - .delete = tcf_vlan_delete, .size = sizeof(struct tcf_vlan), }; -- cgit 1.2.3-korg From b144e7ec51a132eac00a68bf897b6349d810022f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:07 -0700 Subject: net_sched: remove unused parameter for tcf_action_delete() Fixes: 16af6067392c ("net: sched: implement reference counted action release") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 00bf7d2b0bdd1f..ba55226928a3ce 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1177,8 +1177,7 @@ err_out: return err; } -static int tcf_action_delete(struct net *net, struct tc_action *actions[], - struct netlink_ext_ack *extack) +static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { int i; @@ -1227,7 +1226,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, extack); + ret = tcf_action_delete(net, actions); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); -- cgit 1.2.3-korg From 7d485c451fc82f8ae431cdb379521bc6d0641064 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:08 -0700 Subject: net_sched: remove unused tcf_idr_check() tcf_idr_check() is replaced by tcf_idr_check_alloc(), and __tcf_idr_check() now can be folded into tcf_idr_search(). Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 22 +++------------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index e32708491d8359..eaa0e8b93d5b31 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -147,8 +147,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ba55226928a3ce..d76948f02a0282 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -300,21 +300,17 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, - struct tc_action **a, int bind) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; spin_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); - if (IS_ERR(p)) { + if (IS_ERR(p)) p = NULL; - } else if (p) { + else if (p) refcount_inc(&p->tcfa_refcnt); - if (bind) - atomic_inc(&p->tcfa_bindcnt); - } spin_unlock(&idrinfo->lock); if (p) { @@ -323,20 +319,8 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, } return false; } - -int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) -{ - return __tcf_idr_check(tn, index, a, 0); -} EXPORT_SYMBOL(tcf_idr_search); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) -{ - return __tcf_idr_check(tn, index, a, bind); -} -EXPORT_SYMBOL(tcf_idr_check); - static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; -- cgit 1.2.3-korg From 244cd96adb5f5ab39551081fb1f9009a54bb12ee Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:09 -0700 Subject: net_sched: remove list_head from tc_action After commit 90b73b77d08e, list_head is no longer needed. Now we just need to convert the list iteration to array iteration for drivers. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 ++---- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 10 ++++----- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 5 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++---- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 ++++++++-------- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 +-- .../net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++---- drivers/net/ethernet/netronome/nfp/flower/action.c | 6 ++---- drivers/net/ethernet/qlogic/qede/qede_filter.c | 6 ++---- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 5 ++--- include/net/act_api.h | 1 - include/net/pkt_cls.h | 25 ++++++++++++---------- net/dsa/slave.c | 4 +--- net/sched/act_api.c | 1 - 14 files changed, 43 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 139d96c5a02355..092c817f8f11cd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, struct tcf_exts *tc_exts) { const struct tc_action *tc_act; - LIST_HEAD(tc_actions); - int rc; + int i, rc; if (!tcf_exts_has_actions(tc_exts)) { netdev_info(bp->dev, "no actions"); return -EINVAL; } - tcf_exts_to_list(tc_exts, &tc_actions); - list_for_each_entry(tc_act, &tc_actions, list) { + tcf_exts_for_each_action(i, tc_act, tc_exts) { /* Drop action */ if (is_tcf_gact_shot(tc_act)) { actions->flags |= BNXT_TC_ACTION_FLAG_DROP; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 623f73dd7738db..c116f96956fe93 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { const struct tc_action *a; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { fs->action = FILTER_PASS; } else if (is_tcf_gact_shot(a)) { @@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, bool act_redir = false; bool act_pedit = false; bool act_vlan = false; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { /* Do nothing */ } else if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 18eb2aedd4cb0a..c7d2b4dc7568e7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap, unsigned int num_actions = 0; const struct tc_action *a; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { /* Don't allow more than one action per rule. */ if (num_actions) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 44709800549092..af4c9ae7f432d0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9171,14 +9171,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; - LIST_HEAD(actions); + int i; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - + tcf_exts_for_each_action(i, a, exts) { /* Drop action */ if (is_tcf_gact_shot(a)) { *action = IXGBE_FDIR_DROP_QUEUE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9131a1376e7dc5..9fed54017659de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1982,14 +1982,15 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { + int k; + if (!is_tcf_pedit(a)) continue; nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); + for (k = 0; k < nkeys; k++) { + htype = tcf_pedit_htype(a, k); if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { modify_ip_header = true; @@ -2053,15 +2054,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, const struct tc_action *a; LIST_HEAD(actions); u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -2666,7 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); bool encap = false; u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; @@ -2674,8 +2674,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6070d1591d1e77..930700413b1d07 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1346,8 +1346,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(f->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(f->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct mlxsw_sp_port_mall_mirror_tc_entry *mirror; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ebd1b24ebaa5dd..8d211972c5e90f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -21,8 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { const struct tc_action *a; - LIST_HEAD(actions); - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return 0; @@ -32,8 +31,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_ok(a)) { err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0ba0356ec4e6dc..9044496803e627 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -796,11 +796,10 @@ int nfp_flower_compile_action(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err, tun_out_cnt, out_cnt; + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; enum nfp_flower_tun_type tun_type; const struct tc_action *a; u32 csum_updated = 0; - LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; @@ -810,8 +809,7 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - tcf_exts_to_list(flow->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, flow->exts) { err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9673d19308e65c..b16ce7d93caff5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2006,18 +2006,16 @@ unlock: static int qede_parse_actions(struct qede_dev *edev, struct tcf_exts *exts) { - int rc = -EINVAL, num_act = 0; + int rc = -EINVAL, num_act = 0, i; const struct tc_action *a; bool is_drop = false; - LIST_HEAD(actions); if (!tcf_exts_has_actions(exts)) { DP_NOTICE(edev, "No tc actions received\n"); return rc; } - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { num_act++; if (is_tcf_gact_shot(a)) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1a96dd9c1091e6..531294f4978bc4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -61,7 +61,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, struct stmmac_tc_entry *action_entry = entry; const struct tc_action *act; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) @@ -69,8 +69,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, if (frag) action_entry = frag; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(act, &actions, list) { + tcf_exts_for_each_action(i, act, exts) { /* Accept */ if (is_tcf_gact_ok(act)) { action_entry->val.af = 1; diff --git a/include/net/act_api.h b/include/net/act_api.h index eaa0e8b93d5b31..f9c4b871af8840 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -23,7 +23,6 @@ struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; - struct list_head list; struct tcf_idrinfo *idrinfo; u32 tcfa_index; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ef727f71336e7e..c17d5186546950 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -298,19 +298,13 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) #endif } -static inline void tcf_exts_to_list(const struct tcf_exts *exts, - struct list_head *actions) -{ #ifdef CONFIG_NET_CLS_ACT - int i; - - for (i = 0; i < exts->nr_actions; i++) { - struct tc_action *a = exts->actions[i]; - - list_add_tail(&a->list, actions); - } +#define tcf_exts_for_each_action(i, a, exts) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) +#else +#define tcf_exts_for_each_action(i, a, exts) \ + for (; 0; ) #endif -} static inline void tcf_exts_stats_update(const struct tcf_exts *exts, @@ -361,6 +355,15 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->actions[0]; +#else + return NULL; +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 962c4fd338ba57..1c45c1d6d241db 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -767,7 +767,6 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, const struct tc_action *a; struct dsa_port *to_dp; int err = -EOPNOTSUPP; - LIST_HEAD(actions); if (!ds->ops->port_mirror_add) return err; @@ -775,8 +774,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!tcf_exts_has_one_action(cls->exts)) return err; - tcf_exts_to_list(cls->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(cls->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d76948f02a0282..db83dac1e7f488 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -391,7 +391,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->idrinfo = idrinfo; p->ops = ops; - INIT_LIST_HEAD(&p->list); *a = p; return 0; err3: -- cgit 1.2.3-korg From a0c2e90fe131d9a7440ac05c9c31fc35dfac2fa8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:10 -0700 Subject: net_sched: remove unused tcfa_capab Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index f9c4b871af8840..970303448c9029 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -28,7 +28,6 @@ struct tc_action { u32 tcfa_index; refcount_t tcfa_refcnt; atomic_t tcfa_bindcnt; - u32 tcfa_capab; int tcfa_action; struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; @@ -43,7 +42,6 @@ struct tc_action { #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt -#define tcf_capab common.tcfa_capab #define tcf_action common.tcfa_action #define tcf_tm common.tcfa_tm #define tcf_bstats common.tcfa_bstats -- cgit 1.2.3-korg From 8ce5be1c899d31f3cd047e5f707cd9dbfb81e076 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:11 -0700 Subject: Revert "net: sched: act_ife: disable bh when taking ife_mod_lock" This reverts commit 42c625a486f3 ("net: sched: act_ife: disable bh when taking ife_mod_lock"), because what ife_mod_lock protects is absolutely not touched in rate est timer BH context, they have no race. A better fix is following up. Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 92fcf8ba5bca14..9decbb74b3ac83 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -167,16 +167,16 @@ static struct tcf_meta_ops *find_ife_oplist(u16 metaid) { struct tcf_meta_ops *o; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { if (o->metaid == metaid) { if (!try_module_get(o->owner)) o = NULL; - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return o; } } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return NULL; } @@ -190,12 +190,12 @@ int register_ife_op(struct tcf_meta_ops *mops) !mops->get || !mops->alloc) return -EINVAL; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid || (strcmp(mops->name, m->name) == 0)) { - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return -EEXIST; } } @@ -204,7 +204,7 @@ int register_ife_op(struct tcf_meta_ops *mops) mops->release = ife_release_meta_gen; list_add_tail(&mops->list, &ifeoplist); - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return 0; } EXPORT_SYMBOL_GPL(unregister_ife_op); @@ -214,7 +214,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) struct tcf_meta_ops *m; int err = -ENOENT; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid) { list_del(&mops->list); @@ -222,7 +222,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) break; } } - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return err; } @@ -343,13 +343,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); if (installed) return 0; -- cgit 1.2.3-korg From 4e407ff5cd67ec76eeeea1deec227b7982dc7f66 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:12 -0700 Subject: act_ife: move tcfa_lock down to where necessary The only time we need to take tcfa_lock is when adding a new metainfo to an existing ife->metalist. We don't need to take tcfa_lock so early and so broadly in tcf_ife_init(). This means we can always take ife_mod_lock first, avoid the reverse locking ordering warning as reported by Vlad. Reported-by: Vlad Buslov Tested-by: Vlad Buslov Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9decbb74b3ac83..244a8cf4818350 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -265,11 +265,8 @@ static const char *ife_meta_id2name(u32 metaid) #endif /* called when adding new meta information - * under ife->tcf_lock for existing action */ -static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len, bool exists, - bool rtnl_held) +static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -277,15 +274,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - if (exists) - spin_unlock_bh(&ife->tcf_lock); if (rtnl_held) rtnl_unlock(); request_module("ife-meta-%s", ife_meta_id2name(metaid)); if (rtnl_held) rtnl_lock(); - if (exists) - spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -302,10 +295,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic) + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -332,12 +324,16 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, } } + if (exists) + spin_lock_bh(&ife->tcf_lock); list_add_tail(&mi->metalist, &ife->metalist); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; @@ -345,7 +341,7 @@ static int use_all_metadata(struct tcf_ife_info *ife) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true); + rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -422,7 +418,6 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } -/* under ife->tcf_lock for existing action */ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -436,12 +431,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len, exists, - rtnl_held); + rc = load_metaops_and_vet(i, val, len, rtnl_held); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(ife, i, val, len, false, exists); if (rc) return rc; } @@ -540,8 +534,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p->eth_type = ife_type; } - if (exists) - spin_lock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -551,10 +543,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL, NULL); if (err) { metadata_parse_err: - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } @@ -569,17 +558,16 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } } + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ rcu_swap_protected(ife->params, p, 1); -- cgit 1.2.3-korg From 5ffe57da29b3802baeddaa40909682bbb4cb4d48 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:13 -0700 Subject: act_ife: fix a potential deadlock use_all_metadata() acquires read_lock(&ife_mod_lock), then calls add_metainfo() which calls find_ife_oplist() which acquires the same lock again. Deadlock! Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops as an additional parameter and let its callers to decide how to find it. For use_all_metadata(), it already has ops, no need to find it again, just call __add_metainfo() directly. And, as ife_mod_lock is only needed for find_ife_oplist(), this means we can make non-atomic allocation for populate_metalist() now. Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 244a8cf4818350..196430aefe87a3 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -296,22 +296,16 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) /* called when adding new meta information */ -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) +static int __add_metainfo(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; - struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; - if (!ops) - return -ENOENT; - mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (!mi) { - /*put back what find_ife_oplist took */ - module_put(ops->owner); + if (!mi) return -ENOMEM; - } mi->metaid = metaid; mi->ops = ops; @@ -319,7 +313,6 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); - module_put(ops->owner); return ret; } } @@ -333,6 +326,21 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } +static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool exists) +{ + const struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret; + + if (!ops) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + if (ret) + /*put back what find_ife_oplist took */ + module_put(ops->owner); + return ret; +} + static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; @@ -341,7 +349,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); + rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -435,7 +443,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, false, exists); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } -- cgit 1.2.3-korg From 96c26e04581667e3cd17ed74c2fc3499afea49b8 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 20 Aug 2018 09:54:25 +0900 Subject: xsk: fix return value of xdp_umem_assign_dev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/ENOTSUPP/EOPNOTSUPP/ in function umem_assign_dev(). This function's return value is directly returned by xsk_bind(). EOPNOTSUPP is bind()'s possible return value. Fixes: f734607e819b ("xsk: refactor xdp_umem_assign_dev()") Signed-off-by: Prashant Bhole Acked-by: Song Liu Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 911ca6d3cb5a6c..bfe2dbea480ba8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) - return force_zc ? -ENOTSUPP : 0; /* fail or fallback */ + return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */ bpf.command = XDP_QUERY_XSK_UMEM; rtnl_lock(); err = xdp_umem_query(dev, queue_id); if (err) { - err = err < 0 ? -ENOTSUPP : -EBUSY; + err = err < 0 ? -EOPNOTSUPP : -EBUSY; goto err_rtnl_unlock; } -- cgit 1.2.3-korg From 64d9d13828c6c8e188bba63794eee923df3d69a9 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:30 +0000 Subject: fs/quota: Replace XQM_MAXQUOTAS usage with MAXQUOTAS XQM_MAXQUOTAS and MAXQUOTAS are, it appears, equivalent. Replace all usage of XQM_MAXQUOTAS and remove it along with the unused XQM_*QUOTA definitions. Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara --- fs/quota/quota.c | 12 +++++------- include/linux/quota.h | 8 +------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 860bfbe7a07aa5..d403392d8a0f10 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -120,8 +120,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr) struct if_dqinfo uinfo; int ret; - /* This checks whether qc_state has enough entries... */ - BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS); if (!sb->s_qcop->get_state) return -ENOSYS; ret = sb->s_qcop->get_state(sb, &state); @@ -354,10 +352,10 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) * GETXSTATE quotactl has space for just one set of time limits so * report them for the first enabled quota type */ - for (type = 0; type < XQM_MAXQUOTAS; type++) + for (type = 0; type < MAXQUOTAS; type++) if (state.s_state[type].flags & QCI_ACCT_ENABLED) break; - BUG_ON(type == XQM_MAXQUOTAS); + BUG_ON(type == MAXQUOTAS); fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -427,10 +425,10 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) * GETXSTATV quotactl has space for just one set of time limits so * report them for the first enabled quota type */ - for (type = 0; type < XQM_MAXQUOTAS; type++) + for (type = 0; type < MAXQUOTAS; type++) if (state.s_state[type].flags & QCI_ACCT_ENABLED) break; - BUG_ON(type == XQM_MAXQUOTAS); + BUG_ON(type == MAXQUOTAS); fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -701,7 +699,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, { int ret; - if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) + if (type >= MAXQUOTAS) return -EINVAL; /* * Quota not supported on this fs? Check this before s_quota_types diff --git a/include/linux/quota.h b/include/linux/quota.h index ca9772c8e48b06..f32dd270b8e3f2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -408,13 +408,7 @@ struct qc_type_state { struct qc_state { unsigned int s_incoredqs; /* Number of dquots in core */ - /* - * Per quota type information. The array should really have - * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in - * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough. Once VFS - * supports project quotas, this can be changed to MAXQUOTAS - */ - struct qc_type_state s_state[XQM_MAXQUOTAS]; + struct qc_type_state s_state[MAXQUOTAS]; /* Per quota type information */ }; /* Structure for communicating via ->set_info */ -- cgit 1.2.3-korg From 7b6924d94a60c6b8c1279ca003e8744e6cd9e8b1 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:31 +0000 Subject: fs/quota: Fix spectre gadget in do_quotactl 'type' is user-controlled, so sanitize it after the bounds check to avoid using it in speculative execution. This covers the following potential gadgets detected with the help of smatch: * fs/ext4/super.c:5741 ext4_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/ext4/super.c:5778 ext4_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1552 f2fs_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1608 f2fs_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/quota/dquot.c:412 mark_info_dirty() warn: potential spectre issue 'sb_dqopt(sb)->info' [w] * fs/quota/dquot.c:933 dqinit_needed() warn: potential spectre issue 'dquots' [r] * fs/quota/dquot.c:2112 dquot_commit_info() warn: potential spectre issue 'dqopt->ops' [r] * fs/quota/dquot.c:2362 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->files' [w] (local cap) * fs/quota/dquot.c:2369 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->ops' [w] (local cap) * fs/quota/dquot.c:2370 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->info' [w] (local cap) * fs/quota/quota.c:110 quota_getfmt() warn: potential spectre issue 'sb_dqopt(sb)->info' [r] * fs/quota/quota_v2.c:84 v2_check_quota_file() warn: potential spectre issue 'quota_magics' [w] * fs/quota/quota_v2.c:85 v2_check_quota_file() warn: potential spectre issue 'quota_versions' [w] * fs/quota/quota_v2.c:96 v2_read_file_info() warn: potential spectre issue 'dqopt->info' [r] * fs/quota/quota_v2.c:172 v2_write_file_info() warn: potential spectre issue 'dqopt->info' [r] Additionally, a quick inspection indicates there are array accesses with 'type' in quota_on() and quota_off() functions which are also addressed by this. Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara --- fs/quota/quota.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index d403392d8a0f10..f0cbf58ad4dade 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,7 @@ #include #include #include +#include static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -701,6 +702,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= MAXQUOTAS) return -EINVAL; + type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. -- cgit 1.2.3-korg From b845c898b2f1ea458d5453f0fa1da6e2dfce3bb4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Aug 2018 15:55:00 +0200 Subject: bpf, sockmap: fix sock_hash_alloc and reject zero-sized keys Currently, it is possible to create a sock hash map with key size of 0 and have the kernel return a fd back to user space. This is invalid for hash maps (and kernel also hasn't been tested for zero key size support in general at this point). Thus, reject such configuration. Fixes: 81110384441a ("bpf: sockmap, add hash map support") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Song Liu --- kernel/bpf/sockmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 98e621a29e8e69..60ceb0e1fa56b0 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2140,7 +2140,9 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return ERR_PTR(-EPERM); /* check sanity of attributes */ - if (attr->max_entries == 0 || attr->value_size != 4 || + if (attr->max_entries == 0 || + attr->key_size == 0 || + attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); -- cgit 1.2.3-korg From eb29429d81e31b191f3b2bd19cf820279cec6463 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Aug 2018 18:09:17 +0200 Subject: bpf, sockmap: fix sock hash count in alloc_sock_hash_elem When we try to allocate a new sock hash entry and the allocation fails, then sock hash map fails to reduce the map element counter, meaning we keep accounting this element although it was never used. Fix it by dropping the element counter on error. Fixes: 81110384441a ("bpf: sockmap, add hash map support") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend --- kernel/bpf/sockmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 60ceb0e1fa56b0..40c6ef9fc82865 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2269,8 +2269,10 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) + if (!l_new) { + atomic_dec(&htab->count); return ERR_PTR(-ENOMEM); + } memcpy(l_new->key, key, key_size); l_new->sk = sk; -- cgit 1.2.3-korg From 67db7cd249e71f64346f481b629724376d063e08 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:32 -0700 Subject: tls: possible hang when do_tcp_sendpages hits sndbuf is full case Currently, the lower protocols sk_write_space handler is not called if TLS is sending a scatterlist via tls_push_sg. However, normally tls_push_sg calls do_tcp_sendpage, which may be under memory pressure, that in turn may trigger a wait via sk_wait_event. Typically, this happens when the in-flight bytes exceed the sdnbuf size. In the normal case when enough ACKs are received sk_write_space() will be called and the sk_wait_event will be woken up allowing it to send more data and/or return to the user. But, in the TLS case because the sk_write_space() handler does not wake up the events the above send will wait until the sndtimeo is exceeded. By default this is MAX_SCHEDULE_TIMEOUT so it look like a hang to the user (especially this impatient user). To fix this pass the sk_write_space event to the lower layers sk_write_space event which in the TCP case will wake any pending events. I observed the above while integrating sockmap and ktls. It initially appeared as test_sockmap (modified to use ktls) occasionally hanging. To reliably reproduce this reduce the sndbuf size and stress the tls layer by sending many 1B sends. This results in every byte needing a header and each byte individually being sent to the crypto layer. Signed-off-by: John Fastabend Acked-by: Dave Watson Signed-off-by: Daniel Borkmann --- net/tls/tls_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 93c0c225ab340a..180b6640e5316b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -213,9 +213,14 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* We are already sending pages, ignore notification */ - if (ctx->in_tcp_sendpages) + /* If in_tcp_sendpages call lower protocol write space handler + * to ensure we wake up any waiting operations there. For example + * if do_tcp_sendpages where to call sk_wait_event. + */ + if (ctx->in_tcp_sendpages) { + ctx->sk_write_space(sk); return; + } if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; -- cgit 1.2.3-korg From 9b2e0388bec8ec5427403e23faff3b58dd1c3200 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:37 -0700 Subject: bpf: sockmap: write_space events need to be passed to TCP handler When sockmap code is using the stream parser it also handles the write space events in order to handle the case where (a) verdict redirects skb to another socket and (b) the sockmap then sends the skb but due to memory constraints (or other EAGAIN errors) needs to do a retry. But the initial code missed a third case where the skb_send_sock_locked() triggers an sk_wait_event(). A typically case would be when sndbuf size is exceeded. If this happens because we do not pass the write_space event to the lower layers we never wake up the event and it will wait for sndtimeo. Which as noted in ktls fix may be rather large and look like a hang to the user. To reproduce the best test is to reduce the sndbuf size and send 1B data chunks to stress the memory handling. To fix this pass the event from the upper layer to the lower layer. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 40c6ef9fc82865..cf5195c7c33172 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1427,12 +1427,15 @@ out: static void smap_write_space(struct sock *sk) { struct smap_psock *psock; + void (*write_space)(struct sock *sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state))) schedule_work(&psock->tx_work); + write_space = psock->save_write_space; rcu_read_unlock(); + write_space(sk); } static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) -- cgit 1.2.3-korg From 00e1cae78120ee19462e7f96135cd1cc59a086e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 22 Aug 2018 00:02:19 +0200 Subject: net: ethernet: renesas: use SPDX identifier for Renesas drivers Signed-off-by: Wolfram Sang Acked-by: Sergei Shtylyov Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 5 +---- drivers/net/ethernet/renesas/ravb_main.c | 5 +---- drivers/net/ethernet/renesas/sh_eth.c | 13 +------------ drivers/net/ethernet/renesas/sh_eth.h | 13 +------------ 4 files changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index b81f4faf7b1011..1470fc12282b25 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #ifndef __RAVB_H__ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c06f2df895c2c3..aff5516b781e27 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #include diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5573199c4536c2..ad4433d592377a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* SuperH Ethernet device driver * * Copyright (C) 2014 Renesas Electronics Corporation @@ -5,18 +6,6 @@ * Copyright (C) 2008-2014 Renesas Solutions Corp. * Copyright (C) 2013-2017 Cogent Embedded, Inc. * Copyright (C) 2014 Codethink Limited - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #include diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index f94be99cf40021..0c18650bbfe69f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu * Copyright (C) 2008-2012 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #ifndef __SH_ETH_H__ -- cgit 1.2.3-korg From 3d0371b313b84ba7c16ebf2526a7a34f1c57b19e Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 22 Aug 2018 14:57:44 +1000 Subject: net/ncsi: Fixup .dumpit message flags and ID check in Netlink handler The ncsi_pkg_info_all_nl() .dumpit handler is missing the NLM_F_MULTI flag, causing additional package information after the first to be lost. Also fixup a sanity check in ncsi_write_package_info() to reject out of range package IDs. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 82e6edf9c5d9c8..45f33d6dedf771 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -100,7 +100,7 @@ static int ncsi_write_package_info(struct sk_buff *skb, bool found; int rc; - if (id > ndp->package_num) { + if (id > ndp->package_num - 1) { netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id); return -ENODEV; } @@ -240,7 +240,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, return 0; /* done */ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); + &ncsi_genl_family, NLM_F_MULTI, NCSI_CMD_PKG_INFO); if (!hdr) { rc = -EMSGSIZE; goto err; -- cgit 1.2.3-korg From 93cfb6c17690c465509967aeb237717d10513a88 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 22 Aug 2018 12:29:43 +0200 Subject: sch_cake: Fix TC filter flow override and expand it to hosts as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TC filter flow mapping override completely skipped the call to cake_hash(); however that meant that the internal state was not being updated, which ultimately leads to deadlocks in some configurations. Fix that by passing the overridden flow ID into cake_hash() instead so it can react appropriately. In addition, the major number of the class ID can now be set to override the host mapping in host isolation mode. If both host and flow are overridden (or if the respective modes are disabled), flow dissection and hashing will be skipped entirely; otherwise, the hashing will be kept for the portions that are not set by the filter. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 4d26b0823cdfc2..c07c30b916d5e4 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -620,15 +620,20 @@ static bool cake_ddst(int flow_mode) } static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, - int flow_mode) + int flow_mode, u16 flow_override, u16 host_override) { - u32 flow_hash = 0, srchost_hash, dsthost_hash; + u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; u16 reduced_hash, srchost_idx, dsthost_idx; struct flow_keys keys, host_keys; if (unlikely(flow_mode == CAKE_FLOW_NONE)) return 0; + /* If both overrides are set we can skip packet dissection entirely */ + if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && + (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) + goto skip_hash; + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -675,6 +680,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, if (flow_mode & CAKE_FLOW_FLOWS) flow_hash = flow_hash_from_keys(&keys); +skip_hash: + if (flow_override) + flow_hash = flow_override - 1; + if (host_override) { + dsthost_hash = host_override - 1; + srchost_hash = host_override - 1; + } + if (!(flow_mode & CAKE_FLOW_FLOWS)) { if (flow_mode & CAKE_FLOW_SRC_IP) flow_hash ^= srchost_hash; @@ -1570,7 +1583,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, struct cake_sched_data *q = qdisc_priv(sch); struct tcf_proto *filter; struct tcf_result res; - u32 flow = 0; + u16 flow = 0, host = 0; int result; filter = rcu_dereference_bh(q->filter_list); @@ -1594,10 +1607,12 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, #endif if (TC_H_MIN(res.classid) <= CAKE_QUEUES) flow = TC_H_MIN(res.classid); + if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16)) + host = TC_H_MAJ(res.classid) >> 16; } hash: *t = cake_select_tin(sch, skb); - return flow ?: cake_hash(*t, skb, flow_mode) + 1; + return cake_hash(*t, skb, flow_mode, flow, host) + 1; } static void cake_reconfigure(struct Qdisc *sch); -- cgit 1.2.3-korg From 191672ca07a7c10c3b84d01019a33d59b4317997 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Aug 2018 17:25:44 +0200 Subject: net_sched: fix unused variable warning in stmmac The new tcf_exts_for_each_action() macro doesn't reference its arguments when CONFIG_NET_CLS_ACT is disabled, which leads to a harmless warning in at least one driver: drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_fill_actions': drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:64:6: error: unused variable 'i' [-Werror=unused-variable] Adding a cast to void lets us avoid this kind of warning. To be on the safe side, do it for all three arguments, not just the one that caused the warning. Fixes: 244cd96adb5f ("net_sched: remove list_head from tc_action") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c17d5186546950..75a3f3fdb35917 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -303,7 +303,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) #else #define tcf_exts_for_each_action(i, a, exts) \ - for (; 0; ) + for (; 0; (void)(i), (void)(a), (void)(exts)) #endif static inline void -- cgit 1.2.3-korg From e500c6d349f7f36120886841c6f057ce248b48b2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Aug 2018 12:58:34 -0700 Subject: addrconf: reduce unnecessary atomic allocations All the 3 callers of addrconf_add_mroute() assert RTNL lock, they don't take any additional lock either, so it is safe to convert it to GFP_KERNEL. Same for sit_add_v4_addrs(). Cc: David Ahern Signed-off-by: Cong Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fac4ad748672c..d51a8c0b3372d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2398,7 +2398,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, GFP_ATOMIC, NULL); + ip6_route_add(&cfg, GFP_KERNEL, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -3062,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, - GFP_ATOMIC); + GFP_KERNEL); return; } @@ -3087,7 +3087,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, 0, idev->dev, - 0, pflags, GFP_ATOMIC); + 0, pflags, GFP_KERNEL); } } } -- cgit 1.2.3-korg From 431280eebed9f5079553daf003011097763e71fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2018 13:30:45 -0700 Subject: ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state tcp uses per-cpu (and per namespace) sockets (net->ipv4.tcp_sk) internally to send some control packets. 1) RST packets, through tcp_v4_send_reset() 2) ACK packets in SYN-RECV and TIME-WAIT state, through tcp_v4_send_ack() These packets assert IP_DF, and also use the hashed IP ident generator to provide an IPv4 ID number. Geoff Alexander reported this could be used to build off-path attacks. These packets should not be fragmented, since their size is smaller than IPV4_MIN_MTU. Only some tunneled paths could eventually have to fragment, regardless of inner IPID. We really can use zero IPID, to address the flaw, and as a bonus, avoid a couple of atomic operations in ip_idents_reserve() Signed-off-by: Eric Dumazet Reported-by: Geoff Alexander Tested-by: Geoff Alexander Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9e041fa5c54536..44c09eddbb781c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2517,6 +2517,12 @@ static int __net_init tcp_sk_init(struct net *net) if (res) goto fail; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } -- cgit 1.2.3-korg From fb99886224294b2291d267da41395022fa4200e2 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:14 -0400 Subject: tcp_bbr: add bbr_check_probe_rtt_done() helper This patch add a helper function bbr_check_probe_rtt_done() to 1. check the condition to see if bbr should exit probe_rtt mode; 2. process the logic of exiting probe_rtt mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 13d34427ca3dd5..fd7bccf36a263f 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -95,11 +95,10 @@ struct bbr { u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ - restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:12, + unused:13, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -396,17 +395,11 @@ static bool bbr_set_cwnd_to_recover_or_restore( cwnd = tcp_packets_in_flight(tp) + acked; } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { /* Exiting loss recovery; restore cwnd saved before recovery. */ - bbr->restore_cwnd = 1; + cwnd = max(cwnd, bbr->prior_cwnd); bbr->packet_conservation = 0; } bbr->prev_ca_state = state; - if (bbr->restore_cwnd) { - /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->restore_cwnd = 0; - } - if (bbr->packet_conservation) { *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); return true; /* yes, using packet conservation */ @@ -748,6 +741,20 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr_reset_mode(sk); +} + /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and * periodically drain the bottleneck queue, to converge to measure the true * min_rtt (unloaded propagation delay). This allows the flows to keep queues @@ -806,12 +813,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) } else if (bbr->probe_rtt_done_stamp) { if (bbr->round_start) bbr->probe_rtt_round_done = 1; - if (bbr->probe_rtt_round_done && - after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_jiffies32; - bbr->restore_cwnd = 1; /* snap to prior_cwnd */ - bbr_reset_mode(sk); - } + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); } } /* Restart after idle ends only once we process a new S/ACK for data */ @@ -862,7 +865,6 @@ static void bbr_init(struct sock *sk) bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); - bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; bbr->full_bw_reached = 0; -- cgit 1.2.3-korg From 5490b32dce6932ea7ee8e3b2f76db2957c92af6e Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:15 -0400 Subject: tcp_bbr: in restart from idle, see if we should exit PROBE_RTT This patch fix the case where BBR does not exit PROBE_RTT mode when it restarts from idle. When BBR restarts from idle and if BBR is in PROBE_RTT mode, BBR should check if it's time to exit PROBE_RTT. If yes, then BBR should exit PROBE_RTT mode and restore the cwnd to its full value. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index fd7bccf36a263f..1d4bdd3b5e4d04 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -174,6 +174,8 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +static void bbr_check_probe_rtt_done(struct sock *sk); + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -308,6 +310,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) */ if (bbr->mode == BBR_PROBE_BW) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); } } -- cgit 1.2.3-korg From 8e995bf14fdb7e33681d5c3312b602fa342b878a Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:16 -0400 Subject: tcp_bbr: apply PROBE_RTT cwnd cap even if acked==0 This commit fixes a corner case where TCP BBR would enter PROBE_RTT mode but not reduce its cwnd. If a TCP receiver ACKed less than one full segment, the number of delivered/acked packets was 0, so that bbr_set_cwnd() would short-circuit and exit early, without cutting cwnd to the value we want for PROBE_RTT. The fix is to instead make sure that even when 0 full packets are ACKed, we do apply all the appropriate caps, including the cap that applies in PROBE_RTT mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 1d4bdd3b5e4d04..02ff2dde96094c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -420,10 +420,10 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = 0, target_cwnd = 0; + u32 cwnd = tp->snd_cwnd, target_cwnd = 0; if (!acked) - return; + goto done; /* no packet fully ACKed; just apply caps */ if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; -- cgit 1.2.3-korg From 3ed614dce3ca9912d22be215ff0f11104b69fe62 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:10 +0800 Subject: net: hns: fix length and page_offset overflow when CONFIG_ARM64_64K_PAGES When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of length and page_offset are u16, they will overflow. So change them to u32. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index fa5b30f547f662..cad52bd331f7b2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -220,10 +220,10 @@ struct hnae_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; + u32 page_offset; + u32 length; /* length of the buffer */ - u16 length; /* length of the buffer */ + u16 reuse_flag; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; -- cgit 1.2.3-korg From ac4a5b52f5970479f4b2d94a7f98dbf9eaf675ab Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:11 +0800 Subject: net: hns: modify variable type in hns_nic_reuse_page 'truesize' is supposed to be u32, not int, so fix it. Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 9f2b552aee3399..c8c0b0309c2716 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -512,7 +512,8 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, struct hnae_desc_cb *desc_cb) { struct hnae_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; -- cgit 1.2.3-korg From b1ccd4c0ab6ef499f47dd84ed4920502a7147bba Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:12 +0800 Subject: net: hns: fix skb->truesize underestimation skb->truesize is not meant to be tracking amount of used bytes in a skb, but amount of reserved/consumed bytes in memory. For instance, if we use a single byte in last page fragment, we have to account the full size of the fragment. So skb_add_rx_frag needs to calculate the length of the entire buffer into turesize. Fixes: 9cbe9fd5214e ("net: hns: optimize XGE capability by reducing cpu usage") Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c8c0b0309c2716..71bd3bff6c6735 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -531,7 +531,7 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, } skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); + size - pull_len, truesize); /* avoid re-using remote pages,flag default unreuse */ if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id())) -- cgit 1.2.3-korg From 339379a2fb13decd1802b403370cc3cdf21d819f Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:13 +0800 Subject: net: hns: use eth_get_headlen interface instead of hns_nic_get_headlen Update hns to drop the hns_nic_get_headlen function in favour of eth_get_headlen, and hence also removes now redundant hns_nic_get_headlen. Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 103 +------------------------- 1 file changed, 1 insertion(+), 102 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 71bd3bff6c6735..02a0ba20fad55f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -406,107 +406,6 @@ out_net_tx_busy: return NETDEV_TX_BUSY; } -/** - * hns_nic_get_headlen - determine size of header for RSC/LRO/GRO/FCOE - * @data: pointer to the start of the headers - * @max: total length of section to find headers in - * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, GRO, and RSC offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. - **/ -static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, - unsigned int max_size) -{ - unsigned char *network; - u8 hlen; - - /* this should never happen, but better safe than sorry */ - if (max_size < ETH_HLEN) - return max_size; - - /* initialize network frame pointer */ - network = data; - - /* set first protocol and move network header forward */ - network += ETH_HLEN; - - /* handle any vlan tag if present */ - if (hnae_get_field(flag, HNS_RXD_VLAN_M, HNS_RXD_VLAN_S) - == HNS_RX_FLAG_VLAN_PRESENT) { - if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN)) - return max_size; - - network += VLAN_HLEN; - } - - /* handle L3 protocols */ - if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV4) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct iphdr))) - return max_size; - - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (network[0] & 0x0F) << 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct iphdr)) - return network - data; - - /* record next protocol if header is present */ - } else if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV6) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct ipv6hdr))) - return max_size; - - /* record next protocol */ - hlen = sizeof(struct ipv6hdr); - } else { - return network - data; - } - - /* relocate pointer to start of L4 header */ - network += hlen; - - /* finally sort out TCP/UDP */ - if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_TCP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct tcphdr))) - return max_size; - - /* access doff as a u8 to avoid unaligned access on ia64 */ - hlen = (network[12] & 0xF0) >> 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct tcphdr)) - return network - data; - - network += hlen; - } else if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_UDP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct udphdr))) - return max_size; - - network += sizeof(struct udphdr); - } - - /* If everything has gone correctly network should be the - * data section of the packet and will be the end of the header. - * If not then it probably represents the end of the last recognized - * header. - */ - if ((typeof(max_size))(network - data) < max_size) - return network - data; - else - return max_size; -} - static void hns_nic_reuse_page(struct sk_buff *skb, int i, struct hnae_ring *ring, int pull_len, struct hnae_desc_cb *desc_cb) @@ -696,7 +595,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, } else { ring->stats.seg_pkt_cnt++; - pull_len = hns_nic_get_headlen(va, bnum_flag, HNS_RX_HEAD_SIZE); + pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); -- cgit 1.2.3-korg From d23c4b6336ef30898dcdff351f21e633e7a64930 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 23 Aug 2018 11:31:37 +0800 Subject: net/ipv6: init ip6 anycast rt->dst.input as ip6_input Commit 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") forgot to handle anycast route and init anycast rt->dst.input to ip6_forward. Fix it by setting anycast rt->dst.input back to ip6_input. Fixes: 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7208c16302f61a..c4ea13e8360b9a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -956,7 +956,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL) { + if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; -- cgit 1.2.3-korg From 27a5959308559fa6afcaa4e6cd81d25bcb2dda7c Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:37:15 +0800 Subject: net: hns3: fix page_offset overflow when CONFIG_ARM64_64K_PAGES When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of page_offset is u16, it will overflow. So change it to u32, when "CONFIG_ARM64_64K_PAGES" enabled. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a02a96aee2a2bf..cb450d7ec8c166 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -284,11 +284,11 @@ struct hns3_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; - + u32 page_offset; u32 length; /* length of the buffer */ + u16 reuse_flag; + /* desc type, used by the ring user to mark the type of the priv data */ u16 type; }; -- cgit 1.2.3-korg From 583e7281f1d8234f3a3e483bd6fba7a72d24aa4e Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:37:16 +0800 Subject: net: hns3: modify variable type in hns3_nic_reuse_page 'truesize' is supposed to be u32, not int, so fix it. Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3554dca7a680a2..955c4ab18b03bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2019,7 +2019,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_desc_cb *desc_cb) { struct hns3_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; -- cgit 1.2.3-korg From 9faf870e559a710c44e747ba20383ea82d8ac5d2 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Aug 2018 21:28:01 +0300 Subject: mmc: renesas_sdhi_internal_dmac: fix #define RST_RESERVED_BITS The DM_CM_RST register actually has bits 0-31 defaulting to 1s and bits 32-63 defaulting to 0s -- fix off-by-one in #define RST_RESERVED_BITS. Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Fixes: 2a68ea7896e3 ("mmc: renesas-sdhi: add support for R-Car Gen3 SDHI DMAC") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 35cc0de6be67a5..f16677f424b93a 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -45,7 +45,7 @@ /* DM_CM_RST */ #define RST_DTRANRST1 BIT(9) #define RST_DTRANRST0 BIT(8) -#define RST_RESERVED_BITS GENMASK_ULL(32, 0) +#define RST_RESERVED_BITS GENMASK_ULL(31, 0) /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 -- cgit 1.2.3-korg From d2332f887ddfba50fee93b8e1736376517c2df0c Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Aug 2018 21:22:26 +0300 Subject: mmc: renesas_sdhi_internal_dmac: mask DMAC interrupts I have encountered an interrupt storm during the eMMC chip probing (and the chip finally didn't get detected). It turned out that U-Boot left the SDHI DMA interrupts enabled while the Linux driver didn't use those. Masking those interrupts in renesas_sdhi_internal_dmac_request_dma() gets rid of both issues... Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Fixes: 2a68ea7896e3 ("mmc: renesas-sdhi: add support for R-Car Gen3 SDHI DMAC") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index f16677f424b93a..ca0b43973769c9 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -49,10 +49,12 @@ /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 +#define INFO1_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO1_DTRANEND1 BIT(17) #define INFO1_DTRANEND0 BIT(16) /* DM_CM_INFO2 and DM_CM_INFO2_MASK */ +#define INFO2_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO2_DTRANERR1 BIT(17) #define INFO2_DTRANERR0 BIT(16) @@ -252,6 +254,12 @@ renesas_sdhi_internal_dmac_request_dma(struct tmio_mmc_host *host, { struct renesas_sdhi *priv = host_to_priv(host); + /* Disable DMAC interrupts, we don't use them */ + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO1_MASK, + INFO1_MASK_CLEAR); + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO2_MASK, + INFO2_MASK_CLEAR); + /* Each value is set to non-zero to assume "enabling" each DMA */ host->chan_rx = host->chan_tx = (void *)0xdeadbeaf; -- cgit 1.2.3-korg From 4381147df9098706caa5cf9fda37e53b2fe4871f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:51 -0700 Subject: ice: Fix multiple static analyser warnings This patch fixes the following smatch errors: 1) Fix "odd binop '0x0 & 0xc'" when performing the bitwise-and with a constant value of zero (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG). Remove a similar bitwise-and with 0 in ice_add_marker_act() and use the right mask ICE_LG_ACT_GENERIC_OFFSET_M in the expression. 2) Fix a similar issue "odd binop '0x0 & 0x1800' in ice_req_irq_msix_misc. 3) Fix "odd binop '0x380000 & 0x7fff8'" in ice_add_marker_act(). Also, use a new define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX instead of magic number '7'. 4) Fix warn: odd binop '0x0 & 0x18' in ice_set_dflt_vsi_ctx() by removing unnecessary logic to explicitly unset bits 3 and 4 in port_vlan_bits. These bits are unset already by the memset on ctxt->info. Reported-by: Dan Carpenter Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 1 + drivers/net/ethernet/intel/ice/ice_common.c | 25 ++++++++++++++----------- drivers/net/ethernet/intel/ice/ice_main.c | 19 ++++++++----------- drivers/net/ethernet/intel/ice/ice_switch.c | 4 ++-- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 7541ec2270b370..6d3e11659ba52c 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -594,6 +594,7 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S) #define ICE_LG_ACT_GENERIC_PRIORITY_S 22 #define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S) +#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7 /* Action = 7 - Set Stat count */ #define ICE_LG_ACT_STAT_COUNT 0x7 diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 71d032cc5fa7d7..d5300b606d5a60 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1619,20 +1619,23 @@ __ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, } /* LUT size is only valid for Global and PF table types */ - if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) { + switch (lut_size) { + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128: + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512: flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) && - (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else { + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K: + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + break; + } + /* fall-through */ + default: status = ICE_ERR_PARAM; goto ice_aq_get_set_rss_lut_exit; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5299caf55a7f2b..186e764a469abb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1352,14 +1352,13 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* Allow all packets untagged/tagged */ + /* By default bits 3 and 4 in port_vlan_flags are 0's which results in + * legacy behavior (show VLAN, DEI, and UP) in descriptor. Also, allow + * all packets untagged/tagged. + */ ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & ICE_AQ_VSI_PVLAN_MODE_M) >> ICE_AQ_VSI_PVLAN_MODE_S); - /* Show VLAN/UP from packets in Rx descriptors */ - ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH & - ICE_AQ_VSI_PVLAN_EMOD_M) >> - ICE_AQ_VSI_PVLAN_EMOD_S); /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -2058,15 +2057,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) skip_req_irq: ice_ena_misc_vector(pf); - val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) | - PFINT_OICR_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M); wr32(hw, PFINT_OICR_CTL, val); /* This enables Admin queue Interrupt causes */ - val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) | - PFINT_FW_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M); wr32(hw, PFINT_FW_CTL, val); itr_gran = hw->itr_gran_200; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 723d15f1e90b4a..6b7ec2ae5ad679 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -645,14 +645,14 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); - act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; + act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << + ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; /* Third action Marker value */ act |= ICE_LG_ACT_GENERIC; act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; - act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); /* call the fill switch rule to fill the lookup tx rx structure */ -- cgit 1.2.3-korg From 6efa6239e7f8777dccddcebb643e6d9a0304bf43 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:28:52 -0700 Subject: ice: Remove unnecessary node owner check There is already a check for owner == ICE_SCHED_NODE_OWNER_LAN at the beginning of ice_sched_update_vsi_child_nodes. Remove the additional check to address the static analysis tool smatch issue "warn: we tested 'owner' before and it was 'false'". Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2e6c1d92cc8884..eeae199469b6e7 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1576,8 +1576,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc, return status; } - if (owner == ICE_SCHED_NODE_OWNER_LAN) - vsi->max_lanq[tc] = new_numqs; + vsi->max_lanq[tc] = new_numqs; return status; } -- cgit 1.2.3-korg From c0203475765f827e7b2eaf0a87222d0766e2cc4b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Aug 2018 23:49:37 +0200 Subject: bpf: use per htab salt for bucket hash All BPF hash and LRU maps currently have a known and global seed we feed into jhash() which is 0. This is suboptimal, thus fix it by generating a random seed upon hashtab setup time which we can later on feed into jhash() on lookup, update and deletions. Fixes: 0f8e4bd8a1fc8 ("bpf: add hashtable type of eBPF maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Reviewed-by: Eduardo Valentin --- kernel/bpf/hashtab.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 04b8eda94e7d5f..03cc59ee9c9536 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "percpu_freelist.h" #include "bpf_lru_list.h" @@ -41,6 +42,7 @@ struct bpf_htab { atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ + u32 hashrnd; }; /* each htab element is struct htab_elem + key + value */ @@ -371,6 +373,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (!htab->buckets) goto free_htab; + htab->hashrnd = get_random_int(); for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); @@ -402,9 +405,9 @@ free_htab: return ERR_PTR(err); } -static inline u32 htab_map_hash(const void *key, u32 key_len) +static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) { - return jhash(key, key_len, 0); + return jhash(key, key_len, hashrnd); } static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) @@ -470,7 +473,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -597,7 +600,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) if (!key) goto find_first_elem; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -824,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -880,7 +883,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -945,7 +948,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -998,7 +1001,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1071,7 +1074,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1103,7 +1106,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; -- cgit 1.2.3-korg From 5ab522443bd1dafa9e32d6f4b029128efda072de Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:53 -0700 Subject: ice: Cleanup magic number Use define for the unit size shift of the Rx LAN context descriptor base address instead of the magic number 7. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d23a91665b4637..068dbc740b7667 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -265,6 +265,7 @@ enum ice_rx_flex_desc_status_error_0_bits { struct ice_rlan_ctx { u16 head; u16 cpuid; /* bigger than needed, see above for reason */ +#define ICE_RLAN_BASE_S 7 u64 base; u16 qlen; #define ICE_RLAN_CTX_DBUF_S 7 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 186e764a469abb..7d65e0ed35887f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3983,7 +3983,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring) /* clear the context structure first */ memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; -- cgit 1.2.3-korg From f8ba7db850350319348b6d3c276f8ba19bc098ef Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Aug 2018 06:28:54 -0700 Subject: ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 7 ++++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 52 +++++++++++++++++++++------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d8b5fff581e717..ed071ea75f2051 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -89,6 +89,13 @@ extern const char ice_drv_ver[]; #define ice_for_each_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) +/* Macros for each allocated tx/rx ring whether used or not in a VSI */ +#define ice_for_each_alloc_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++) + +#define ice_for_each_alloc_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) + struct ice_tc_info { u16 qoffset; u16 qcount; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 1db304c01d1006..c71a9b528d6d55 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -26,7 +26,7 @@ static int ice_q_stats_len(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); - return ((np->vsi->num_txq + np->vsi->num_rxq) * + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * (sizeof(struct ice_q_stats) / sizeof(u64))); } @@ -218,7 +218,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_txq(vsi, i) { + ice_for_each_alloc_txq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_packets", i); p += ETH_GSTRING_LEN; @@ -226,7 +226,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_rxq(vsi, i) { + ice_for_each_alloc_rxq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_packets", i); p += ETH_GSTRING_LEN; @@ -253,6 +253,24 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_STATS: + /* The number (and order) of strings reported *must* remain + * constant for a given netdevice. This function must not + * report a different number based on run time parameters + * (such as the number of queues in use, or the setting of + * a private ethtool flag). This is due to the nature of the + * ethtool stats API. + * + * User space programs such as ethtool must make 3 separate + * ioctl requests, one for size, one for the strings, and + * finally one for the stats. Since these cross into + * user space, changes to the number or size could result in + * undefined memory access or incorrect string<->value + * correlations for statistics. + * + * Even if it appears to be safe, changes to the size or + * order of strings will suffer from race conditions and are + * not safe. + */ return ICE_ALL_STATS_LEN(netdev); default: return -EOPNOTSUPP; @@ -280,18 +298,26 @@ ice_get_ethtool_stats(struct net_device *netdev, /* populate per queue stats */ rcu_read_lock(); - ice_for_each_txq(vsi, j) { + ice_for_each_alloc_txq(vsi, j) { ring = READ_ONCE(vsi->tx_rings[j]); - if (!ring) - continue; - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } - ice_for_each_rxq(vsi, j) { + ice_for_each_alloc_rxq(vsi, j) { ring = READ_ONCE(vsi->rx_rings[j]); - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } rcu_read_unlock(); @@ -519,7 +545,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) goto done; } - for (i = 0; i < vsi->num_txq; i++) { + for (i = 0; i < vsi->alloc_txq; i++) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@ -551,7 +577,7 @@ process_rx: goto done; } - for (i = 0; i < vsi->num_rxq; i++) { + for (i = 0; i < vsi->alloc_rxq; i++) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; -- cgit 1.2.3-korg From b29bc220e2c7bd494a4605defcd93b18d5a8cf86 Mon Sep 17 00:00:00 2001 From: Preethi Banala Date: Thu, 9 Aug 2018 06:28:55 -0700 Subject: ice: Clean control queues only when they are initialized Clean control queues only when they are initialized. One of the ways to validate if the basic initialization is done is by checking value of cq->sq.head and cq->rq.head variables that specify the register address. This patch adds a check to avoid NULL pointer dereference crash when tried to shutdown uninitialized control queue. Signed-off-by: Preethi Banala Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 7c511f144ed60d..c064416080e74d 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -597,10 +597,14 @@ static enum ice_status ice_init_check_adminq(struct ice_hw *hw) return 0; init_ctrlq_free_rq: - ice_shutdown_rq(hw, cq); - ice_shutdown_sq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } return status; } @@ -706,10 +710,14 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) return; } - ice_shutdown_sq(hw, cq); - ice_shutdown_rq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } } /** -- cgit 1.2.3-korg From 3d6b640efcc1b07709b42dd2e9609401c6f88633 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:56 -0700 Subject: ice: Fix bugs in control queue processing This patch is a consolidation of multiple bug fixes for control queue processing. 1) In ice_clean_adminq_subtask() remove unnecessary reads/writes to registers. The bits PFINT_FW_CTL, PFINT_MBX_CTL and PFINT_SB_CTL are not set when an interrupt arrives, which means that clearing them again can be omitted. 2) Get an accurate value in "pending" by re-reading the control queue head register from the hardware. 3) Fix a corner case involving lost control queue messages by checking for new control messages (using ice_ctrlq_pending) before exiting the cleanup routine. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.c | 5 ++++- drivers/net/ethernet/intel/ice/ice_main.c | 26 ++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index c064416080e74d..62be72fdc8f30c 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1065,8 +1065,11 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, clean_rq_elem_out: /* Set pending if needed, unlock and return */ - if (pending) + if (pending) { + /* re-read HW head to calculate actual pending messages */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); + } clean_rq_elem_err: mutex_unlock(&cq->rq_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7d65e0ed35887f..f3ba4f76b6cb6b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -916,6 +916,21 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) return pending && (i == ICE_DFLT_IRQ_WORK); } +/** + * ice_ctrlq_pending - check if there is a difference between ntc and ntu + * @hw: pointer to hardware info + * @cq: control queue information + * + * returns true if there are pending messages in a queue, false if there aren't + */ +static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u16 ntu; + + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + return cq->rq.next_to_clean != ntu; +} + /** * ice_clean_adminq_subtask - clean the AdminQ rings * @pf: board private structure @@ -923,7 +938,6 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - u32 val; if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) return; @@ -933,9 +947,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - /* re-enable Admin queue interrupt causes */ - val = rd32(hw, PFINT_FW_CTL); - wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M)); + /* There might be a situation where new messages arrive to a control + * queue between processing the last message and clearing the + * EVENT_PENDING bit. So before exiting, check queue head again (using + * ice_ctrlq_pending) and process new messages if any. + */ + if (ice_ctrlq_pending(hw, &hw->adminq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN); ice_flush(hw); } -- cgit 1.2.3-korg From 1eb43fc754485d772b1165118a8fb80c385f0492 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Aug 2018 06:28:57 -0700 Subject: ice: Use order_base_2 to calculate higher power of 2 Currently, we use a combination of ilog2 and is_power_of_2() to calculate the next power of 2 for the qcount. This appears to be causing a warning on some combinations of GCC and the Linux kernel: MODPOST 1 modules WARNING: "____ilog2_NaN" [ice.ko] undefined! This appears to because because GCC realizes that qcount could be zero in some circumstances and thus attempts to link against the intentionally undefined ___ilog2_NaN function. The order_base_2 function is intentionally defined to return 0 when passed 0 as an argument, and thus will be safe to use here. This not only fixes the warning but makes the resulting code slightly cleaner, and is really what we should have used originally. Also update the comment to make it more clear that we are rounding up, not just incrementing the ilog2 of qcount unconditionally. Signed-off-by: Jacob Keller Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f3ba4f76b6cb6b..3eff1d2d154395 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1313,11 +1313,8 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) qcount = numq_tc; } - /* find higher power-of-2 of qcount */ - pow = ilog2(qcount); - - if (!is_power_of_2(qcount)) - pow++; + /* find the (rounded up) power-of-2 of qcount */ + pow = order_base_2(qcount); for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { -- cgit 1.2.3-korg From 5d8778d803e21f235e9bc727b5bd619f02abb88b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 9 Aug 2018 06:28:58 -0700 Subject: ice: Set VLAN flags correctly In the struct ice_aqc_vsi_props the field port_vlan_flags is an overloaded term because it is used for both port VLANs (PVLANs) and regular VLANs. This is an issue and is very confusing especially when dealing with VFs because normal VLANs and port VLANs are not the same. To fix this the field was renamed to vlan_flags and all of the #define's labeled *_PVLAN_* were renamed to *_VLAN_* if they are not specific to port VLANs. Also in ice_vsi_manage_vlan_stripping, set the ICE_AQ_VSI_VLAN_MODE_ALL bit to allow the driver to add a VLAN tag to all packets it sends. Signed-off-by: Brett Creeley Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 24 +++++++++---------- drivers/net/ethernet/intel/ice/ice_main.c | 31 ++++++++++++++----------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 6d3e11659ba52c..a0614f472658ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -329,19 +329,19 @@ struct ice_aqc_vsi_props { /* VLAN section */ __le16 pvid; /* VLANS include priority bits */ u8 pvlan_reserved[2]; - u8 port_vlan_flags; -#define ICE_AQ_VSI_PVLAN_MODE_S 0 -#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S) -#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1 -#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2 -#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3 + u8 vlan_flags; +#define ICE_AQ_VSI_VLAN_MODE_S 0 +#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S) +#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1 +#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2 +#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3 #define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) -#define ICE_AQ_VSI_PVLAN_EMOD_S 3 -#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_S 3 +#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) u8 pvlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3eff1d2d154395..68003fad33d11a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1367,13 +1367,15 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* By default bits 3 and 4 in port_vlan_flags are 0's which results in - * legacy behavior (show VLAN, DEI, and UP) in descriptor. Also, allow - * all packets untagged/tagged. + + /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy + * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all + * packets untagged/tagged. */ - ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & - ICE_AQ_VSI_PVLAN_MODE_M) >> - ICE_AQ_VSI_PVLAN_MODE_S); + ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL & + ICE_AQ_VSI_VLAN_MODE_M) >> + ICE_AQ_VSI_VLAN_MODE_S); + /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -3732,10 +3734,10 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) enum ice_status status; /* Here we are configuring the VSI to let the driver add VLAN tags by - * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN - * tag insertion happens in the Tx hot path, in ice_tx_map. + * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag + * insertion happens in the Tx hot path, in ice_tx_map. */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3747,7 +3749,7 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } @@ -3769,12 +3771,15 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) */ if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH; } else { /* Disable stripping. Leave tag in packet */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; } + /* Allow all packets untagged/tagged */ + ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3785,7 +3790,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } -- cgit 1.2.3-korg From 785e76d7a2051a9e28b9134d5388a45b16f5eb72 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 23 Aug 2018 17:46:25 +0100 Subject: tools: bpftool: return from do_event_pipe() on bad arguments When command line parsing fails in the while loop in do_event_pipe() because the number of arguments is incorrect or because the keyword is unknown, an error message is displayed, but bpftool remains stuck in the loop. Make sure we exit the loop upon failure. Fixes: f412eed9dfde ("tools: bpftool: add simple perf event output reader") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map_perf_ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 1832100d1b274d..6d41323be291bb 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -194,8 +194,10 @@ int do_event_pipe(int argc, char **argv) } while (argc) { - if (argc < 2) + if (argc < 2) { BAD_ARG(); + goto err_close_map; + } if (is_prefix(*argv, "cpu")) { char *endptr; @@ -221,6 +223,7 @@ int do_event_pipe(int argc, char **argv) NEXT_ARG(); } else { BAD_ARG(); + goto err_close_map; } do_all = false; -- cgit 1.2.3-korg From 3bcd7fa37f33cda8c5639a908e9eb42d856e5d8b Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:28:59 -0700 Subject: ice: Update to interrupts enabled in OICR Remove the following interrupt causes that are not applicable or not handled: - PFINT_OICR_HLP_RDY_M - PFINT_OICR_CPM_RDY_M - PFINT_OICR_GPIO_M - PFINT_OICR_STORM_DETECT_M Add the following interrupt cause that's actually handled in ice_misc_intr: - PFINT_OICR_PE_CRITERR_M Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 8 -------- drivers/net/ethernet/intel/ice/ice_main.c | 9 +++------ 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 499904874b3ff8..6076fc87df9d28 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -121,10 +121,6 @@ #define PFINT_FW_CTL_CAUSE_ENA_S 30 #define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) #define PFINT_OICR 0x0016CA00 -#define PFINT_OICR_HLP_RDY_S 14 -#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) -#define PFINT_OICR_CPM_RDY_S 15 -#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S) #define PFINT_OICR_ECC_ERR_S 16 #define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S) #define PFINT_OICR_MAL_DETECT_S 19 @@ -133,10 +129,6 @@ #define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S) #define PFINT_OICR_PCI_EXCEPTION_S 21 #define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S) -#define PFINT_OICR_GPIO_S 22 -#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S) -#define PFINT_OICR_STORM_DETECT_S 24 -#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S) #define PFINT_OICR_HMC_ERR_S 26 #define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S) #define PFINT_OICR_PE_CRITERR_S 28 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 68003fad33d11a..34be94a30a6090 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1704,15 +1704,12 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ rd32(hw, PFINT_OICR); /* read to clear */ - val = (PFINT_OICR_HLP_RDY_M | - PFINT_OICR_CPM_RDY_M | - PFINT_OICR_ECC_ERR_M | + val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | - PFINT_OICR_GPIO_M | - PFINT_OICR_STORM_DETECT_M | - PFINT_OICR_HMC_ERR_M); + PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); -- cgit 1.2.3-korg From c7f2c42b80ed6009f44e355aefc1e40db9485a9d Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:29:00 -0700 Subject: ice: Fix a few null pointer dereference issues 1) When ice_ena_msix_range() fails to reserve vectors, a devm_kfree() warning was seen in the error flow path. So check pf->irq_tracker before use in ice_clear_interrupt_scheme(). 2) In ice_vsi_cfg(), check vsi->netdev before use. 3) In ice_get_link_status, check link_up before use. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index d5300b606d5a60..ebd701ac942809 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1483,7 +1483,7 @@ enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up) struct ice_phy_info *phy_info; enum ice_status status = 0; - if (!pi) + if (!pi || !link_up) return ICE_ERR_PARAM; phy_info = &pi->phy; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 34be94a30a6090..d5d83c8848f863 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3257,8 +3257,10 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) ice_dis_msix(pf); - devm_kfree(&pf->pdev->dev, pf->irq_tracker); - pf->irq_tracker = NULL; + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; + } } /** @@ -4112,11 +4114,12 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; - ice_set_rx_mode(vsi->netdev); - - err = ice_restore_vlan(vsi); - if (err) - return err; + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); + if (err) + return err; + } err = ice_vsi_cfg_txqs(vsi); if (!err) -- cgit 1.2.3-korg From dab0588fb616c1774bbf108eab1749dda4ac6942 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 9 Aug 2018 06:29:01 -0700 Subject: ice: Fix potential return of uninitialized value In ice_vsi_setup_[tx|rx]_rings, err is uninitialized which can result in a garbage value return to the caller. Fix that. Signed-off-by: Jesse Brandeburg Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d5d83c8848f863..e2315651518615 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4885,7 +4885,7 @@ int ice_down(struct ice_vsi *vsi) */ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_txq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", @@ -4910,7 +4910,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) */ static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_rxq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", -- cgit 1.2.3-korg From 43f8b22450f0a721915f1d2c7e3db6dd9573e76b Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:29:02 -0700 Subject: ice: Change struct members from bool to u8 Recent versions of checkpatch have a new warning based on a documented preference of Linus to not use bool in structures due to wasted space and the size of bool is implementation dependent. For more information, see the email thread at https://lkml.org/lkml/2017/11/21/384. Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 8 ++++---- drivers/net/ethernet/intel/ice/ice_switch.h | 6 +++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 2 +- drivers/net/ethernet/intel/ice/ice_type.h | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index ed071ea75f2051..868f4a1d0f7243 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -196,9 +196,9 @@ struct ice_vsi { struct list_head tmp_sync_list; /* MAC filters to be synced */ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ - bool irqs_ready; - bool current_isup; /* Sync 'link up' logging */ - bool stat_offsets_loaded; + u8 irqs_ready; + u8 current_isup; /* Sync 'link up' logging */ + u8 stat_offsets_loaded; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -269,7 +269,7 @@ struct ice_pf { struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; - bool stat_prev_loaded; /* has previous stats been loaded */ + u8 stat_prev_loaded; /* has previous stats been loaded */ char int_name[ICE_INT_NAME_STR_LEN]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 6f4a0d159dbfdc..9b8ec128ee31f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -17,7 +17,7 @@ struct ice_vsi_ctx { u16 vsis_unallocated; u16 flags; struct ice_aqc_vsi_props info; - bool alloc_from_pool; + u8 alloc_from_pool; }; enum ice_sw_fwd_act_type { @@ -94,8 +94,8 @@ struct ice_fltr_info { u8 qgrp_size; /* Rule creations populate these indicators basing on the switch type */ - bool lb_en; /* Indicate if packet can be looped back */ - bool lan_en; /* Indicate if packet can be forwarded to the uplink */ + u8 lb_en; /* Indicate if packet can be looped back */ + u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; /* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 567067b650c412..31bc998fe2006f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -143,7 +143,7 @@ struct ice_ring { u16 next_to_use; u16 next_to_clean; - bool ring_active; /* is ring online or not */ + u8 ring_active; /* is ring online or not */ /* stats structs */ struct ice_q_stats stats; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 99c8a9a71b5e4f..97c366e0ca596f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -83,7 +83,7 @@ struct ice_link_status { u64 phy_type_low; u16 max_frame_size; u16 link_speed; - bool lse_ena; /* Link Status Event notification */ + u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; u8 ext_info; @@ -101,7 +101,7 @@ struct ice_phy_info { struct ice_link_status link_info_old; u64 phy_type_low; enum ice_media_type media_type; - bool get_link_info; + u8 get_link_info; }; /* Common HW capabilities for SW use */ @@ -167,7 +167,7 @@ struct ice_nvm_info { u32 oem_ver; /* OEM version info */ u16 sr_words; /* Shadow RAM size in words */ u16 ver; /* NVM package version */ - bool blank_nvm_mode; /* is NVM empty (no FW present) */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; /* Max number of port to queue branches w.r.t topology */ @@ -181,7 +181,7 @@ struct ice_sched_node { struct ice_aqc_txsched_elem_data info; u32 agg_id; /* aggregator group id */ u16 vsi_id; - bool in_use; /* suspended or in use */ + u8 in_use; /* suspended or in use */ u8 tx_sched_layer; /* Logical Layer (1-9) */ u8 num_children; u8 tc_num; @@ -218,7 +218,7 @@ struct ice_sched_vsi_info { struct ice_sched_tx_policy { u16 max_num_vsis; u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS]; - bool rdma_ena; + u8 rdma_ena; }; struct ice_port_info { @@ -243,7 +243,7 @@ struct ice_port_info { struct list_head agg_list; /* lists all aggregator */ u8 lport; #define ICE_LPORT_MASK 0xff - bool is_vf; + u8 is_vf; }; struct ice_switch_info { @@ -287,7 +287,7 @@ struct ice_hw { u8 max_cgds; u8 sw_entry_point_layer; - bool evb_veb; /* true for VEB, false for VEPA */ + u8 evb_veb; /* true for VEB, false for VEPA */ struct ice_bus_info bus; struct ice_nvm_info nvm; struct ice_hw_dev_caps dev_caps; /* device capabilities */ @@ -318,7 +318,7 @@ struct ice_hw { u8 itr_gran_100; u8 itr_gran_50; u8 itr_gran_25; - bool ucast_shared; /* true if VSIs can share unicast addr */ + u8 ucast_shared; /* true if VSIs can share unicast addr */ }; -- cgit 1.2.3-korg From 3968540ba61e9a19a0c4bda733db70952708d264 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:29:03 -0700 Subject: ice: Trivial formatting fixes 1) Add missing "\n" when printing link event error message. 2) Update dev_err statement in probe. 3) Add function description for ice_clear_pf_cfg. 4) Fix coding style for ice_acquire_nvm. 5) netdev->mtu is unsigned so use %u. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 3 +++ drivers/net/ethernet/intel/ice/ice_main.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_nvm.c | 5 ++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index ebd701ac942809..661beea6af795c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -45,6 +45,9 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) /** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure + * + * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port + * configuration, flow director filters, etc.). */ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e2315651518615..f1e80eed2fd6d9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -901,7 +901,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf)) dev_err(&pf->pdev->dev, - "Could not handle link event"); + "Could not handle link event\n"); break; default: dev_dbg(&pf->pdev->dev, @@ -3284,7 +3284,7 @@ static int ice_probe(struct pci_dev *pdev, err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); if (err) { - dev_err(&pdev->dev, "I/O map error %d\n", err); + dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); return err; } @@ -5252,7 +5252,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %d\n", netdev->mtu); + netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 92da0a626ce0b3..295a8cd87fc165 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -131,9 +131,8 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) * * This function will request NVM ownership. */ -static enum -ice_status ice_acquire_nvm(struct ice_hw *hw, - enum ice_aq_res_access_type access) +static enum ice_status +ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) { if (hw->nvm.blank_nvm_mode) return 0; -- cgit 1.2.3-korg From 82c82ab658655befcb6aa47cbdb98dadce1a0cfe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Aug 2018 12:00:08 +0200 Subject: udf: Remove dead code from udf_find_fileset() Remove dead code and slightly simplify code in udf_find_fileset(). Signed-off-by: Jan Kara --- fs/udf/super.c | 62 +--------------------------------------------------------- 1 file changed, 1 insertion(+), 61 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 3040dc2a32f6a1..68d57b61f3af82 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -764,9 +764,7 @@ static int udf_find_fileset(struct super_block *sb, struct kernel_lb_addr *root) { struct buffer_head *bh = NULL; - long lastblock; uint16_t ident; - struct udf_sb_info *sbi; if (fileset->logicalBlockNum != 0xFFFFFFFF || fileset->partitionReferenceNum != 0xFFFF) { @@ -779,69 +777,11 @@ static int udf_find_fileset(struct super_block *sb, return 1; } - } - - sbi = UDF_SB(sb); - if (!bh) { - /* Search backwards through the partitions */ - struct kernel_lb_addr newfileset; - -/* --> cvg: FIXME - is it reasonable? */ - return 1; - - for (newfileset.partitionReferenceNum = sbi->s_partitions - 1; - (newfileset.partitionReferenceNum != 0xFFFF && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); - newfileset.partitionReferenceNum--) { - lastblock = sbi->s_partmaps - [newfileset.partitionReferenceNum] - .s_partition_len; - newfileset.logicalBlockNum = 0; - - do { - bh = udf_read_ptagged(sb, &newfileset, 0, - &ident); - if (!bh) { - newfileset.logicalBlockNum++; - continue; - } - - switch (ident) { - case TAG_IDENT_SBD: - { - struct spaceBitmapDesc *sp; - sp = (struct spaceBitmapDesc *) - bh->b_data; - newfileset.logicalBlockNum += 1 + - ((le32_to_cpu(sp->numOfBytes) + - sizeof(struct spaceBitmapDesc) - - 1) >> sb->s_blocksize_bits); - brelse(bh); - break; - } - case TAG_IDENT_FSD: - *fileset = newfileset; - break; - default: - newfileset.logicalBlockNum++; - brelse(bh); - bh = NULL; - break; - } - } while (newfileset.logicalBlockNum < lastblock && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); - } - } - - if ((fileset->logicalBlockNum != 0xFFFFFFFF || - fileset->partitionReferenceNum != 0xFFFF) && bh) { udf_debug("Fileset at block=%u, partition=%u\n", fileset->logicalBlockNum, fileset->partitionReferenceNum); - sbi->s_partition = fileset->partitionReferenceNum; + UDF_SB(sb)->s_partition = fileset->partitionReferenceNum; udf_load_fileset(sb, bh, root); brelse(bh); return 0; -- cgit 1.2.3-korg From ee4af50ca94f58afc3532662779b9cf80bbe27c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Aug 2018 14:52:34 +0200 Subject: udf: Fix mounting of Win7 created UDF filesystems Win7 is creating UDF filesystems with single partition with number 8192. Current partition descriptor scanning code does not handle this well as it incorrectly assumes that partition numbers will form mostly contiguous space of small numbers. This results in unmountable media due to errors like: UDF-fs: error (device dm-1): udf_read_tagged: tag version 0x0000 != 0x0002 || 0x0003, block 0 UDF-fs: warning (device dm-1): udf_fill_super: No fileset found Fix the problem by handling partition descriptors in a way that sparse partition numbering does not matter. Reported-and-tested-by: jean-luc malet CC: stable@vger.kernel.org Fixes: 7b78fd02fb19530fd101ae137a1f46aa466d9bb6 Signed-off-by: Jan Kara --- fs/udf/super.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 68d57b61f3af82..6f515651a2c2fe 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1510,10 +1510,16 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ */ #define PART_DESC_ALLOC_STEP 32 +struct part_desc_seq_scan_data { + struct udf_vds_record rec; + u32 partnum; +}; + struct desc_seq_scan_data { struct udf_vds_record vds[VDS_POS_LENGTH]; unsigned int size_part_descs; - struct udf_vds_record *part_descs_loc; + unsigned int num_part_descs; + struct part_desc_seq_scan_data *part_descs_loc; }; static struct udf_vds_record *handle_partition_descriptor( @@ -1522,10 +1528,14 @@ static struct udf_vds_record *handle_partition_descriptor( { struct partitionDesc *desc = (struct partitionDesc *)bh->b_data; int partnum; + int i; partnum = le16_to_cpu(desc->partitionNumber); - if (partnum >= data->size_part_descs) { - struct udf_vds_record *new_loc; + for (i = 0; i < data->num_part_descs; i++) + if (partnum == data->part_descs_loc[i].partnum) + return &(data->part_descs_loc[i].rec); + if (data->num_part_descs >= data->size_part_descs) { + struct part_desc_seq_scan_data *new_loc; unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP); new_loc = kcalloc(new_size, sizeof(*new_loc), GFP_KERNEL); @@ -1537,7 +1547,7 @@ static struct udf_vds_record *handle_partition_descriptor( data->part_descs_loc = new_loc; data->size_part_descs = new_size; } - return &(data->part_descs_loc[partnum]); + return &(data->part_descs_loc[data->num_part_descs++].rec); } @@ -1587,6 +1597,7 @@ static noinline int udf_process_sequence( memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); data.size_part_descs = PART_DESC_ALLOC_STEP; + data.num_part_descs = 0; data.part_descs_loc = kcalloc(data.size_part_descs, sizeof(*data.part_descs_loc), GFP_KERNEL); @@ -1598,7 +1609,6 @@ static noinline int udf_process_sequence( * are in it. */ for (; (!done && block <= lastblock); block++) { - bh = udf_read_tagged(sb, block, block, &ident); if (!bh) break; @@ -1670,13 +1680,10 @@ static noinline int udf_process_sequence( } /* Now handle prevailing Partition Descriptors */ - for (i = 0; i < data.size_part_descs; i++) { - if (data.part_descs_loc[i].block) { - ret = udf_load_partdesc(sb, - data.part_descs_loc[i].block); - if (ret < 0) - return ret; - } + for (i = 0; i < data.num_part_descs; i++) { + ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); + if (ret < 0) + return ret; } return 0; -- cgit 1.2.3-korg From a9910c0886470b659a6c3629d6467d5639c327e9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 19 Jul 2018 22:04:07 +0800 Subject: ixgb: use dma_zalloc_coherent instead of allocator/memset Use dma_zalloc_coherent instead of dma_alloc_coherent followed by memset 0. Signed-off-by: YueHaibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 43664adf7a3c12..d3e72d0f66ef42 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -771,14 +771,13 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter) rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc); rxdr->size = ALIGN(rxdr->size, 4096); - rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, - GFP_KERNEL); + rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, + GFP_KERNEL); if (!rxdr->desc) { vfree(rxdr->buffer_info); return -ENOMEM; } - memset(rxdr->desc, 0, rxdr->size); rxdr->next_to_clean = 0; rxdr->next_to_use = 0; -- cgit 1.2.3-korg From cf1acec008f8d7761aa3fd7c4bca7e17b2d2512d Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:29 -0700 Subject: e1000: check on netif_running() before calling e1000_up() When the device is not up, the call to 'e1000_up()' from the error handling path of 'e1000_set_ringparam()' causes a kernel oops with a null-pointer dereference. The null-pointer dereference is triggered in function 'e1000_alloc_rx_buffers()' at line 'buffer_info = &rx_ring->buffer_info[i]'. This bug was reported by COD, a tool for testing kernel module binaries I am building. This bug was also detected by KFI from Dr. Kai Cong. This patch fixes the bug by checking on 'netif_running()' before calling 'e1000_up()' in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index bdb3f8e65ed470..c1e4e94f100f55 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -644,7 +644,8 @@ err_setup_rx: err_alloc_rx: kfree(txdr); err_alloc_tx: - e1000_up(adapter); + if (netif_running(adapter->netdev)) + e1000_up(adapter); err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; -- cgit 1.2.3-korg From ee400a3f1bfe7004a3e14b81c38ccc5583c26295 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:30 -0700 Subject: e1000: ensure to free old tx/rx rings in set_ringparam() In 'e1000_set_ringparam()', the tx_ring and rx_ring are updated with new value and the old tx/rx rings are freed only when the device is up. There are resource leaks on old tx/rx rings when the device is not up. This bug is reported by COD, a tool for testing kernel module binaries I am building. This patch fixes the bug by always calling 'kfree()' on old tx/rx rings in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index c1e4e94f100f55..2569a168334cbc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -624,14 +624,14 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_old; e1000_free_all_rx_resources(adapter); e1000_free_all_tx_resources(adapter); - kfree(tx_old); - kfree(rx_old); adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); if (err) goto err_setup; } + kfree(tx_old); + kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); return 0; -- cgit 1.2.3-korg From a798fbac33c4cbfe7d539298623b7af6c3f9525a Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Thu, 26 Jul 2018 10:20:38 -0700 Subject: igb: Use an advanced ctx descriptor for launchtime On i210, Launchtime (TxTime) requires the usage of an "Advanced Transmit Context Descriptor" for retrieving the timestamp of a packet. The igb driver correctly builds such descriptor on the segmentation flow (i.e. igb_tso()) or on the checksum one (i.e. igb_tx_csum()), but the feature is broken for AF_PACKET if the IGB_TX_FLAGS_VLAN is not set, which happens due to an early return on igb_tx_csum(). This flag is only set by the kernel when a VLAN interface is used, thus we can't just rely on it. Here we are fixing this issue by checking if launchtime is enabled for the current tx_ring before performing the early return. Signed-off-by: Jesus Sanchez-Palencia Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d03c2f0d759260..74416f8a9446d5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5816,7 +5816,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_failed: - if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) && + !tx_ring->launchtime_enable) return; goto no_csum; } -- cgit 1.2.3-korg From 151356270b0761e455ed82bba3353fb494451555 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:04:53 +0800 Subject: igb: Replace GFP_ATOMIC with GFP_KERNEL in igb_sw_init() igb_sw_init() is never called in atomic context. It calls kzalloc() and kcalloc() with GFP_ATOMIC, which is not necessary. GFP_ATOMIC can be replaced with GFP_KERNEL. This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 74416f8a9446d5..a32c576c1e656c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3873,7 +3873,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->mac_table = kcalloc(hw->mac.rar_entry_count, sizeof(struct igb_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; @@ -3883,7 +3883,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->shadow_vfta) return -ENOMEM; -- cgit 1.2.3-korg From 69a64658de502c8ca383fe2c5a5208f00b5844cd Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:07:38 +0800 Subject: igb: Replace mdelay() with msleep() in igb_integrated_phy_loopback() igb_integrated_phy_loopback() is never called in atomic context. It calls mdelay() to busily wait, which is not necessary. mdelay() can be replaced with msleep(). This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f92f7918112de0..5acf3b743876a4 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1649,7 +1649,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) if (hw->phy.type == e1000_phy_m88) igb_phy_disable_receiver(adapter); - mdelay(500); + msleep(500); return 0; } -- cgit 1.2.3-korg From 374f78f75be98c72876a4d0311b278cd226effba Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:22:31 +0800 Subject: ixgbe: Replace GFP_ATOMIC with GFP_KERNEL ixgbe_fcoe_ddp_setup(), ixgbe_setup_fcoe_ddp_resources() and ixgbe_sw_init() are never called in atomic context. They call kmalloc(), dma_pool_alloc() and kzalloc() with GFP_ATOMIC, which is not necessary. GFP_ATOMIC can be replaced with GFP_KERNEL. This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Acked-by: Sebastian Basierski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 94b3165ff54305..ccd852ad62a4b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -192,7 +192,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, } /* alloc the udl from per cpu ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); + ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap; @@ -760,7 +760,7 @@ int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter) return 0; /* Extra buffer to be shared by all DDPs for HW work around */ - buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC); + buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index af4c9ae7f432d0..663d59ba527a9f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6201,7 +6201,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, adapter->mac_table = kcalloc(hw->mac.num_rar_entries, sizeof(struct ixgbe_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; -- cgit 1.2.3-korg From fabf1bce103aa8e3db27ff2cc55f8e0fb0abcc30 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 30 Jul 2018 15:52:48 -0700 Subject: ixgbe: Prevent unsupported configurations with XDP These changes address comments by Jakub Kicinski on commit 38b7e7f8ae82 ("ixgbe: Do not allow LRO or MTU change with XDP"). Change the MTU check with XDP to allow any supported value and only reject those outside of the range as opposed to rejecting any change when XDP is active. In situations where MTU size is not supported, return -EINVAL instead of -EPERM. Add checks when enabling SRIOV, DCB, or adding L2FW offloaded device as they are not supported with XDP. CC: Jakub Kicinski Signed-off-by: Tony Nguyen Acked-by: Jakub Kicinski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 ++++++++++++++++++++++++-- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 +++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 663d59ba527a9f..9a23d33a47ed52 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6620,8 +6620,18 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbe_adapter *adapter = netdev_priv(netdev); if (adapter->xdp_prog) { - e_warn(probe, "MTU cannot be changed while XDP program is loaded\n"); - return -EPERM; + int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + if (new_frame_size > ixgbe_rx_bufsz(ring)) { + e_warn(probe, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; + } + } } /* @@ -8983,6 +8993,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) #ifdef CONFIG_IXGBE_DCB if (tc) { + if (adapter->xdp_prog) { + e_warn(probe, "DCB is not supported with XDP\n"); + + ixgbe_init_interrupt_scheme(adapter); + if (netif_running(dev)) + ixgbe_open(dev); + return -EINVAL; + } + netdev_set_num_tc(dev, tc); ixgbe_set_prio_tc_map(adapter); @@ -9934,6 +9953,11 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) int tcs = adapter->hw_tcs ? : 1; int pool, err; + if (adapter->xdp_prog) { + e_warn(probe, "L2FW offload is not supported with XDP\n"); + return ERR_PTR(-EINVAL); + } + /* The hardware supported by ixgbe only filters on the destination MAC * address. In order to avoid issues we only support offloading modes * where the hardware can actually provide the functionality. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 6f59933cdff7d5..9264a5f8a5d015 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -53,6 +53,11 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, struct ixgbe_hw *hw = &adapter->hw; int i; + if (adapter->xdp_prog) { + e_warn(probe, "SRIOV is not supported with XDP\n"); + return -EINVAL; + } + /* Enable VMDq flag so device will be set in VM mode */ adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED | IXGBE_FLAG_VMDQ_ENABLED; -- cgit 1.2.3-korg From 939b701ad63314f5aa90dcd3d866f73954945209 Mon Sep 17 00:00:00 2001 From: Sebastian Basierski Date: Tue, 31 Jul 2018 18:16:00 +0200 Subject: ixgbe: fix driver behaviour after issuing VFLR Since VFLR doesn't clear VFMBMEM (VF Mailbox Memory) and is not re-enabling queues correctly we should fix this behavior. Signed-off-by: Sebastian Basierski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 26 ++++++++++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 9264a5f8a5d015..3c6f01c41b788e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -693,8 +693,13 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; + u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask); u8 num_tcs = adapter->hw_tcs; + u32 reg_val; + u32 queue; + u32 word; /* remove VLAN filters beloning to this VF */ ixgbe_clear_vf_vlans(adapter, vf); @@ -731,6 +736,27 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) /* reset VF api back to unknown */ adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10; + + /* Restart each queue for given VF */ + for (queue = 0; queue < q_per_pool; queue++) { + unsigned int reg_idx = (vf * q_per_pool) + queue; + + reg_val = IXGBE_READ_REG(hw, IXGBE_PVFTXDCTL(reg_idx)); + + /* Re-enabling only configured queues */ + if (reg_val) { + reg_val |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + reg_val &= ~IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + } + } + + /* Clear VF's mailbox memory */ + for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0); + + IXGBE_WRITE_FLUSH(hw); } static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 44cfb2021145b9..41bcbb337e837f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2518,6 +2518,7 @@ enum { /* Translated register #defines */ #define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) #define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) +#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) #define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) #define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) -- cgit 1.2.3-korg From fa38e30ac73fbb01d7e5d0fd1b12d412fa3ac3ee Mon Sep 17 00:00:00 2001 From: Martyna Szapar Date: Tue, 7 Aug 2018 17:11:23 -0700 Subject: i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled If interface is connected to switch port configured for DCB there are TX timeouts when bringing up interface. Problem started appearing after adding in i40e driver code mqprio hardware offload mode. In function i40e_vsi_configure_bw_alloc was added resetting BW rate which should be executing when mqprio qdisc is removed but was also when there was no mqprio qdisc added and DCB was enabled. In this patch was added additional check for DCB flag so now when DCB is enabled the correct DCB configs from before mqprio patch are restored. Signed-off-by: Martyna Szapar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f2c622e78802a7..ac685ad4d87731 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5122,15 +5122,17 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + struct i40e_pf *pf = vsi->back; i40e_status ret; int i; - if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) + /* There is no need to reset BW when mqprio mode is on. */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - if (!vsi->mqprio_qopt.qopt.hw) { + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "Failed to reset tx rate for vsi->seid %u\n", vsi->seid); return ret; @@ -5139,12 +5141,11 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; - ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data, - NULL); + ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "AQ command Config VSI BW allocation per TC failed = %d\n", - vsi->back->hw.aq.asq_last_status); + pf->hw.aq.asq_last_status); return -EINVAL; } -- cgit 1.2.3-korg From 07f3701387dcab3a4fb0166098fb2754a1b927e1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 20 Aug 2018 08:12:27 -0700 Subject: i40e: fix condition of WARN_ONCE for stat strings Commit 9b10df596bd4 ("i40e: use WARN_ONCE to replace the commented BUG_ON size check") introduced a warning check to make sure that the size of the stat strings was always the expected value. This code accidentally inverted the check of the data pointer. Fix this so that we accurately count the size of the stats we copied in. This fixes an erroneous WARN kernel splat that occurs when requesting ethtool statistics. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Tested-by: Mauro S M Rodrigues Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index abcd096ede1402..5ff6caa83948c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2013,7 +2013,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data) for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i); - WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, + WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, "stat strings count mismatch!"); } -- cgit 1.2.3-korg From cc98963dbaaea93d17608641b8d6942a5327fc31 Mon Sep 17 00:00:00 2001 From: Horia Geantă Date: Mon, 6 Aug 2018 15:29:09 +0300 Subject: crypto: caam/jr - fix descriptor DMA unmapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Descriptor address needs to be swapped to CPU endianness before being DMA unmapped. Cc: # 4.8+ Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reported-by: Laurentiu Tudor Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/jr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index f4f258075b895a..acdd72016ffe15 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -190,7 +190,8 @@ static void caam_jr_dequeue(unsigned long devarg) BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0); /* Unmap just-run descriptor so we can post-process */ - dma_unmap_single(dev, jrp->outring[hw_idx].desc, + dma_unmap_single(dev, + caam_dma_to_cpu(jrp->outring[hw_idx].desc), jrp->entinfo[sw_idx].desc_size, DMA_TO_DEVICE); -- cgit 1.2.3-korg From ad876a18048f43b1f66f5d474b7598538668c5de Mon Sep 17 00:00:00 2001 From: Horia Geantă Date: Mon, 6 Aug 2018 15:29:39 +0300 Subject: crypto: caam/qi - fix error path in xts setkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xts setkey callback returns 0 on some error paths. Fix this by returning -EINVAL. Cc: # 4.12+ Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_qi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 6e61cc93c2b0da..d7aa7d7ff102fa 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -679,10 +679,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, int ret = 0; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { - crypto_ablkcipher_set_flags(ablkcipher, - CRYPTO_TFM_RES_BAD_KEY_LEN); dev_err(jrdev, "key size mismatch\n"); - return -EINVAL; + goto badkey; } ctx->cdata.keylen = keylen; @@ -715,7 +713,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return ret; badkey: crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return 0; + return -EINVAL; } /* -- cgit 1.2.3-korg From f1bf9e60a0779ec97de9ecdc353e1d01cdd73f43 Mon Sep 17 00:00:00 2001 From: Horia Geantă Date: Mon, 6 Aug 2018 15:29:55 +0300 Subject: crypto: caam - fix DMA mapping direction for RSA forms 2 & 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crypto engine needs some temporary locations in external memory for running RSA decrypt forms 2 and 3 (CRT). These are named "tmp1" and "tmp2" in the PDB. Update DMA mapping direction of tmp1 and tmp2 from TO_DEVICE to BIDIRECTIONAL, since engine needs r/w access. Cc: # 4.13+ Fixes: 52e26d77b8b3 ("crypto: caam - add support for RSA key form 2") Fixes: 4a651b122adb ("crypto: caam - add support for RSA key form 3") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 578ea63a31098e..f26d62e5533a7a 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -71,8 +71,8 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, @@ -90,8 +90,8 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } /* RSA Job Completion handler */ @@ -417,13 +417,13 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, goto unmap_p; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_q; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -451,7 +451,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_q: dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); unmap_p: @@ -504,13 +504,13 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, goto unmap_dq; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_qinv; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -538,7 +538,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_qinv: dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); unmap_dq: -- cgit 1.2.3-korg From 7fa885e2a22fd0f91a2c23d9275f5021f618ff5a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 7 Aug 2018 23:18:36 +0200 Subject: crypto: arm64/sm4-ce - check for the right CPU feature bit ARMv8.2 specifies special instructions for the SM3 cryptographic hash and the SM4 symmetric cipher. While it is unlikely that a core would implement one and not the other, we should only use SM4 instructions if the SM4 CPU feature bit is set, and we currently check the SM3 feature bit instead. So fix that. Fixes: e99ce921c468 ("crypto: arm64 - add support for SM4...") Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index b7fb5274b25013..0c4fc223f22575 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -69,5 +69,5 @@ static void __exit sm4_ce_mod_fini(void) crypto_unregister_alg(&sm4_ce_alg); } -module_cpu_feature_match(SM3, sm4_ce_mod_init); +module_cpu_feature_match(SM4, sm4_ce_mod_init); module_exit(sm4_ce_mod_fini); -- cgit 1.2.3-korg From 65b2c12dcdb883fc015c0ec65d6c2f857e0456ac Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 10 Aug 2018 18:27:41 +0530 Subject: crypto: chtls - fix null dereference chtls_free_uld() call chtls_free_uld() only for the initialized cdev, this fixes NULL dereference in chtls_free_uld() Signed-off-by: Ganesh Goudar Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls.h | 5 +++++ drivers/crypto/chelsio/chtls/chtls_main.c | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index a53a0e6ba024e8..7725b6ee14efb2 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -96,6 +96,10 @@ enum csk_flags { CSK_CONN_INLINE, /* Connection on HW */ }; +enum chtls_cdev_state { + CHTLS_CDEV_STATE_UP = 1 +}; + struct listen_ctx { struct sock *lsk; struct chtls_dev *cdev; @@ -146,6 +150,7 @@ struct chtls_dev { unsigned int send_page_order; int max_host_sndbuf; struct key_map kmap; + unsigned int cdev_state; }; struct chtls_hws { diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index 9b07f9165658be..f59b044ebd2552 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -160,6 +160,7 @@ static void chtls_register_dev(struct chtls_dev *cdev) tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; tls_register_device(&cdev->tlsdev); + cdev->cdev_state = CHTLS_CDEV_STATE_UP; } static void chtls_unregister_dev(struct chtls_dev *cdev) @@ -281,8 +282,10 @@ static void chtls_free_all_uld(void) struct chtls_dev *cdev, *tmp; mutex_lock(&cdev_mutex); - list_for_each_entry_safe(cdev, tmp, &cdev_list, list) - chtls_free_uld(cdev); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list) { + if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) + chtls_free_uld(cdev); + } mutex_unlock(&cdev_mutex); } -- cgit 1.2.3-korg From e5b954e8d11fdde55eed35017370a3a0d8837754 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 15 Aug 2018 10:29:42 -0700 Subject: crypto: aesni - Use unaligned loads from gcm_context_data A regression was reported bisecting to 1476db2d12 "Move HashKey computation from stack to gcm_context". That diff moved HashKey computation from the stack, which was explicitly aligned in the asm, to a struct provided from the C code, depending on AESNI_ALIGN_ATTR for alignment. It appears some compilers may not align this struct correctly, resulting in a crash on the movdqa instruction when attempting to encrypt or decrypt data. Fix by using unaligned loads for the HashKeys. On modern hardware there is no perf difference between the unaligned and aligned loads. All other accesses to gcm_context_data already use unaligned loads. Reported-by: Mauro Rossi Fixes: 1476db2d12 ("Move HashKey computation from stack to gcm_context") Cc: Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 66 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index e762ef417562ff..d27a50656aa1f7 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff pcmpeqd TWOONE(%rip), \TMP2 pand POLY(%rip), \TMP2 pxor \TMP2, \TMP3 - movdqa \TMP3, HashKey(%arg2) + movdqu \TMP3, HashKey(%arg2) movdqa \TMP3, \TMP5 pshufd $78, \TMP3, \TMP1 pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%arg2) + movdqu \TMP1, HashKey_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%arg2) + movdqu \TMP5, HashKey_2(%arg2) # HashKey_2 = HashKey^2<<1 (mod poly) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%arg2) + movdqu \TMP1, HashKey_2_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%arg2) + movdqu \TMP5, HashKey_3(%arg2) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%arg2) + movdqu \TMP1, HashKey_3_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%arg2) + movdqu \TMP5, HashKey_4(%arg2) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%arg2) + movdqu \TMP1, HashKey_4_k(%arg2) .endm # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. @@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, - movdqa HashKey(%arg2), %xmm13 + movdqu HashKey(%arg2), %xmm13 CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ %xmm4, %xmm5, %xmm6 @@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%arg2), \TMP5 + movdqu HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%arg2), \TMP5 + movdqu HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%arg2), \TMP5 + movdqu HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%arg2), \TMP5 + movdqu HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%arg2), \TMP5 + movdqu HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%arg2), \TMP5 + movdqu HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%arg2), \TMP5 + movdqu HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%arg2), \TMP5 + movdqu HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM1, \TMP6 pshufd $78, \XMM1, \TMP2 pxor \XMM1, \TMP2 - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%arg2), \TMP4 + movdqu HashKey_4_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqa \XMM1, \XMMDst movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 @@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM2, \TMP1 pshufd $78, \XMM2, \TMP2 pxor \XMM2, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%arg2), \TMP4 + movdqu HashKey_3_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM2, \XMMDst @@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM3, \TMP1 pshufd $78, \XMM3, \TMP2 pxor \XMM3, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%arg2), \TMP4 + movdqu HashKey_2_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM3, \XMMDst @@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM4, \TMP1 pshufd $78, \XMM4, \TMP2 pxor \XMM4, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%arg2), \TMP4 + movdqu HashKey_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM4, \XMMDst -- cgit 1.2.3-korg From c2b24c36e0a30ebd8fc7d068da7f0451f2c05c76 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Aug 2018 16:58:34 +0200 Subject: crypto: arm64/aes-gcm-ce - fix scatterwalk API violation Commit 71e52c278c54 ("crypto: arm64/aes-ce-gcm - operate on two input blocks at a time") modified the granularity at which the AES/GCM code processes its input to allow subsequent changes to be applied that improve performance by using aggregation to process multiple input blocks at once. For this reason, it doubled the algorithm's 'chunksize' property to 2 x AES_BLOCK_SIZE, but retained the non-SIMD fallback path that processes a single block at a time. In some cases, this violates the skcipher scatterwalk API, by calling skcipher_walk_done() with a non-zero residue value for a chunk that is expected to be handled in its entirety. This results in a WARN_ON() to be hit by the TLS self test code, but is likely to break other user cases as well. Unfortunately, none of the current test cases exercises this exact code path at the moment. Fixes: 71e52c278c54 ("crypto: arm64/aes-ce-gcm - operate on two ...") Reported-by: Vakul Garg Signed-off-by: Ard Biesheuvel Tested-by: Vakul Garg Signed-off-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-glue.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 6e9f33d14930eb..067d8937d5af1e 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -417,7 +417,7 @@ static int gcm_encrypt(struct aead_request *req) __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - while (walk.nbytes >= AES_BLOCK_SIZE) { + while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; @@ -437,11 +437,18 @@ static int gcm_encrypt(struct aead_request *req) NULL); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); + walk.nbytes % (2 * AES_BLOCK_SIZE)); } - if (walk.nbytes) + if (walk.nbytes) { __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, nrounds); + if (walk.nbytes > AES_BLOCK_SIZE) { + crypto_inc(iv, AES_BLOCK_SIZE); + __aes_arm64_encrypt(ctx->aes_key.key_enc, + ks + AES_BLOCK_SIZE, iv, + nrounds); + } + } } /* handle the tail */ @@ -545,7 +552,7 @@ static int gcm_decrypt(struct aead_request *req) __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - while (walk.nbytes >= AES_BLOCK_SIZE) { + while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; @@ -564,11 +571,21 @@ static int gcm_decrypt(struct aead_request *req) } while (--blocks > 0); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); + walk.nbytes % (2 * AES_BLOCK_SIZE)); } - if (walk.nbytes) + if (walk.nbytes) { + if (walk.nbytes > AES_BLOCK_SIZE) { + u8 *iv2 = iv + AES_BLOCK_SIZE; + + memcpy(iv2, iv, AES_BLOCK_SIZE); + crypto_inc(iv2, AES_BLOCK_SIZE); + + __aes_arm64_encrypt(ctx->aes_key.key_enc, iv2, + iv2, nrounds); + } __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv, nrounds); + } } /* handle the tail */ -- cgit 1.2.3-korg From 0522236d4f9c5ab2e79889cb020d1acbe5da416e Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 22 Aug 2018 08:26:31 +0200 Subject: crypto: vmx - Fix sleep-in-atomic bugs This patch fixes sleep-in-atomic bugs in AES-CBC and AES-XTS VMX implementations. The problem is that the blkcipher_* functions should not be called in atomic context. The bugs can be reproduced via the AF_ALG interface by trying to encrypt/decrypt sufficiently large buffers (at least 64 KiB) using the VMX implementations of 'cbc(aes)' or 'xts(aes)'. Such operations then trigger BUG in crypto_yield(): [ 891.863680] BUG: sleeping function called from invalid context at include/crypto/algapi.h:424 [ 891.864622] in_atomic(): 1, irqs_disabled(): 0, pid: 12347, name: kcapi-enc [ 891.864739] 1 lock held by kcapi-enc/12347: [ 891.864811] #0: 00000000f5d42c46 (sk_lock-AF_ALG){+.+.}, at: skcipher_recvmsg+0x50/0x530 [ 891.865076] CPU: 5 PID: 12347 Comm: kcapi-enc Not tainted 4.19.0-0.rc0.git3.1.fc30.ppc64le #1 [ 891.865251] Call Trace: [ 891.865340] [c0000003387578c0] [c000000000d67ea4] dump_stack+0xe8/0x164 (unreliable) [ 891.865511] [c000000338757910] [c000000000172a58] ___might_sleep+0x2f8/0x310 [ 891.865679] [c000000338757990] [c0000000006bff74] blkcipher_walk_done+0x374/0x4a0 [ 891.865825] [c0000003387579e0] [d000000007e73e70] p8_aes_cbc_encrypt+0x1c8/0x260 [vmx_crypto] [ 891.865993] [c000000338757ad0] [c0000000006c0ee0] skcipher_encrypt_blkcipher+0x60/0x80 [ 891.866128] [c000000338757b10] [c0000000006ec504] skcipher_recvmsg+0x424/0x530 [ 891.866283] [c000000338757bd0] [c000000000b00654] sock_recvmsg+0x74/0xa0 [ 891.866403] [c000000338757c10] [c000000000b00f64] ___sys_recvmsg+0xf4/0x2f0 [ 891.866515] [c000000338757d90] [c000000000b02bb8] __sys_recvmsg+0x68/0xe0 [ 891.866631] [c000000338757e30] [c00000000000bbe4] system_call+0x5c/0x70 Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module") Fixes: c07f5d3da643 ("crypto: vmx - Adding support for XTS") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_cbc.c | 30 ++++++++++++++---------------- drivers/crypto/vmx/aes_xts.c | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 5285ece4f33a36..b71895871be3f1 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -107,24 +107,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; @@ -147,24 +146,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 8bd9aff0f55fba..e9954a7d46944d 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -116,32 +116,39 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); - - ret = blkcipher_walk_virt(desc, &walk); iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); if (enc) aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); else aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; } -- cgit 1.2.3-korg From 3d7c82060d1fe65bde4023aac41a0b1bd7718e07 Mon Sep 17 00:00:00 2001 From: Srikanth Jampala Date: Wed, 22 Aug 2018 12:40:52 +0530 Subject: crypto: cavium/nitrox - fix for command corruption in queue full case with backlog submissions. Earlier used to post the current command without checking queue full after backlog submissions. So, post the current command only after confirming the space in queue after backlog submissions. Maintain host write index instead of reading device registers to get the next free slot to post the command. Return -ENOSPC in queue full case. Signed-off-by: Srikanth Jampala Reviewed-by: Gadam Sreerama Tested-by: Jha, Chandan Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_dev.h | 3 +- drivers/crypto/cavium/nitrox/nitrox_lib.c | 1 + drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 57 ++++++++++++++++------------ 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h index 9a476bb6d4c7ea..af596455b420f6 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_dev.h +++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h @@ -35,6 +35,7 @@ struct nitrox_cmdq { /* requests in backlog queues */ atomic_t backlog_count; + int write_idx; /* command size 32B/64B */ u8 instr_size; u8 qno; @@ -87,7 +88,7 @@ struct nitrox_bh { struct bh_data *slc; }; -/* NITROX-5 driver state */ +/* NITROX-V driver state */ #define NITROX_UCODE_LOADED 0 #define NITROX_READY 1 diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index ebe267379ac95e..4d31df07777f63 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -36,6 +36,7 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq) cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN); cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN); cmdq->qsize = (qsize + PKT_IN_ALIGN); + cmdq->write_idx = 0; spin_lock_init(&cmdq->response_lock); spin_lock_init(&cmdq->cmdq_lock); diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index deaefd532aaa15..4a362fc22f6287 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -42,6 +42,16 @@ * Invalid flag options in AES-CCM IV. */ +static inline int incr_index(int index, int count, int max) +{ + if ((index + count) >= max) + index = index + count - max; + else + index += count; + + return index; +} + /** * dma_free_sglist - unmap and free the sg lists. * @ndev: N5 device @@ -426,30 +436,29 @@ static void post_se_instr(struct nitrox_softreq *sr, struct nitrox_cmdq *cmdq) { struct nitrox_device *ndev = sr->ndev; - union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell; - u64 offset; + int idx; u8 *ent; spin_lock_bh(&cmdq->cmdq_lock); - /* get the next write offset */ - offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno); - pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset); + idx = cmdq->write_idx; /* copy the instruction */ - ent = cmdq->head + pkt_in_baoff_dbell.s.aoff; + ent = cmdq->head + (idx * cmdq->instr_size); memcpy(ent, &sr->instr, cmdq->instr_size); - /* flush the command queue updates */ - dma_wmb(); - sr->tstamp = jiffies; atomic_set(&sr->status, REQ_POSTED); response_list_add(sr, cmdq); + sr->tstamp = jiffies; + /* flush the command queue updates */ + dma_wmb(); /* Ring doorbell with count 1 */ writeq(1, cmdq->dbell_csr_addr); /* orders the doorbell rings */ mmiowb(); + cmdq->write_idx = incr_index(idx, 1, ndev->qlen); + spin_unlock_bh(&cmdq->cmdq_lock); } @@ -459,6 +468,9 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) struct nitrox_softreq *sr, *tmp; int ret = 0; + if (!atomic_read(&cmdq->backlog_count)) + return 0; + spin_lock_bh(&cmdq->backlog_lock); list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) { @@ -466,7 +478,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) /* submit until space available */ if (unlikely(cmdq_full(cmdq, ndev->qlen))) { - ret = -EBUSY; + ret = -ENOSPC; break; } /* delete from backlog list */ @@ -491,23 +503,20 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) { struct nitrox_cmdq *cmdq = sr->cmdq; struct nitrox_device *ndev = sr->ndev; - int ret = -EBUSY; + + /* try to post backlog requests */ + post_backlog_cmds(cmdq); if (unlikely(cmdq_full(cmdq, ndev->qlen))) { if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -EAGAIN; - + return -ENOSPC; + /* add to backlog list */ backlog_list_add(sr, cmdq); - } else { - ret = post_backlog_cmds(cmdq); - if (ret) { - backlog_list_add(sr, cmdq); - return ret; - } - post_se_instr(sr, cmdq); - ret = -EINPROGRESS; + return -EBUSY; } - return ret; + post_se_instr(sr, cmdq); + + return -EINPROGRESS; } /** @@ -624,11 +633,9 @@ int nitrox_process_se_request(struct nitrox_device *ndev, */ sr->instr.fdata[0] = *((u64 *)&req->gph); sr->instr.fdata[1] = 0; - /* flush the soft_req changes before posting the cmd */ - wmb(); ret = nitrox_enqueue_request(sr); - if (ret == -EAGAIN) + if (ret == -ENOSPC) goto send_fail; return ret; -- cgit 1.2.3-korg From 602b74eda81311dbdb5dbab08c30f789f648ebdc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 24 Aug 2018 15:41:35 +0300 Subject: mlxsw: spectrum_switchdev: Do not leak RIFs when removing bridge When a bridge device is removed, the VLANs are flushed from each configured port. This causes the ports to decrement the reference count on the associated FIDs (filtering identifier). If the reference count of a FID is 1 and it has a RIF (router interface), then this RIF is destroyed. However, if no port is member in the VLAN for which a RIF exists, then the RIF will continue to exist after the removal of the bridge. To reproduce: # ip link add name br0 type bridge vlan_filtering 1 # ip link set dev swp1 master br0 # ip link add link br0 name br0.10 type vlan id 10 # ip address add 192.0.2.0/24 dev br0.10 # ip link del dev br0 The RIF associated with br0.10 continues to exist. Fix this by iterating over all the bridge device uppers when it is destroyed and take care of destroying their RIFs. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3ae9301967410b..3cdb7aca90b724 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -414,6 +414,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3a96307f51b055..2ab9cf25a08ae1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6234,6 +6234,17 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_vr_put(mlxsw_sp, vr); } +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return; + mlxsw_sp_rif_destroy(rif); +} + static void mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0d8444aaba01ae..db715da7bab774 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -127,6 +127,24 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); } +static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, + void *data) +{ + struct mlxsw_sp *mlxsw_sp = data; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + return 0; +} + +static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + netdev_walk_all_upper_dev_rcu(dev, + mlxsw_sp_bridge_device_upper_rif_destroy, + mlxsw_sp); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) @@ -165,6 +183,8 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, + bridge_device->dev); list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; -- cgit 1.2.3-korg From ab5f11055fdf8dfc3ddbd89e8e3cc550de41d1d3 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 21 Aug 2018 17:35:48 +0200 Subject: net: macb: Fix regression breaking non-MDIO fixed-link PHYs commit 739de9a1563a ("net: macb: Reorganize macb_mii bringup") broke initializing macb on the EVB-KSZ9477 eval board. There, of_mdiobus_register was called even for the fixed-link representing the RGMII-link to the switch with the result that the driver attempts to enumerate PHYs on a non-existent MDIO bus: libphy: MACB_mii_bus: probed mdio_bus f0028000.ethernet-ffffffff: fixed-link has invalid PHY address mdio_bus f0028000.ethernet-ffffffff: scan phy fixed-link at address 0 [snip] mdio_bus f0028000.ethernet-ffffffff: scan phy fixed-link at address 31 The "MDIO" bus registration succeeds regardless, having claimed the reset GPIO, and calling of_phy_register_fixed_link later on fails because it tries to claim the same GPIO: macb f0028000.ethernet: broken fixed-link specification Fix this by registering the fixed-link before calling mdiobus_register. Fixes: 739de9a1563a ("net: macb: Reorganize macb_mii bringup") Signed-off-by: Ahmad Fatoum Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dc09f9a8a49bb1..f46b854d34b54b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -482,11 +482,6 @@ static int macb_mii_probe(struct net_device *dev) if (np) { if (of_phy_is_fixed_link(np)) { - if (of_phy_register_fixed_link(np) < 0) { - dev_err(&bp->pdev->dev, - "broken fixed-link specification\n"); - return -ENODEV; - } bp->phy_node = of_node_get(np); } else { bp->phy_node = of_parse_phandle(np, "phy-handle", 0); @@ -569,7 +564,7 @@ static int macb_mii_init(struct macb *bp) { struct macb_platform_data *pdata; struct device_node *np; - int err; + int err = -ENXIO; /* Enable management port */ macb_writel(bp, NCR, MACB_BIT(MPE)); @@ -592,12 +587,23 @@ static int macb_mii_init(struct macb *bp) dev_set_drvdata(&bp->dev->dev, bp->mii_bus); np = bp->pdev->dev.of_node; - if (pdata) - bp->mii_bus->phy_mask = pdata->phy_mask; + if (np && of_phy_is_fixed_link(np)) { + if (of_phy_register_fixed_link(np) < 0) { + dev_err(&bp->pdev->dev, + "broken fixed-link specification %pOF\n", np); + goto err_out_free_mdiobus; + } + + err = mdiobus_register(bp->mii_bus); + } else { + if (pdata) + bp->mii_bus->phy_mask = pdata->phy_mask; + + err = of_mdiobus_register(bp->mii_bus, np); + } - err = of_mdiobus_register(bp->mii_bus, np); if (err) - goto err_out_free_mdiobus; + goto err_out_free_fixed_link; err = macb_mii_probe(bp->dev); if (err) @@ -607,6 +613,7 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); +err_out_free_fixed_link: if (np && of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); err_out_free_mdiobus: -- cgit 1.2.3-korg From f7b9e8e111e0ce04ed7d1a1cb5b01b6e57775708 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2018 15:27:23 +0200 Subject: Revert "net: stmmac: fix build failure due to missing COMMON_CLK dependency" This reverts commit bde4975310eb1982bd0bbff673989052d92fd481. All legacy clock implementations now implement clk_set_rate() (Some implementations may be dummies, though). Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index edf20361ea5f15..bf4acebb6bcddd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -33,7 +33,7 @@ config DWMAC_DWC_QOS_ETH select PHYLIB select CRC32 select MII - depends on OF && COMMON_CLK && HAS_DMA + depends on OF && HAS_DMA help Support for chips using the snps,dwc-qos-ethernet.txt DT binding. @@ -57,7 +57,7 @@ config DWMAC_ANARION config DWMAC_IPQ806X tristate "QCA IPQ806x DWMAC support" default ARCH_QCOM - depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) + depends on OF && (ARCH_QCOM || COMPILE_TEST) select MFD_SYSCON help Support for QCA IPQ806X DWMAC Ethernet. @@ -100,7 +100,7 @@ config DWMAC_OXNAS config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP - depends on OF && COMMON_CLK && (ARCH_ROCKCHIP || COMPILE_TEST) + depends on OF && (ARCH_ROCKCHIP || COMPILE_TEST) select MFD_SYSCON help Support for Ethernet controller on Rockchip RK3288 SoC. @@ -123,7 +123,7 @@ config DWMAC_SOCFPGA config DWMAC_STI tristate "STi GMAC support" default ARCH_STI - depends on OF && COMMON_CLK && (ARCH_STI || COMPILE_TEST) + depends on OF && (ARCH_STI || COMPILE_TEST) select MFD_SYSCON ---help--- Support for ethernet controller on STi SOCs. @@ -147,7 +147,7 @@ config DWMAC_STM32 config DWMAC_SUNXI tristate "Allwinner GMAC support" default ARCH_SUNXI - depends on OF && COMMON_CLK && (ARCH_SUNXI || COMPILE_TEST) + depends on OF && (ARCH_SUNXI || COMPILE_TEST) ---help--- Support for Allwinner A20/A31 GMAC ethernet controllers. -- cgit 1.2.3-korg From 0da70f808029476001109b6cb076737bc04cea2e Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 23 Aug 2018 10:45:22 +0300 Subject: net: macb: do not disable MDIO bus at open/close time macb_reset_hw() is called from macb_close() and indirectly from macb_open(). macb_reset_hw() zeroes the NCR register, including the MPE (Management Port Enable) bit. This will prevent accessing any other PHYs for other Ethernet MACs on the MDIO bus, which remains registered at macb_reset_hw() time, until macb_init_hw() is called from macb_open() which sets the MPE bit again. I.e. currently the MDIO bus has a short disruption at open time and is disabled at close time until the interface is opened again. Fix that by only touching the RE and TE bits when enabling and disabling RX/TX. v2: Make macb_init_hw() NCR write a single statement. Fixes: 6c36a7074436 ("macb: Use generic PHY layer") Signed-off-by: Anssi Hannula Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f46b854d34b54b..c6707ea2d75198 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2035,14 +2035,17 @@ static void macb_reset_hw(struct macb *bp) { struct macb_queue *queue; unsigned int q; + u32 ctrl = macb_readl(bp, NCR); /* Disable RX and TX (XXX: Should we halt the transmission * more gracefully?) */ - macb_writel(bp, NCR, 0); + ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE)); /* Clear the stats registers (XXX: Update stats first?) */ - macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); + ctrl |= MACB_BIT(CLRSTAT); + + macb_writel(bp, NCR, ctrl); /* Clear all status flags */ macb_writel(bp, TSR, -1); @@ -2230,7 +2233,7 @@ static void macb_init_hw(struct macb *bp) } /* Enable TX and RX */ - macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); } /* The hash address register is 64 bits long and takes up two -- cgit 1.2.3-korg From 6750c87074c5b534d82fdaabb1deb45b8f1f57de Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 23 Aug 2018 13:20:52 -0700 Subject: qlge: Fix netdev features configuration. qlge_fix_features() is not supposed to modify hardware or driver state, rather it is supposed to only fix requested fetures bits. Currently qlge_fix_features() also goes for interface down and up unnecessarily if there is not even any change in features set. This patch changes/fixes following - 1) Move reload of interface or device re-config from qlge_fix_features() to qlge_set_features(). 2) Reload of interface in qlge_set_features() only if relevant feature bit (NETIF_F_HW_VLAN_CTAG_RX) is changed. 3) Get rid of qlge_fix_features() since driver is not really required to fix any features bit. Signed-off-by: Manish Reviewed-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 353f1c129af1e2..059ba9429e51a3 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2384,26 +2384,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev, return status; } -static netdev_features_t qlge_fix_features(struct net_device *ndev, - netdev_features_t features) -{ - int err; - - /* Update the behavior of vlan accel in the adapter */ - err = qlge_update_hw_vlan_features(ndev, features); - if (err) - return err; - - return features; -} - static int qlge_set_features(struct net_device *ndev, netdev_features_t features) { netdev_features_t changed = ndev->features ^ features; + int err; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + /* Update the behavior of vlan accel in the adapter */ + err = qlge_update_hw_vlan_features(ndev, features); + if (err) + return err; - if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); + } return 0; } @@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_netdev_ops = { .ndo_set_mac_address = qlge_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = qlge_tx_timeout, - .ndo_fix_features = qlge_fix_features, .ndo_set_features = qlge_set_features, .ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid, -- cgit 1.2.3-korg From 2d66f997f0545c8f7fc5cf0b49af1decb35170e7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 24 Aug 2018 16:53:13 +0800 Subject: vhost: correctly check the iova range when waking virtqueue We don't wakeup the virtqueue if the first byte of pending iova range is the last byte of the range we just got updated. This will lead a virtqueue to wait for IOTLB updating forever. Fixing by correct the check and wake up the virtqueue in this case. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Peter Xu Signed-off-by: Jason Wang Reviewed-by: Peter Xu Tested-by: Peter Xu Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 96c1d8400822a3..b13c6b4b2c665a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -952,7 +952,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && - msg->iova + msg->size - 1 > vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); -- cgit 1.2.3-korg From e75d039a54090470b7a42e803fd4f9398390f907 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Aug 2018 12:18:30 +0100 Subject: qed: fix spelling mistake "comparsion" -> "comparison" Trivial fix to spelling mistake in DP_ERR error message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index d9ab5add27a8bf..34193c2f169961 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -407,7 +407,7 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, if (i == QED_INIT_MAX_POLL_COUNT) { DP_ERR(p_hwfn, - "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", + "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparison %08x)]\n", addr, le32_to_cpu(cmd->expected_val), val, le32_to_cpu(cmd->op_data)); } -- cgit 1.2.3-korg From 98c8f125fd8a6240ea343c1aa50a1be9047791b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Aug 2018 22:58:01 -0700 Subject: net: sched: Fix memory exposure from short TCA_U32_SEL Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink policy, so max length isn't enforced, only minimum. This means nkeys (from userspace) was being trusted without checking the actual size of nla_len(), which could lead to a memory over-read, and ultimately an exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within a namespace. Reported-by: Al Viro Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5d2a6dc39216b..f218ccf1e2d9a6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -914,6 +914,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid, flags = 0; + size_t sel_size; int err; #ifdef CONFIG_CLS_U32_PERF size_t size; @@ -1076,8 +1077,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } s = nla_data(tb[TCA_U32_SEL]); + sel_size = struct_size(s, keys, s->nkeys); + if (nla_len(tb[TCA_U32_SEL]) < sel_size) { + err = -EINVAL; + goto erridr; + } - n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); if (n == NULL) { err = -ENOBUFS; goto erridr; @@ -1092,7 +1098,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&n->sel, s, sel_size); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; -- cgit 1.2.3-korg From 3ad867001c91657c46dcf6656d52eb6080286fd5 Mon Sep 17 00:00:00 2001 From: Lothar Felten Date: Tue, 14 Aug 2018 09:09:37 +0200 Subject: hwmon: (ina2xx) fix sysfs shunt resistor read access fix the sysfs shunt resistor read access: return the shunt resistor value, not the calibration register contents. update email address Signed-off-by: Lothar Felten Signed-off-by: Guenter Roeck --- Documentation/hwmon/ina2xx | 2 +- drivers/hwmon/ina2xx.c | 13 +++++++++++-- include/linux/platform_data/ina2xx.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx index 72d16f08e431c6..b8df81f6d6bcf9 100644 --- a/Documentation/hwmon/ina2xx +++ b/Documentation/hwmon/ina2xx @@ -32,7 +32,7 @@ Supported chips: Datasheet: Publicly available at the Texas Instruments website http://www.ti.com/ -Author: Lothar Felten +Author: Lothar Felten Description ----------- diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e9e6aeabbf842f..71d3445ba869c8 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -17,7 +17,7 @@ * Bi-directional Current/Power Monitor with I2C Interface * Datasheet: http://www.ti.com/product/ina230 * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * Thanks to Jan Volkering * * This program is free software; you can redistribute it and/or modify @@ -329,6 +329,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val) return 0; } +static ssize_t ina2xx_show_shunt(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt); +} + static ssize_t ina2xx_store_shunt(struct device *dev, struct device_attribute *da, const char *buf, size_t count) @@ -403,7 +412,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_store_shunt, + ina2xx_show_shunt, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h index 9abc0ca7259b78..9f0aa1b48c7849 100644 --- a/include/linux/platform_data/ina2xx.h +++ b/include/linux/platform_data/ina2xx.h @@ -1,7 +1,7 @@ /* * Driver for Texas Instruments INA219, INA226 power monitor chips * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- cgit 1.2.3-korg From 9d19371df50a73301aec66a479b490587e889055 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 12:12:36 +0300 Subject: hwmon: (adt7475) Potential error pointer dereferences The adt7475_update_device() function returns error pointers. The problem is that in show_pwmfreq() we dereference it before the check. And then in pwm_use_point2_pwm_at_crit_show() there isn't a check at all. I don't know if it's required, but it silences a static checker warning and it's doesn't hurt anything to check. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 90837f7c7d0f32..16045149f3db9a 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -962,13 +962,14 @@ static ssize_t show_pwmfreq(struct device *dev, struct device_attribute *attr, { struct adt7475_data *data = adt7475_update_device(dev); struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); - int i = clamp_val(data->range[sattr->index] & 0xf, 0, - ARRAY_SIZE(pwmfreq_table) - 1); + int idx; if (IS_ERR(data)) return PTR_ERR(data); + idx = clamp_val(data->range[sattr->index] & 0xf, 0, + ARRAY_SIZE(pwmfreq_table) - 1); - return sprintf(buf, "%d\n", pwmfreq_table[i]); + return sprintf(buf, "%d\n", pwmfreq_table[idx]); } static ssize_t set_pwmfreq(struct device *dev, struct device_attribute *attr, @@ -1004,6 +1005,10 @@ static ssize_t pwm_use_point2_pwm_at_crit_show(struct device *dev, char *buf) { struct adt7475_data *data = adt7475_update_device(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + return sprintf(buf, "%d\n", !!(data->config4 & CONFIG4_MAXDUTY)); } -- cgit 1.2.3-korg From f196dec6d50abb2e65fb54a0621b2f1b4d922995 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 13:07:47 +0300 Subject: hwmon: (adt7475) Make adt7475_read_word() return errors The adt7475_read_word() function was meant to return negative error codes on failure. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 16045149f3db9a..f4c7516eb9891f 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -302,14 +302,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) return clamp_val(reg, 0, 1023) & (0xff << 2); } -static u16 adt7475_read_word(struct i2c_client *client, int reg) +static int adt7475_read_word(struct i2c_client *client, int reg) { - u16 val; + int val1, val2; - val = i2c_smbus_read_byte_data(client, reg); - val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + val1 = i2c_smbus_read_byte_data(client, reg); + if (val1 < 0) + return val1; + val2 = i2c_smbus_read_byte_data(client, reg + 1); + if (val2 < 0) + return val2; - return val; + return val1 | (val2 << 8); } static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) -- cgit 1.2.3-korg From d49dbfade96d5b0863ca8a90122a805edd5ef50a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 15 Aug 2018 08:14:37 -0500 Subject: hwmon: (nct6775) Fix potential Spectre v1 val can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: vers/hwmon/nct6775.c:2698 store_pwm_weight_temp_sel() warn: potential spectre issue 'data->temp_src' [r] Fix this by sanitizing val before using it to index data->temp_src Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index c6bd61e4695abc..944f5b63aecd70 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "lm75.h" #define USE_ALTERNATE @@ -2689,6 +2690,7 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr, return err; if (val > NUM_TEMP) return -EINVAL; + val = array_index_nospec(val, NUM_TEMP + 1); if (val && (!(data->have_temp & BIT(val - 1)) || !data->temp_src[val - 1])) return -EINVAL; -- cgit 1.2.3-korg From c7c09dc187f0323ad40b5b6c57a6db673a386a7f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 24 Aug 2018 11:29:27 +0800 Subject: nios2: kconfig: remove duplicate DEBUG_STACK_USAGE symbol defintions DEBUG_STACK_USAGE is already defined in lib/Kconfig.debug Signed-off-by: Tobias Klauser Signed-off-by: Ley Foon Tan --- arch/nios2/Kconfig.debug | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug index 7a49f0d28d14c1..f1da8a7b17ff49 100644 --- a/arch/nios2/Kconfig.debug +++ b/arch/nios2/Kconfig.debug @@ -3,15 +3,6 @@ config TRACE_IRQFLAGS_SUPPORT def_bool y -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T and sysrq-P debug output. - - This option will slow down process creation somewhat. - config EARLY_PRINTK bool "Activate early kernel debugging" default y -- cgit 1.2.3-korg From 166cd4421b0dabd2e438f5384f4ecf930dc8ab08 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 23 Aug 2018 23:43:45 +0200 Subject: mtd: rawnand: docg4: Remove wrong __init annotations If gcc (e.g. 4.1.2) decides not to inline init_mtd_structs() and read_id_reg(), this will cause section mismatches, and crashes: WARNING: drivers/mtd/nand/raw/docg4.o(.text+0xc10): Section mismatch in reference from the function docg4_attach_chip() to the function .init.text:init_mtd_structs() The function docg4_attach_chip() references the function __init init_mtd_structs(). This is often because docg4_attach_chip lacks a __init annotation or the annotation of init_mtd_structs is wrong. WARNING: drivers/mtd/nand/raw/docg4.o(.text+0xc3e): Section mismatch in reference from the function docg4_attach_chip() to the function .init.text:read_id_reg() The function docg4_attach_chip() references the function __init read_id_reg(). This is often because docg4_attach_chip lacks a __init annotation or the annotation of read_id_reg is wrong. Fix this by dropping the now incorrect __init annotations from init_mtd_structs() and read_id_reg(). Fixes: 66a38478dcc5b5a3 ("mtd: rawnand: docg4: convert driver to nand_scan()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/docg4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c index a3f04315c05c54..427fcbc1b71c05 100644 --- a/drivers/mtd/nand/raw/docg4.c +++ b/drivers/mtd/nand/raw/docg4.c @@ -1218,7 +1218,7 @@ static int docg4_resume(struct platform_device *pdev) return 0; } -static void __init init_mtd_structs(struct mtd_info *mtd) +static void init_mtd_structs(struct mtd_info *mtd) { /* initialize mtd and nand data structures */ @@ -1290,7 +1290,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd) } -static int __init read_id_reg(struct mtd_info *mtd) +static int read_id_reg(struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); struct docg4_priv *doc = nand_get_controller_data(nand); -- cgit 1.2.3-korg From 908946c4bee705542f38bc06c0203a6d83e3700c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Aug 2018 08:37:46 -0600 Subject: Fix up libata MAINTAINERS entry The email was botched in one entry, and I also forgot to update the location of the git tree. It'll be under the linux-block umbrella, just with different branches. Reported-by: Baruch Siach Fixes: 7634ccd2da97 ("libata: maintainership update") Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a5b256b259057c..9ad052aeac39be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8255,9 +8255,9 @@ F: drivers/ata/pata_arasan_cf.c LIBATA PATA DRIVERS M: Bartlomiej Zolnierkiewicz -M: Jens Axboe +M: Jens Axboe L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: drivers/ata/pata_*.c F: drivers/ata/ata_generic.c @@ -8275,7 +8275,7 @@ LIBATA SATA AHCI PLATFORM devices support M: Hans de Goede M: Jens Axboe L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: drivers/ata/ahci_platform.c F: drivers/ata/libahci_platform.c @@ -8291,7 +8291,7 @@ F: drivers/ata/sata_promise.* LIBATA SUBSYSTEM (Serial and Parallel ATA drivers) M: Jens Axboe L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: drivers/ata/ F: include/linux/ata.h -- cgit 1.2.3-korg From 973e5405f2f67ddbb2bf07b3ffc71908a37fea8e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:10 +0200 Subject: xen/blkback: don't keep persistent grants too long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent grants are allocated until a threshold per ring is being reached. Those grants won't be freed until the ring is being destroyed meaning there will be resources kept busy which might no longer be used. Instead of freeing only persistent grants until the threshold is reached add a timestamp and remove all persistent grants not having been in use for a minute. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/ABI/testing/sysfs-driver-xen-blkback | 10 +++ drivers/block/xen-blkback/blkback.c | 88 ++++++++++++---------- drivers/block/xen-blkback/common.h | 8 +- 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback index 8bb43b66eb55a1..4e7babb3ba1fec 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkback +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback @@ -15,3 +15,13 @@ Description: blkback. If the frontend tries to use more than max_persistent_grants, the LRU kicks in and starts removing 5% of max_persistent_grants every 100ms. + +What: /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds +Date: August 2018 +KernelVersion: 4.19 +Contact: Roger Pau Monné +Description: + How long a persistent grant is allowed to remain + allocated without being in use. The time is in + seconds, 0 means indefinitely long. + The default is 60 seconds. diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b55b245e805205..9eae7b243f68b8 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -83,6 +83,18 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); +/* + * How long a persistent grant is allowed to remain allocated without being in + * use. The time is in seconds, 0 means indefinitely long. + */ + +static unsigned int xen_blkif_pgrant_timeout = 60; +module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout, + uint, 0644); +MODULE_PARM_DESC(persistent_grant_unused_seconds, + "Time in seconds an unused persistent grant is allowed to " + "remain allocated. Default is 60, 0 means unlimited."); + /* * Maximum number of rings/queues blkback supports, allow as many queues as there * are CPUs if user has not specified a value. @@ -123,6 +135,13 @@ module_param(log_stats, int, 0644); /* Number of free pages to remove on each call to gnttab_free_pages */ #define NUM_BATCH_FREE_PAGES 10 +static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) +{ + return xen_blkif_pgrant_timeout && + (jiffies - persistent_gnt->last_used >= + HZ * xen_blkif_pgrant_timeout); +} + static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) { unsigned long flags; @@ -278,7 +297,7 @@ static void put_persistent_gnt(struct xen_blkif_ring *ring, { if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) pr_alert_ratelimited("freeing a grant already unused\n"); - set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + persistent_gnt->last_used = jiffies; clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); atomic_dec(&ring->persistent_gnt_in_use); } @@ -371,26 +390,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) struct persistent_gnt *persistent_gnt; struct rb_node *n; unsigned int num_clean, total; - bool scan_used = false, clean_used = false; + bool scan_used = false; struct rb_root *root; - if (ring->persistent_gnt_c < xen_blkif_max_pgrants || - (ring->persistent_gnt_c == xen_blkif_max_pgrants && - !ring->blkif->vbd.overflow_max_grants)) { - goto out; - } - if (work_busy(&ring->persistent_purge_work)) { pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); goto out; } - num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; - num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; - num_clean = min(ring->persistent_gnt_c, num_clean); - if ((num_clean == 0) || - (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use)))) - goto out; + if (ring->persistent_gnt_c < xen_blkif_max_pgrants || + (ring->persistent_gnt_c == xen_blkif_max_pgrants && + !ring->blkif->vbd.overflow_max_grants)) { + num_clean = 0; + } else { + num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + + num_clean; + num_clean = min(ring->persistent_gnt_c, num_clean); + pr_debug("Going to purge at least %u persistent grants\n", + num_clean); + } /* * At this point, we can assure that there will be no calls @@ -401,9 +420,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) * number of grants. */ - total = num_clean; - - pr_debug("Going to purge %u persistent grants\n", num_clean); + total = 0; BUG_ON(!list_empty(&ring->persistent_purge_list)); root = &ring->persistent_gnts; @@ -412,46 +429,37 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (clean_used) { - clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); - continue; - } - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) continue; - if (!scan_used && - (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) + continue; + if (scan_used && total >= num_clean) continue; rb_erase(&persistent_gnt->node, root); list_add(&persistent_gnt->remove_node, &ring->persistent_purge_list); - if (--num_clean == 0) - goto finished; + total++; } /* - * If we get here it means we also need to start cleaning + * Check whether we also need to start cleaning * grants that were used since last purge in order to cope * with the requested num */ - if (!scan_used && !clean_used) { - pr_debug("Still missing %u purged frames\n", num_clean); + if (!scan_used && total < num_clean) { + pr_debug("Still missing %u purged frames\n", num_clean - total); scan_used = true; goto purge_list; } -finished: - if (!clean_used) { - pr_debug("Finished scanning for grants to clean, removing used flag\n"); - clean_used = true; - goto purge_list; - } - ring->persistent_gnt_c -= (total - num_clean); - ring->blkif->vbd.overflow_max_grants = 0; + if (total) { + ring->persistent_gnt_c -= total; + ring->blkif->vbd.overflow_max_grants = 0; - /* We can defer this work */ - schedule_work(&ring->persistent_purge_work); - pr_debug("Purged %u/%u\n", (total - num_clean), total); + /* We can defer this work */ + schedule_work(&ring->persistent_purge_work); + pr_debug("Purged %u/%u\n", num_clean, total); + } out: return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index ecb35fe8ca8dbb..7bff72db3b7eb7 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -234,14 +234,9 @@ struct xen_vbd { struct backend_info; /* Number of available flags */ -#define PERSISTENT_GNT_FLAGS_SIZE 2 +#define PERSISTENT_GNT_FLAGS_SIZE 1 /* This persistent grant is currently in use */ #define PERSISTENT_GNT_ACTIVE 0 -/* - * This persistent grant has been used, this flag is set when we remove the - * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently. - */ -#define PERSISTENT_GNT_WAS_ACTIVE 1 /* Number of requests that we can fit in a ring */ #define XEN_BLKIF_REQS_PER_PAGE 32 @@ -250,6 +245,7 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; + unsigned long last_used; DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); struct rb_node node; struct list_head remove_node; -- cgit 1.2.3-korg From a46b53672b2c2e3770b38a4abf90d16364d2584b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:11 +0200 Subject: xen/blkfront: cleanup stale persistent grants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a periodic cleanup function to remove old persistent grants which are no longer in use on the backend side. This avoids starvation in case there are lots of persistent grants for a device which no longer is involved in I/O business. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 94 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8986adab9bf585..a2a395f85a4156 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq) static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; +static struct delayed_work blkfront_work; +static LIST_HEAD(info_list); /* * Maximum number of segments in indirect requests, the actual value used by @@ -216,6 +219,7 @@ struct blkfront_info /* Save uncomplete reqs and bios for migration. */ struct list_head requests; struct bio_list bio_list; + struct list_head info_list; }; static unsigned int nr_minors; @@ -1759,6 +1763,12 @@ abort_transaction: return err; } +static void free_info(struct blkfront_info *info) +{ + list_del(&info->info_list); + kfree(info); +} + /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1880,7 +1890,10 @@ again: destroy_blkring: blkif_free(info, 0); - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); + dev_set_drvdata(&dev->dev, NULL); return err; @@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); + mutex_lock(&blkfront_mutex); + list_add(&info->info_list, &info_list); + mutex_unlock(&blkfront_mutex); + return 0; } @@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST) indirect_segments = 0; info->max_indirect_segments = indirect_segments; + + if (info->feature_persistent) { + mutex_lock(&blkfront_mutex); + schedule_delayed_work(&blkfront_work, HZ * 10); + mutex_unlock(&blkfront_mutex); + } } /* @@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) mutex_unlock(&info->mutex); if (!bdev) { - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); return 0; } @@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) if (info && !bdev->bd_openers) { xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); } mutex_unlock(&bdev->bd_mutex); @@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + free_info(info); } out: @@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = { .is_ready = blkfront_is_ready, }; +static void purge_persistent_grants(struct blkfront_info *info) +{ + unsigned int i; + unsigned long flags; + + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct grant *gnt_list_entry, *tmp; + + spin_lock_irqsave(&rinfo->ring_lock, flags); + + if (rinfo->persistent_gnts_c == 0) { + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + continue; + } + + list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, + node) { + if (gnt_list_entry->gref == GRANT_INVALID_REF || + gnttab_query_foreign_access(gnt_list_entry->gref)) + continue; + + list_del(&gnt_list_entry->node); + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + rinfo->persistent_gnts_c--; + __free_page(gnt_list_entry->page); + kfree(gnt_list_entry); + } + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + } +} + +static void blkfront_delay_work(struct work_struct *work) +{ + struct blkfront_info *info; + bool need_schedule_work = false; + + mutex_lock(&blkfront_mutex); + + list_for_each_entry(info, &info_list, info_list) { + if (info->feature_persistent) { + need_schedule_work = true; + mutex_lock(&info->mutex); + purge_persistent_grants(info); + mutex_unlock(&info->mutex); + } + } + + if (need_schedule_work) + schedule_delayed_work(&blkfront_work, HZ * 10); + + mutex_unlock(&blkfront_mutex); +} + static int __init xlblk_init(void) { int ret; @@ -2650,6 +2732,8 @@ static int __init xlblk_init(void) return -ENODEV; } + INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); + ret = xenbus_register_frontend(&blkfront_driver); if (ret) { unregister_blkdev(XENVBD_MAJOR, DEV_NAME); @@ -2663,6 +2747,8 @@ module_init(xlblk_init); static void __exit xlblk_exit(void) { + cancel_delayed_work_sync(&blkfront_work); + xenbus_unregister_driver(&blkfront_driver); unregister_blkdev(XENVBD_MAJOR, DEV_NAME); kfree(minors); -- cgit 1.2.3-korg From 4bcddbae019df2614ea36976ba3d36313f93c6d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:12 +0200 Subject: xen/blkfront: reorder tests in xlblk_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we don't want pv block devices we should not test parameters for sanity and eventually print out error messages. So test precluding conditions before checking parameters. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a2a395f85a4156..a71d817e900ddc 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2708,6 +2708,15 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_has_pv_disk_devices()) + return -ENODEV; + + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { + pr_warn("xen_blk: can't get major %d with name %s\n", + XENVBD_MAJOR, DEV_NAME); + return -ENODEV; + } + if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -2723,15 +2732,6 @@ static int __init xlblk_init(void) xen_blkif_max_queues = nr_cpus; } - if (!xen_has_pv_disk_devices()) - return -ENODEV; - - if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { - printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", - XENVBD_MAJOR, DEV_NAME); - return -ENODEV; - } - INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); ret = xenbus_register_frontend(&blkfront_driver); -- cgit 1.2.3-korg From d77ff24e7fa2258877fa0b87efa06b9a58a37aab Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:13 +0200 Subject: xen/blkback: move persistent grants flags to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct persistent_gnt flags member is meant to be a bitfield of different flags. There is only PERSISTENT_GNT_ACTIVE flag left, so convert it to a bool named "active". Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++++++------- drivers/block/xen-blkback/common.h | 7 +------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9eae7b243f68b8..fd1e19f1a49f58 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -255,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, } } - bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); - set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = true; /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts); @@ -280,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, else if (gref > data->gnt) node = node->rb_right; else { - if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + if (data->active) { pr_alert_ratelimited("requesting a grant already in use\n"); return NULL; } - set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + data->active = true; atomic_inc(&ring->persistent_gnt_in_use); return data; } @@ -295,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, static void put_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *persistent_gnt) { - if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!persistent_gnt->active) pr_alert_ratelimited("freeing a grant already unused\n"); persistent_gnt->last_used = jiffies; - clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = false; atomic_dec(&ring->persistent_gnt_in_use); } @@ -429,7 +428,7 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (persistent_gnt->active) continue; if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) continue; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 7bff72db3b7eb7..2339b8d39c5ea8 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -233,11 +233,6 @@ struct xen_vbd { struct backend_info; -/* Number of available flags */ -#define PERSISTENT_GNT_FLAGS_SIZE 1 -/* This persistent grant is currently in use */ -#define PERSISTENT_GNT_ACTIVE 0 - /* Number of requests that we can fit in a ring */ #define XEN_BLKIF_REQS_PER_PAGE 32 @@ -246,7 +241,7 @@ struct persistent_gnt { grant_ref_t gnt; grant_handle_t handle; unsigned long last_used; - DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); + bool active; struct rb_node node; struct list_head remove_node; }; -- cgit 1.2.3-korg From 6f2f39ad1a54978394851c05e327419ebeb7227e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:14 +0200 Subject: xen/blkback: remove unused pers_gnts_lock from struct xen_blkif_ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pers_gnts_lock isn't being used anywhere. Remove it. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 2339b8d39c5ea8..1d3002d773f7ad 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -269,7 +269,6 @@ struct xen_blkif_ring { wait_queue_head_t pending_free_wq; /* Tree to store persistent grants. */ - spinlock_t pers_gnts_lock; struct rb_root persistent_gnts; unsigned int persistent_gnt_c; atomic_t persistent_gnt_in_use; -- cgit 1.2.3-korg From 061a5427530633de93ace4ef001b99961984af62 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Aug 2018 10:09:06 -0600 Subject: blk-wbt: abstract out end IO completion handler Prep patch for calling the handler from a different context, no functional changes in this patch. Tested-by: Agarwal, Anchal Signed-off-by: Jens Axboe --- block/blk-wbt.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 84507d3e9a981d..4575b465037013 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb) } } -static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, + enum wbt_flags wb_acct) { - struct rq_wb *rwb = RQWB(rqos); - struct rq_wait *rqw; int inflight, limit; - if (!(wb_acct & WBT_TRACKED)) - return; - - rqw = get_rq_wait(rwb, wb_acct); inflight = atomic_dec_return(&rqw->inflight); /* @@ -170,6 +165,18 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) } } +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +{ + struct rq_wb *rwb = RQWB(rqos); + struct rq_wait *rqw; + + if (!(wb_acct & WBT_TRACKED)) + return; + + rqw = get_rq_wait(rwb, wb_acct); + wbt_rqw_done(rwb, rqw, wb_acct); +} + /* * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. -- cgit 1.2.3-korg From 38cfb5a45ee013bfab5d1ae4c4738815e744b440 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Aug 2018 10:10:05 -0600 Subject: blk-wbt: improve waking of tasks We have two potential issues: 1) After commit 2887e41b910b, we only wake one process at the time when we finish an IO. We really want to wake up as many tasks as can queue IO. Before this commit, we woke up everyone, which could cause a thundering herd issue. 2) A task can potentially consume two wakeups, causing us to (in practice) miss a wakeup. Fix both by providing our own wakeup function, which stops __wake_up_common() from waking up more tasks if we fail to get a queueing token. With the strict ordering we have on the wait list, this wakes the right tasks and the right amount of tasks. Based on a patch from Jianchao Wang . Tested-by: Agarwal, Anchal Signed-off-by: Jens Axboe --- block/blk-wbt.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4575b465037013..bfb0d21d19ce37 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -161,7 +161,7 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, int diff = limit - inflight; if (!inflight || diff >= rwb->wb_background / 2) - wake_up(&rqw->wait); + wake_up_all(&rqw->wait); } } @@ -488,6 +488,34 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) return limit; } +struct wbt_wait_data { + struct wait_queue_entry wq; + struct task_struct *task; + struct rq_wb *rwb; + struct rq_wait *rqw; + unsigned long rw; + bool got_token; +}; + +static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up + * loop in __wake_up_common. + */ + if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw))) + return -1; + + data->got_token = true; + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; +} + /* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. @@ -498,19 +526,40 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, __acquires(lock) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); - DECLARE_WAITQUEUE(wait, current); + struct wbt_wait_data data = { + .wq = { + .func = wbt_wake_function, + .entry = LIST_HEAD_INIT(data.wq.entry), + }, + .task = current, + .rwb = rwb, + .rqw = rqw, + .rw = rw, + }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) return; - add_wait_queue_exclusive(&rqw->wait, &wait); + prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { - set_current_state(TASK_UNINTERRUPTIBLE); + if (data.got_token) + break; - if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) + if (!has_sleeper && + rq_wait_inc_below(rqw, get_limit(rwb, rw))) { + finish_wait(&rqw->wait, &data.wq); + + /* + * We raced with wbt_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + */ + if (data.got_token) + wbt_rqw_done(rwb, rqw, wb_acct); break; + } if (lock) { spin_unlock_irq(lock); @@ -518,11 +567,11 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, spin_lock_irq(lock); } else io_schedule(); + has_sleeper = false; } while (1); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&rqw->wait, &wait); + finish_wait(&rqw->wait, &data.wq); } static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) -- cgit 1.2.3-korg From 336d139f8718b1336c9d22f0e462611ae1229850 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 16:01:41 +0900 Subject: mtd: rawnand: denali: do not pass zero maxchips to nand_scan() Commit 49aa76b16676 ("mtd: rawnand: do not execute nand_scan_ident() if maxchips is zero") gave a new meaning for calling nand_scan_ident() with maxchips=0. It is a special usage for some drivers such as docg4, but actually the Denali driver may pass maxchips=0 to nand_scan() when the driver is enabled but no NAND chip is found on the board for some reasons. If nand_scan_with_ids() is called with maxchips=0, nand_scan_ident() is skipped, then nand_set_defaults() is skipped as well. Thus, the driver must set chip->controller beforehand. Otherwise, nand_attach() causes NULL pointer dereference. In fact, the Denali controller knows the number of connected chips before calling nand_scan_ident(); if DEVICE_RESET fails, there is no chip in that chip select. Then, denali_reset_banks() sets the maxchips to the number of detected chips. If no chip is found, maxchips is zero. In this case, there is no point for calling nand_scan() because we know it will fail for sure. Let's make the probe function fail immediately. Fixes: 49aa76b16676 ("mtd: rawnand: do not execute nand_scan_ident() if maxchips is zero") Signed-off-by: Masahiro Yamada Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/denali.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index ca18612c420142..67b2065e7a19ca 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1338,6 +1338,11 @@ int denali_init(struct denali_nand_info *denali) denali_enable_irq(denali); denali_reset_banks(denali); + if (!denali->max_banks) { + /* Error out earlier if no chip is found for some reasons. */ + ret = -ENODEV; + goto disable_irq; + } denali->active_bank = DENALI_INVALID_BANK; -- cgit 1.2.3-korg From b0a84beb2e35536839ea289182684528f379b860 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Aug 2018 13:32:12 -0600 Subject: blk-wbt: remove dead code We already note and mark discard and swap IO from bio_to_wbt_flags(). Signed-off-by: Jens Axboe --- block/blk-wbt.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index bfb0d21d19ce37..8e20a0677dcf69 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -636,11 +636,6 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) return; } - if (current_is_kswapd()) - flags |= WBT_KSWAPD; - if (bio_op(bio) == REQ_OP_DISCARD) - flags |= WBT_DISCARD; - __wbt_wait(rwb, flags, bio->bi_opf, lock); if (!blk_stat_is_active(rwb->cb)) -- cgit 1.2.3-korg From 46cb52ad414ac829680d0bb8cc7090ac2b577ca7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 15 Jul 2018 22:09:29 +0200 Subject: ata: ftide010: Add a quirk for SQ201 The DMA is broken on this specific device for some unknown reason (probably badly designed or plain broken interface electronics) and will only work with PIO. Other users of the same hardware does not have this problem. Add a specific quirk so that this Gemini device gets DMA turned off. Also fix up some code around passing the port information around in probe while we're at it. Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe --- drivers/ata/pata_ftide010.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c index 5d4b72e21161a8..569a4a662dcd4d 100644 --- a/drivers/ata/pata_ftide010.c +++ b/drivers/ata/pata_ftide010.c @@ -256,14 +256,12 @@ static struct ata_port_operations pata_ftide010_port_ops = { .qc_issue = ftide010_qc_issue, }; -static struct ata_port_info ftide010_port_info[] = { - { - .flags = ATA_FLAG_SLAVE_POSS, - .mwdma_mask = ATA_MWDMA2, - .udma_mask = ATA_UDMA6, - .pio_mask = ATA_PIO4, - .port_ops = &pata_ftide010_port_ops, - }, +static struct ata_port_info ftide010_port_info = { + .flags = ATA_FLAG_SLAVE_POSS, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .pio_mask = ATA_PIO4, + .port_ops = &pata_ftide010_port_ops, }; #if IS_ENABLED(CONFIG_SATA_GEMINI) @@ -349,6 +347,7 @@ static int pata_ftide010_gemini_cable_detect(struct ata_port *ap) } static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { struct device *dev = ftide->dev; @@ -373,7 +372,13 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, /* Flag port as SATA-capable */ if (gemini_sata_bridge_enabled(sg, is_ata1)) - ftide010_port_info[0].flags |= ATA_FLAG_SATA; + pi->flags |= ATA_FLAG_SATA; + + /* This device has broken DMA, only PIO works */ + if (of_machine_is_compatible("itian,sq201")) { + pi->mwdma_mask = 0; + pi->udma_mask = 0; + } /* * We assume that a simple 40-wire cable is used in the PATA mode. @@ -435,6 +440,7 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, } #else static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { return -ENOTSUPP; @@ -446,7 +452,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - const struct ata_port_info pi = ftide010_port_info[0]; + struct ata_port_info pi = ftide010_port_info; const struct ata_port_info *ppi[] = { &pi, NULL }; struct ftide010 *ftide; struct resource *res; @@ -490,6 +496,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) * are ATA0. This will also set up the cable types. */ ret = pata_ftide010_gemini_init(ftide, + &pi, (res->start == 0x63400000)); if (ret) goto err_dis_clk; -- cgit 1.2.3-korg From 62d2a1940709198a522a43ff8be8b8f6b3654dec Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 28 Aug 2018 07:31:11 +0800 Subject: block: remove unnecessary condition check kmem_cache_destroy() can handle NULL pointer correctly, so there is no need to check e->icq_cache before calling kmem_cache_destroy(). Signed-off-by: Chengguang Xu Signed-off-by: Jens Axboe --- block/elevator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 5ea6e7d600e46e..6a06b5d040e5dd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -895,8 +895,7 @@ int elv_register(struct elevator_type *e) spin_lock(&elv_list_lock); if (elevator_find(e->elevator_name, e->uses_mq)) { spin_unlock(&elv_list_lock); - if (e->icq_cache) - kmem_cache_destroy(e->icq_cache); + kmem_cache_destroy(e->icq_cache); return -EBUSY; } list_add_tail(&e->list, &elv_list); -- cgit 1.2.3-korg From db193954ed9e35701b6e489fa4cc97b08589341b Mon Sep 17 00:00:00 2001 From: John Pittman Date: Mon, 27 Aug 2018 14:33:05 -0400 Subject: block: bsg: move atomic_t ref_count variable to refcount API Currently, variable ref_count within the bsg_device struct is of type atomic_t. For variables being used as reference counters, the refcount API should be used instead of atomic. The newer refcount API works to prevent counter overflows and use-after-free bugs. So, move this varable from the atomic API to refcount, potentially avoiding the issues mentioned. Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- block/bsg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index db588add6ba662..9a442c23a715e2 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -37,7 +37,7 @@ struct bsg_device { struct request_queue *queue; spinlock_t lock; struct hlist_node dev_list; - atomic_t ref_count; + refcount_t ref_count; char name[20]; int max_queue; }; @@ -252,7 +252,7 @@ static int bsg_put_device(struct bsg_device *bd) mutex_lock(&bsg_mutex); - if (!atomic_dec_and_test(&bd->ref_count)) { + if (!refcount_dec_and_test(&bd->ref_count)) { mutex_unlock(&bsg_mutex); return 0; } @@ -290,7 +290,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, bd->queue = rq; - atomic_set(&bd->ref_count, 1); + refcount_set(&bd->ref_count, 1); hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); @@ -308,7 +308,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { - atomic_inc(&bd->ref_count); + refcount_inc(&bd->ref_count); goto found; } } -- cgit 1.2.3-korg From f1ed3df20d2d223e0852cc4ac1f19bba869a7e3c Mon Sep 17 00:00:00 2001 From: Michal Wnukowski Date: Wed, 15 Aug 2018 15:51:57 -0700 Subject: nvme-pci: add a memory barrier to nvme_dbbuf_update_and_check_event In many architectures loads may be reordered with older stores to different locations. In the nvme driver the following two operations could be reordered: - Write shadow doorbell (dbbuf_db) into memory. - Read EventIdx (dbbuf_ei) from memory. This can result in a potential race condition between driver and VM host processing requests (if given virtual NVMe controller has a support for shadow doorbell). If that occurs, then the NVMe controller may decide to wait for MMIO doorbell from guest operating system, and guest driver may decide not to issue MMIO doorbell on any of subsequent commands. This issue is purely timing-dependent one, so there is no easy way to reproduce it. Currently the easiest known approach is to run "Oracle IO Numbers" (orion) that is shipped with Oracle DB: orion -run advanced -num_large 0 -size_small 8 -type rand -simulate \ concat -write 40 -duration 120 -matrix row -testname nvme_test Where nvme_test is a .lun file that contains a list of NVMe block devices to run test against. Limiting number of vCPUs assigned to given VM instance seems to increase chances for this bug to occur. On test environment with VM that got 4 NVMe drives and 1 vCPU assigned the virtual NVMe controller hang could be observed within 10-20 minutes. That correspond to about 400-500k IO operations processed (or about 100GB of IO read/writes). Orion tool was used as a validation and set to run in a loop for 36 hours (equivalent of pushing 550M IO operations). No issues were observed. That suggest that the patch fixes the issue. Fixes: f9f38e33389c ("nvme: improve performance for virtual NVMe devices") Signed-off-by: Michal Wnukowski Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg [hch: updated changelog and comment a bit] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b9951d2067e63..d668682f91dfdb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -316,6 +316,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, old_value = *dbbuf_db; *dbbuf_db = value; + /* + * Ensure that the doorbell is updated before reading the event + * index from memory. The controller needs to provide similar + * ordering to ensure the envent index is updated before reading + * the doorbell. + */ + mb(); + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) return false; } -- cgit 1.2.3-korg From afd299ca996929f4f98ac20da0044c0cdc124879 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 9 Aug 2018 16:00:14 -0700 Subject: nvme-fcloop: Fix dropped LS's to removed target port When a targetport is removed from the config, fcloop will avoid calling the LS done() routine thinking the targetport is gone. This leaves the initiator reset/reconnect hanging as it waits for a status on the Create_Association LS for the reconnect. Change the filter in the LS callback path. If tport null (set when failed validation before "sending to remote port"), be sure to call done. This was the main bug. But, continue the logic that only calls done if tport was set but there is no remoteport (e.g. case where remoteport has been removed, thus host doesn't expect a completion). Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 34712def81b15a..5251689a1d9ac2 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -311,7 +311,7 @@ fcloop_tgt_lsrqst_done_work(struct work_struct *work) struct fcloop_tport *tport = tls_req->tport; struct nvmefc_ls_req *lsreq = tls_req->lsreq; - if (tport->remoteport) + if (!tport || tport->remoteport) lsreq->done(lsreq, tls_req->status); } @@ -329,6 +329,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; + tls_req->tport = NULL; schedule_work(&tls_req->work); return ret; } -- cgit 1.2.3-korg From 04db0e5ec58167364a80fd33ddb4f3b67434eb85 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 15 Aug 2018 18:48:25 -0700 Subject: nvmet: free workqueue object if module init fails Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ebf3e7a6c49ee2..b5ec96abd04870 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1210,7 +1210,7 @@ static int __init nvmet_init(void) error = nvmet_init_discovery(); if (error) - goto out; + goto out_free_work_queue; error = nvmet_init_configfs(); if (error) @@ -1219,6 +1219,8 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); +out_free_work_queue: + destroy_workqueue(buffered_io_wq); out: return error; } -- cgit 1.2.3-korg From 11f65ad111fa29de2d11929f773bf1e553d5b7c4 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 20 Aug 2018 15:47:57 -0700 Subject: dt-bindings: riscv,cpu-intc: Cleanups from a missed review I managed to miss one of Rob's code reviews on the mailing list . The patch has already been merged, so I'm submitting a fixup. Sorry! Fixes: b67bc7cb4088 ("dt-bindings: interrupt-controller: RISC-V local interrupt controller") Cc: Rob Herring Cc: Christoph Hellwig Cc: Karsten Merker Signed-off-by: Palmer Dabbelt --- .../bindings/interrupt-controller/riscv,cpu-intc.txt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt index b0a8af51c388c5..265b223cd97801 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt @@ -11,7 +11,7 @@ The RISC-V supervisor ISA manual specifies three interrupt sources that are attached to every HLIC: software interrupts, the timer interrupt, and external interrupts. Software interrupts are used to send IPIs between cores. The timer interrupt comes from an architecturally mandated real-time timer that is -controller via Supervisor Binary Interface (SBI) calls and CSR reads. External +controlled via Supervisor Binary Interface (SBI) calls and CSR reads. External interrupts connect all other device interrupts to the HLIC, which are routed via the platform-level interrupt controller (PLIC). @@ -25,7 +25,15 @@ in the system. Required properties: - compatible : "riscv,cpu-intc" -- #interrupt-cells : should be <1> +- #interrupt-cells : should be <1>. The interrupt sources are defined by the + RISC-V supervisor ISA manual, with only the following three interrupts being + defined for supervisor mode: + - Source 1 is the supervisor software interrupt, which can be sent by an SBI + call and is reserved for use by software. + - Source 5 is the supervisor timer interrupt, which can be configured by + SBI calls and implements a one-shot timer. + - Source 9 is the supervisor external interrupt, which chains to all other + device interrupts. - interrupt-controller : Identifies the node as an interrupt controller Furthermore, this interrupt-controller MUST be embedded inside the cpu @@ -38,7 +46,7 @@ An example device tree entry for a HLIC is show below. ... cpu1-intc: interrupt-controller { #interrupt-cells = <1>; - compatible = "riscv,cpu-intc", "sifive,fu540-c000-cpu-intc"; + compatible = "sifive,fu540-c000-cpu-intc", "riscv,cpu-intc"; interrupt-controller; }; }; -- cgit 1.2.3-korg From 0ce5671c4450527f90d2bfb31302f78580587983 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 24 Aug 2018 11:22:55 -0700 Subject: riscv: tlb: Provide definition of tlb_flush() before including tlb.h As of commit fd1102f0aade ("mm: mmu_notifier fix for tlb_end_vma"), asm-generic/tlb.h now calls tlb_flush() from a static inline function, so we need to make sure that it's declared before #including the asm-generic header in the arch header. Reported-by: Guenter Roeck Fixes: fd1102f0aade ("mm: mmu_notifier fix for tlb_end_vma") Signed-off-by: Will Deacon [groeck: Use forward declaration instead of moving inline function] Signed-off-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlb.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index c229509288ea2d..439dc7072e05bf 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -14,6 +14,10 @@ #ifndef _ASM_RISCV_TLB_H #define _ASM_RISCV_TLB_H +struct mmu_gather; + +static void tlb_flush(struct mmu_gather *tlb); + #include static inline void tlb_flush(struct mmu_gather *tlb) -- cgit 1.2.3-korg From 47d80a68f10d3290204a12f7836a9a8190dfc327 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Aug 2018 09:37:16 -0700 Subject: RISC-V: Use a less ugly workaround for unused variable warnings Thanks to Christoph Hellwig for pointing out a cleaner way to do this, as my approach was quite ugly. CC: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 568026ccf6e871..fb03a4482ad60a 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -65,24 +65,11 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, uintptr_t, flags) { -#ifdef CONFIG_SMP - struct mm_struct *mm = current->mm; - bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; -#endif - /* Check the reserved flags. */ if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL)) return -EINVAL; - /* - * Without CONFIG_SMP flush_icache_mm is a just a flush_icache_all(), - * which generates unused variable warnings all over this function. - */ -#ifdef CONFIG_SMP - flush_icache_mm(mm, local); -#else - flush_icache_all(); -#endif + flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL); return 0; } -- cgit 1.2.3-korg From 8f3fafc9c2f0ece10832c25f7ffcb07c97a32ad4 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 26 Apr 2018 11:51:08 -0600 Subject: cdrom: Fix info leak/OOB read in cdrom_ioctl_drive_status Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()" There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). Signed-off-by: Scott Bauer Signed-off-by: Scott Bauer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 113fc6edb2b037..a5d5a96479bfe8 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2546,7 +2546,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || (arg == CDSL_CURRENT || arg == CDSL_NONE)) return cdi->ops->drive_status(cdi, CDSL_CURRENT); - if (((int)arg >= cdi->capacity)) + if (arg >= cdi->capacity) return -EINVAL; return cdrom_slot_status(cdi, arg); } -- cgit 1.2.3-korg From ff69279a44e9ba876466b7d3ab84d6dbd31cac92 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Aug 2018 08:47:53 +0200 Subject: powerpc: disable support for relative ksymtab references The newly added code that emits ksymtab entries as pairs of 32-bit relative references interacts poorly with the way powerpc lays out its address space: when a module exports a per-CPU variable, the primary module region covering the ksymtab entry -and thus the 32-bit relative reference- is too far away from the actual per-CPU variable's base address (to which the per-CPU offsets are applied to obtain the respective address of each CPU's copy), resulting in corruption when the module loader attempts to resolve symbol references of modules that are loaded on top and link to the exported per-CPU symbol. So let's disable this feature on powerpc. Even though it implements CONFIG_RELOCATABLE, it does not implement CONFIG_RANDOMIZE_BASE and so KASLR kernels (which are the main target of the feature) do not exist on powerpc anyway. Reported-by: Andreas Schwab Suggested-by: Nicholas Piggin Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index db0b6eebbfa5b5..a8066920915538 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -177,7 +177,6 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 -- cgit 1.2.3-korg