From 34485c37ea931d4a086701de1d61a986b9707b7d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 7 Mar 2024 08:55:42 -0800 Subject: nvme: change shutdown timeout setting message User visible messages containing the word "timeout" can be alarming. This one from nvme is just reporting a potentially informative device configuration, and everything is working as designed. Change the text to report the less concerning "D3 entry latency", which is where this value comes from anyway. Reported-by: Len Brown Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2baf5786a92fe6..0dcaf3973dc49d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3233,7 +3233,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ctrl->shutdown_timeout != shutdown_timeout) dev_info(ctrl->device, - "Shutdown timeout set to %u seconds\n", + "D3 entry latency set to %u seconds\n", ctrl->shutdown_timeout); } else ctrl->shutdown_timeout = shutdown_timeout; -- cgit 1.2.3-korg From 0889d13b9e1cbef49e802ae09f3b516911ad82a1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 8 Mar 2024 08:11:05 +0100 Subject: nvmet-tcp: do not continue for invalid icreq When the length check for an icreq sqe fails we should not continue processing but rather return immediately as all other contents of that sqe cannot be relied on. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c8655fc5aa5b8a..022d17bd36bf40 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -898,6 +898,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) pr_err("bad nvme-tcp pdu length (%d)\n", le32_to_cpu(icreq->hdr.plen)); nvmet_tcp_fatal_error(queue); + return -EPROTO; } if (icreq->pfv != NVME_TCP_PFV_1_0) { -- cgit 1.2.3-korg From 1843671f86e93311e12b7dde72736167a07158fd Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 8 Mar 2024 09:51:10 +0100 Subject: nvme-apple: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index a480cdeac2883c..dd6ec0865141a9 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1532,7 +1532,7 @@ put_dev: return ret; } -static int apple_nvme_remove(struct platform_device *pdev) +static void apple_nvme_remove(struct platform_device *pdev) { struct apple_nvme *anv = platform_get_drvdata(pdev); @@ -1547,8 +1547,6 @@ static int apple_nvme_remove(struct platform_device *pdev) apple_rtkit_shutdown(anv->rtk); apple_nvme_detach_genpd(anv); - - return 0; } static void apple_nvme_shutdown(struct platform_device *pdev) @@ -1598,7 +1596,7 @@ static struct platform_driver apple_nvme_driver = { .pm = pm_sleep_ptr(&apple_nvme_pm_ops), }, .probe = apple_nvme_probe, - .remove = apple_nvme_remove, + .remove_new = apple_nvme_remove, .shutdown = apple_nvme_shutdown, }; module_platform_driver(apple_nvme_driver); -- cgit 1.2.3-korg From 8fc3b0f1f47b8b6dd2801deeccae59a3b46c4c39 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 28 Feb 2024 16:37:54 +0800 Subject: nvmet: add tracing of authentication commands Add nvme_fabrics_type_auth_send and nvme_fabrics_type_auth_receive to the nvme target's tracing facility. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/trace.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index 6ee1f3db81d040..ca537fae373000 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -176,6 +176,34 @@ static const char *nvmet_trace_fabrics_property_get(struct trace_seq *p, return ret; } +static const char *nvmet_trace_fabrics_auth_send(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 spsp0 = spc[1]; + u8 spsp1 = spc[2]; + u8 secp = spc[3]; + u32 tl = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, tl=%u", + spsp0, spsp1, secp, tl); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvmet_trace_fabrics_auth_receive(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 spsp0 = spc[1]; + u8 spsp1 = spc[2]; + u8 secp = spc[3]; + u32 al = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, al=%u", + spsp0, spsp1, secp, al); + trace_seq_putc(p, 0); + return ret; +} + static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc) { const char *ret = trace_seq_buffer_ptr(p); @@ -195,6 +223,10 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, return nvmet_trace_fabrics_connect(p, spc); case nvme_fabrics_type_property_get: return nvmet_trace_fabrics_property_get(p, spc); + case nvme_fabrics_type_auth_send: + return nvmet_trace_fabrics_auth_send(p, spc); + case nvme_fabrics_type_auth_receive: + return nvmet_trace_fabrics_auth_receive(p, spc); default: return nvmet_trace_fabrics_common(p, spc); } -- cgit 1.2.3-korg From 2bc91743096756d7c97d10c7079617192211369b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 28 Feb 2024 16:37:55 +0800 Subject: nvmet: add tracing of zns commands Add nvme_cmd_zone_append, nvme_cmd_zone_mgmt_send and nvme_cmd_zone_mgmt_recv parse to nvme target tracing. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/trace.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index ca537fae373000..8d1806a828879a 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -119,6 +119,67 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, } } +static const char *nvmet_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10) +{ + static const char * const zsa_strs[] = { + [0x01] = "close zone", + [0x02] = "finish zone", + [0x03] = "open zone", + [0x04] = "reset zone", + [0x05] = "offline zone", + [0x10] = "set zone descriptor extension" + }; + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + const char *zsa_str; + u8 zsa = cdw10[12]; + u8 all = cdw10[13]; + + if (zsa < ARRAY_SIZE(zsa_strs) && zsa_strs[zsa]) + zsa_str = zsa_strs[zsa]; + else + zsa_str = "reserved"; + + trace_seq_printf(p, "slba=%llu, zsa=%u:%s, all=%u", + slba, zsa, zsa_str, all); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvmet_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10) +{ + static const char * const zrasf_strs[] = { + [0x00] = "list all zones", + [0x01] = "list the zones in the ZSE: Empty state", + [0x02] = "list the zones in the ZSIO: Implicitly Opened state", + [0x03] = "list the zones in the ZSEO: Explicitly Opened state", + [0x04] = "list the zones in the ZSC: Closed state", + [0x05] = "list the zones in the ZSF: Full state", + [0x06] = "list the zones in the ZSRO: Read Only state", + [0x07] = "list the zones in the ZSO: Offline state", + [0x09] = "list the zones that have the zone attribute" + }; + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u32 numd = get_unaligned_le32(&cdw10[8]); + u8 zra = cdw10[12]; + u8 zrasf = cdw10[13]; + const char *zrasf_str; + u8 pr = cdw10[14]; + + if (zrasf < ARRAY_SIZE(zrasf_strs) && zrasf_strs[zrasf]) + zrasf_str = zrasf_strs[zrasf]; + else + zrasf_str = "reserved"; + + trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u:%s, pr=%u", + slba, numd, zra, zrasf, zrasf_str, pr); + trace_seq_putc(p, 0); + + return ret; +} + const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10) { @@ -126,9 +187,14 @@ const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, case nvme_cmd_read: case nvme_cmd_write: case nvme_cmd_write_zeroes: + case nvme_cmd_zone_append: return nvmet_trace_read_write(p, cdw10); case nvme_cmd_dsm: return nvmet_trace_dsm(p, cdw10); + case nvme_cmd_zone_mgmt_send: + return nvmet_trace_zone_mgmt_send(p, cdw10); + case nvme_cmd_zone_mgmt_recv: + return nvmet_trace_zone_mgmt_recv(p, cdw10); default: return nvmet_trace_common(p, cdw10); } -- cgit 1.2.3-korg From de105068fead55ed5c07ade75e9c8e7f86a00d1d Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Mon, 11 Mar 2024 10:09:27 +0800 Subject: nvme: fix reconnection fail due to reserved tag allocation We found a issue on production environment while using NVMe over RDMA, admin_q reconnect failed forever while remote target and network is ok. After dig into it, we found it may caused by a ABBA deadlock due to tag allocation. In my case, the tag was hold by a keep alive request waiting inside admin_q, as we quiesced admin_q while reset ctrl, so the request maked as idle and will not process before reset success. As fabric_q shares tagset with admin_q, while reconnect remote target, we need a tag for connect command, but the only one reserved tag was held by keep alive command which waiting inside admin_q. As a result, we failed to reconnect admin_q forever. In order to fix this issue, I think we should keep two reserved tags for admin queue. Fixes: ed01fee283a0 ("nvme-fabrics: only reserve a single tag") Signed-off-by: Chunguang Xu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 6 ++++-- drivers/nvme/host/fabrics.h | 7 ------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0dcaf3973dc49d..fb55b6ac038562 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4385,7 +4385,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->ops = ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect and keep alive */ + set->reserved_tags = 2; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_NO_SCHED; if (ctrl->ops->flags & NVME_F_BLOCKING) @@ -4454,7 +4455,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) set->reserved_tags = NVME_AQ_DEPTH; else if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect */ + set->reserved_tags = 1; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 06cc54851b1be3..37c974c38dcb07 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -18,13 +18,6 @@ /* default is -1: the fail fast mechanism is disabled */ #define NVMF_DEF_FAIL_FAST_TMO -1 -/* - * Reserved one command for internal usage. This command is used for sending - * the connect command, as well as for the keep alive command on the admin - * queue once live. - */ -#define NVMF_RESERVED_TAGS 1 - /* * Define a host as seen by the target. We allocate one at boot, but also * allow the override it when creating controllers. This is both to provide -- cgit 1.2.3-korg From dcad6f5f4303a224ac7a5802e7deffd46cf6f6cb Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 13 Mar 2024 10:29:05 +0800 Subject: nvme: use nvme_disk_is_ns_head helper Use nvme_disk_is_ns_head helper instead of check fops directly, and also drop CONFIG_NVME_MULTIPATH check. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pr.c | 3 +-- drivers/nvme/host/sysfs.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index fc3eed00f9ff11..e05571b2a1b0c9 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -97,8 +97,7 @@ static int nvme_sc_to_pr_err(int nvme_sc) static int nvme_send_pr_command(struct block_device *bdev, struct nvme_command *c, void *data, unsigned int data_len) { - if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - nvme_disk_is_ns_head(bdev->bd_disk)) + if (nvme_disk_is_ns_head(bdev->bd_disk)) return nvme_send_ns_head_pr_command(bdev, c, data, data_len); return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data, diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6c7f1d5c056fc5..243ebc4d471a8b 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -236,8 +236,7 @@ static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, struct block_device *bdev = disk->part0; int ret; - if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - bdev->bd_disk->fops == &nvme_ns_head_ops) + if (nvme_disk_is_ns_head(bdev->bd_disk)) ret = ns_head_update_nuse(head); else ret = ns_update_nuse(bdev->bd_disk->private_data); -- cgit 1.2.3-korg From 8d539f755c316dd38c8c43c0c25d1b9d26a3b003 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 12 Mar 2024 15:52:43 +0800 Subject: nvme: parse zns command's zsa and zrasf to string Parse zone mgmt send commands's zsa and receive command's zrasf to string to make the trace log more human-readable. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/trace.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 1c36fcedea2008..5e94b3c6d58a93 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -164,12 +164,27 @@ static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10) static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10) { + static const char * const zsa_strs[] = { + [0x01] = "close zone", + [0x02] = "finish zone", + [0x03] = "open zone", + [0x04] = "reset zone", + [0x05] = "offline zone", + [0x10] = "set zone descriptor extension" + }; const char *ret = trace_seq_buffer_ptr(p); u64 slba = get_unaligned_le64(cdw10); + const char *zsa_str; u8 zsa = cdw10[12]; u8 all = cdw10[13]; - trace_seq_printf(p, "slba=%llu, zsa=%u, all=%u", slba, zsa, all); + if (zsa < ARRAY_SIZE(zsa_strs) && zsa_strs[zsa]) + zsa_str = zsa_strs[zsa]; + else + zsa_str = "reserved"; + + trace_seq_printf(p, "slba=%llu, zsa=%u:%s, all=%u", + slba, zsa, zsa_str, all); trace_seq_putc(p, 0); return ret; @@ -177,15 +192,32 @@ static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10) static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10) { + static const char * const zrasf_strs[] = { + [0x00] = "list all zones", + [0x01] = "list the zones in the ZSE: Empty state", + [0x02] = "list the zones in the ZSIO: Implicitly Opened state", + [0x03] = "list the zones in the ZSEO: Explicitly Opened state", + [0x04] = "list the zones in the ZSC: Closed state", + [0x05] = "list the zones in the ZSF: Full state", + [0x06] = "list the zones in the ZSRO: Read Only state", + [0x07] = "list the zones in the ZSO: Offline state", + [0x09] = "list the zones that have the zone attribute" + }; const char *ret = trace_seq_buffer_ptr(p); u64 slba = get_unaligned_le64(cdw10); u32 numd = get_unaligned_le32(cdw10 + 8); u8 zra = cdw10[12]; u8 zrasf = cdw10[13]; + const char *zrasf_str; u8 pr = cdw10[14]; - trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u, pr=%u", - slba, numd, zra, zrasf, pr); + if (zrasf < ARRAY_SIZE(zrasf_strs) && zrasf_strs[zrasf]) + zrasf_str = zrasf_strs[zrasf]; + else + zrasf_str = "reserved"; + + trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u:%s, pr=%u", + slba, numd, zra, zrasf, zrasf_str, pr); trace_seq_putc(p, 0); return ret; -- cgit 1.2.3-korg From 6a0164f9f4a0b629fd7a5414d6c7d21999141b5e Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 31 Jan 2024 17:12:20 +0800 Subject: nvme: add tracing of reservation commands Add detailed parsing of reservation commands to make the trace log more consistent and human-readable. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/trace.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 5e94b3c6d58a93..7e6719a3b624c9 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -223,6 +223,60 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10) return ret; } +static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 rrega = cdw10[0] & 0x7; + u8 iekey = (cdw10[0] >> 3) & 0x1; + u8 ptpl = (cdw10[3] >> 6) & 0x3; + + trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u", + rrega, iekey, ptpl); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 racqa = cdw10[0] & 0x7; + u8 iekey = (cdw10[0] >> 3) & 0x1; + u8 rtype = cdw10[1]; + + trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u", + racqa, iekey, rtype); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 rrela = cdw10[0] & 0x7; + u8 iekey = (cdw10[0] >> 3) & 0x1; + u8 rtype = cdw10[1]; + + trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u", + rrela, iekey, rtype); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_resv_report(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u32 numd = get_unaligned_le32(cdw10); + u8 eds = cdw10[4] & 0x1; + + trace_seq_printf(p, "numd=%u, eds=%u", numd, eds); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_common(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -275,6 +329,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, return nvme_trace_zone_mgmt_send(p, cdw10); case nvme_cmd_zone_mgmt_recv: return nvme_trace_zone_mgmt_recv(p, cdw10); + case nvme_cmd_resv_register: + return nvme_trace_resv_reg(p, cdw10); + case nvme_cmd_resv_acquire: + return nvme_trace_resv_acq(p, cdw10); + case nvme_cmd_resv_release: + return nvme_trace_resv_rel(p, cdw10); + case nvme_cmd_resv_report: + return nvme_trace_resv_report(p, cdw10); default: return nvme_trace_common(p, cdw10); } -- cgit 1.2.3-korg From 798edad968ace3c15c3180ba72e427630022e2d9 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 31 Jan 2024 17:12:22 +0800 Subject: nvme: parse format command's lbafu when tracing Add the parse of format command's lbafu to calculate lbaf. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 7e6719a3b624c9..0288315f005028 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -119,7 +119,10 @@ static const char *nvme_trace_get_lba_status(struct trace_seq *p, static const char *nvme_trace_admin_format_nvm(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); - u8 lbaf = cdw10[0] & 0xF; + /* + * lbafu(bit 13:12) is already in the upper 4 bits, lbafl: bit 03:00. + */ + u8 lbaf = (cdw10[1] & 0x30) | (cdw10[0] & 0xF); u8 mset = (cdw10[0] >> 4) & 0x1; u8 pi = (cdw10[0] >> 5) & 0x7; u8 pil = cdw10[1] & 0x1; -- cgit 1.2.3-korg From e89086c43f0500bc7c4ce225495b73b8ce234c1f Mon Sep 17 00:00:00 2001 From: "Jiawei Fu (iBug)" Date: Sat, 16 Mar 2024 03:27:49 +0800 Subject: drivers/nvme: Add quirks for device 126f:2262 This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:2262], which appears to be a generic VID:PID pair used for many SSDs based on the Silicon Motion SM2262/SM2262EN controller. Two of my SSDs with this VID:PID pair exhibit the same behavior: * They frequently have trouble exiting the deepest power state (5), resulting in the entire disk unresponsive. Verified by setting nvme_core.default_ps_max_latency_us=10000 and observing them behaving normally. * They produce all-zero nguid and eui64 with `nvme id-ns` command. The offending products are: * HP SSD EX950 1TB * HIKVISION C2000Pro 2TB Signed-off-by: Jiawei Fu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e6267a6aa3801e..8e0bb9692685d4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3363,6 +3363,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_BOGUS_NID, }, -- cgit 1.2.3-korg From ec58afb49e90d6fd468b0e21d2de324dff1a711c Mon Sep 17 00:00:00 2001 From: Li Feng Date: Wed, 13 Mar 2024 20:38:09 +0800 Subject: nvme-tcp: Export the nvme_tcp_wq to sysfs Make the workqueue userspace visible for easy viewing and configuration. Signed-off-by: Li Feng Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a6d596e0560211..2ec1186db0a324 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2800,7 +2800,7 @@ static int __init nvme_tcp_init_module(void) BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, 0); if (!nvme_tcp_wq) return -ENOMEM; -- cgit 1.2.3-korg From 0c29f9fa46bbe4fdc218134823d80cf9934ef231 Mon Sep 17 00:00:00 2001 From: Li Feng Date: Wed, 13 Mar 2024 20:38:10 +0800 Subject: nvme/tcp: Add wq_unbound modparam for nvme_tcp_wq The default nvme_tcp_wq will use all CPUs to process tasks. Sometimes it is necessary to set CPU affinity to improve performance. A new module parameter wq_unbound is added here. If set to true, users can configure cpu affinity through /sys/devices/virtual/workqueue/nvme_tcp_wq/cpumask. Signed-off-by: Li Feng Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 2ec1186db0a324..34a882b2ec53d8 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -36,6 +36,14 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +/* + * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity + * from sysfs. + */ +static bool wq_unbound; +module_param(wq_unbound, bool, 0644); +MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)"); + /* * TLS handshake timeout */ @@ -1551,7 +1559,10 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) else if (nvme_tcp_poll_queue(queue)) n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - ctrl->io_queues[HCTX_TYPE_READ] - 1; - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + if (wq_unbound) + queue->io_cpu = WORK_CPU_UNBOUND; + else + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); } static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) @@ -2790,6 +2801,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = { static int __init nvme_tcp_init_module(void) { + unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS; + BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); @@ -2799,8 +2812,10 @@ static int __init nvme_tcp_init_module(void) BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); - nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, 0); + if (wq_unbound) + wq_flags |= WQ_UNBOUND; + + nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0); if (!nvme_tcp_wq) return -ENOMEM; -- cgit 1.2.3-korg From 1e1c4bd16e385f0b1b39920ce3aa16bb33fcdb27 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 20 Mar 2024 17:19:49 +0800 Subject: nvme: remove redundant BUILD_BUG_ON check Remove redundant BUILD_BUG_ON check of struct nvme_dsm_range, it's already checked in nvme_init_ctrl(). Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fb55b6ac038562..2120059337829a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1803,9 +1803,6 @@ static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim) { struct nvme_ctrl *ctrl = ns->ctrl; - BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < - NVME_DSM_MAX_RANGES); - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX)) lim->max_hw_discard_sectors = nvme_lba_to_sect(ns->head, ctrl->dmrsl); -- cgit 1.2.3-korg From 910934da9444dbb102294796481ab05e4419d311 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 20 Mar 2024 19:08:21 +0800 Subject: nvmet-rdma: remove NVMET_RDMA_REQ_INVALIDATE_RKEY flag We can simply use invalidate_rkey to check instead of adding a flag. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f2bb9d95ecf4bc..5b8c63e74639d7 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -53,7 +53,6 @@ struct nvmet_rdma_cmd { enum { NVMET_RDMA_REQ_INLINE_DATA = (1 << 0), - NVMET_RDMA_REQ_INVALIDATE_RKEY = (1 << 1), }; struct nvmet_rdma_rsp { @@ -722,7 +721,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) struct rdma_cm_id *cm_id = rsp->queue->cm_id; struct ib_send_wr *first_wr; - if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) { + if (rsp->invalidate_rkey) { rsp->send_wr.opcode = IB_WR_SEND_WITH_INV; rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey; } else { @@ -905,10 +904,8 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, goto error_out; rsp->n_rdma += ret; - if (invalidate) { + if (invalidate) rsp->invalidate_rkey = key; - rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY; - } return 0; @@ -1047,6 +1044,7 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) rsp->req.cmd = cmd->nvme_cmd; rsp->req.port = queue->port; rsp->n_rdma = 0; + rsp->invalidate_rkey = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; -- cgit 1.2.3-korg