aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-16 12:31:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-16 12:31:44 -0700
commit0d18c12b288a177906e31fecfab58ca2243ffc02 (patch)
tree86e2cdf2c82eb3817f8a38f19d014bad4d8e7a46
parent13fdaf041067a7827b8c3cae095b661aabbc6b65 (diff)
parent05d69d950d9d84218fc9beafd02dea1f6a70e09e (diff)
downloadstaging-0d18c12b288a177906e31fecfab58ca2243ffc02.tar.gz
Merge tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe fixes via Christoph: - fix various races in nvme-pci when shutting down just after probing (Casey Chen) - fix a net_device leak in nvme-tcp (Prabhakar Kushwaha) - Fix regression in xen-blkfront by cleaning up the removal state machine (Christoph) - Fix tag_set and queue cleanup ordering regression in nbd (Wang) - Fix tag_set and queue cleanup ordering regression in pd (Guoqing) * tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block: xen-blkfront: sanitize the removal state machine nbd: fix order of cleaning up the queue and freeing the tagset pd: fix order of cleaning up the queue and freeing the tagset nvme-pci: do not call nvme_dev_remove_admin from nvme_remove nvme-pci: fix multiple races in nvme_setup_io_queues nvme-tcp: use __dev_get_by_name instead dev_get_by_name for OPT_HOST_IFACE
-rw-r--r--drivers/block/nbd.c2
-rw-r--r--drivers/block/paride/pd.c2
-rw-r--r--drivers/block/xen-blkfront.c224
-rw-r--r--drivers/nvme/host/pci.c67
-rw-r--r--drivers/nvme/host/tcp.c4
5 files changed, 87 insertions, 212 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b7d663736d35b3..c38317979f74ec 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -239,8 +239,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)
if (disk) {
del_gendisk(disk);
- blk_mq_free_tag_set(&nbd->tag_set);
blk_cleanup_disk(disk);
+ blk_mq_free_tag_set(&nbd->tag_set);
}
/*
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 3b2b8e872beb6e..9b3298926356d8 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -1014,8 +1014,8 @@ static void __exit pd_exit(void)
if (p) {
disk->gd = NULL;
del_gendisk(p);
- blk_mq_free_tag_set(&disk->tag_set);
blk_cleanup_disk(p);
+ blk_mq_free_tag_set(&disk->tag_set);
pi_release(disk->pi);
}
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8d49f8fa98bbe6..d83fee21f6c59c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
unsigned command, unsigned long argument)
{
- struct blkfront_info *info = bdev->bd_disk->private_data;
int i;
- dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
- command, (long)argument);
-
switch (command) {
case CDROMMULTISESSION:
- dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++)
if (put_user(0, (char __user *)(argument + i)))
return -EFAULT;
return 0;
-
- case CDROM_GET_CAPABILITY: {
- struct gendisk *gd = info->gd;
- if (gd->flags & GENHD_FL_CD)
+ case CDROM_GET_CAPABILITY:
+ if (bdev->bd_disk->flags & GENHD_FL_CD)
return 0;
return -EINVAL;
- }
-
default:
- /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
- command);*/
- return -EINVAL; /* same return as native Linux */
+ return -EINVAL;
}
-
- return 0;
}
static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
@@ -1177,36 +1164,6 @@ out_release_minors:
return err;
}
-static void xlvbd_release_gendisk(struct blkfront_info *info)
-{
- unsigned int minor, nr_minors, i;
- struct blkfront_ring_info *rinfo;
-
- if (info->rq == NULL)
- return;
-
- /* No more blkif_request(). */
- blk_mq_stop_hw_queues(info->rq);
-
- for_each_rinfo(info, rinfo, i) {
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&rinfo->callback);
-
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&rinfo->work);
- }
-
- del_gendisk(info->gd);
-
- minor = info->gd->first_minor;
- nr_minors = info->gd->minors;
- xlbd_release_minors(minor, nr_minors);
-
- blk_cleanup_disk(info->gd);
- info->gd = NULL;
- blk_mq_free_tag_set(&info->tag_set);
-}
-
/* Already hold rinfo->ring_lock. */
static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{
@@ -1756,12 +1713,6 @@ abort_transaction:
return err;
}
-static void free_info(struct blkfront_info *info)
-{
- list_del(&info->info_list);
- kfree(info);
-}
-
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1880,13 +1831,6 @@ again:
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
blkif_free(info, 0);
-
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
-
- dev_set_drvdata(&dev->dev, NULL);
-
return err;
}
@@ -2126,38 +2070,26 @@ static int blkfront_resume(struct xenbus_device *dev)
static void blkfront_closing(struct blkfront_info *info)
{
struct xenbus_device *xbdev = info->xbdev;
- struct block_device *bdev = NULL;
-
- mutex_lock(&info->mutex);
+ struct blkfront_ring_info *rinfo;
+ unsigned int i;
- if (xbdev->state == XenbusStateClosing) {
- mutex_unlock(&info->mutex);
+ if (xbdev->state == XenbusStateClosing)
return;
- }
- if (info->gd)
- bdev = bdgrab(info->gd->part0);
-
- mutex_unlock(&info->mutex);
-
- if (!bdev) {
- xenbus_frontend_closed(xbdev);
- return;
- }
+ /* No more blkif_request(). */
+ blk_mq_stop_hw_queues(info->rq);
+ blk_set_queue_dying(info->rq);
+ set_capacity(info->gd, 0);
- mutex_lock(&bdev->bd_disk->open_mutex);
+ for_each_rinfo(info, rinfo, i) {
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&rinfo->callback);
- if (bdev->bd_openers) {
- xenbus_dev_error(xbdev, -EBUSY,
- "Device in use; refusing to close");
- xenbus_switch_state(xbdev, XenbusStateClosing);
- } else {
- xlvbd_release_gendisk(info);
- xenbus_frontend_closed(xbdev);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work(&rinfo->work);
}
- mutex_unlock(&bdev->bd_disk->open_mutex);
- bdput(bdev);
+ xenbus_frontend_closed(xbdev);
}
static void blkfront_setup_discard(struct blkfront_info *info)
@@ -2472,8 +2404,7 @@ static void blkback_changed(struct xenbus_device *dev,
break;
fallthrough;
case XenbusStateClosing:
- if (info)
- blkfront_closing(info);
+ blkfront_closing(info);
break;
}
}
@@ -2481,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev,
static int blkfront_remove(struct xenbus_device *xbdev)
{
struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
- struct block_device *bdev = NULL;
- struct gendisk *disk;
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
- if (!info)
- return 0;
-
- blkif_free(info, 0);
-
- mutex_lock(&info->mutex);
-
- disk = info->gd;
- if (disk)
- bdev = bdgrab(disk->part0);
-
- info->xbdev = NULL;
- mutex_unlock(&info->mutex);
-
- if (!bdev) {
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
- return 0;
- }
-
- /*
- * The xbdev was removed before we reached the Closed
- * state. See if it's safe to remove the disk. If the bdev
- * isn't closed yet, we let release take care of it.
- */
-
- mutex_lock(&disk->open_mutex);
- info = disk->private_data;
-
- dev_warn(disk_to_dev(disk),
- "%s was hot-unplugged, %d stale handles\n",
- xbdev->nodename, bdev->bd_openers);
+ del_gendisk(info->gd);
- if (info && !bdev->bd_openers) {
- xlvbd_release_gendisk(info);
- disk->private_data = NULL;
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
- }
+ mutex_lock(&blkfront_mutex);
+ list_del(&info->info_list);
+ mutex_unlock(&blkfront_mutex);
- mutex_unlock(&disk->open_mutex);
- bdput(bdev);
+ blkif_free(info, 0);
+ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+ blk_cleanup_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
+ kfree(info);
return 0;
}
@@ -2541,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev)
return info->is_ready && info->xbdev;
}
-static int blkif_open(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct blkfront_info *info;
- int err = 0;
-
- mutex_lock(&blkfront_mutex);
-
- info = disk->private_data;
- if (!info) {
- /* xbdev gone */
- err = -ERESTARTSYS;
- goto out;
- }
-
- mutex_lock(&info->mutex);
-
- if (!info->gd)
- /* xbdev is closed */
- err = -ERESTARTSYS;
-
- mutex_unlock(&info->mutex);
-
-out:
- mutex_unlock(&blkfront_mutex);
- return err;
-}
-
-static void blkif_release(struct gendisk *disk, fmode_t mode)
-{
- struct blkfront_info *info = disk->private_data;
- struct xenbus_device *xbdev;
-
- mutex_lock(&blkfront_mutex);
- if (disk->part0->bd_openers)
- goto out_mutex;
-
- /*
- * Check if we have been instructed to close. We will have
- * deferred this request, because the bdev was still open.
- */
-
- mutex_lock(&info->mutex);
- xbdev = info->xbdev;
-
- if (xbdev && xbdev->state == XenbusStateClosing) {
- /* pending switch to state closed */
- dev_info(disk_to_dev(disk), "releasing disk\n");
- xlvbd_release_gendisk(info);
- xenbus_frontend_closed(info->xbdev);
- }
-
- mutex_unlock(&info->mutex);
-
- if (!xbdev) {
- /* sudden device removal */
- dev_info(disk_to_dev(disk), "releasing disk\n");
- xlvbd_release_gendisk(info);
- disk->private_data = NULL;
- free_info(info);
- }
-
-out_mutex:
- mutex_unlock(&blkfront_mutex);
-}
-
static const struct block_device_operations xlvbd_block_fops =
{
.owner = THIS_MODULE,
- .open = blkif_open,
- .release = blkif_release,
.getgeo = blkif_getgeo,
.ioctl = blkif_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d3c5086673bcbb..320051f5a3ddca 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1554,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
wmb(); /* ensure the first interrupt sees the initialization */
}
+/*
+ * Try getting shutdown_lock while setting up IO queues.
+ */
+static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
+{
+ /*
+ * Give up if the lock is being held by nvme_dev_disable.
+ */
+ if (!mutex_trylock(&dev->shutdown_lock))
+ return -ENODEV;
+
+ /*
+ * Controller is in wrong state, fail early.
+ */
+ if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
+ mutex_unlock(&dev->shutdown_lock);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
struct nvme_dev *dev = nvmeq->dev;
@@ -1582,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
goto release_cq;
nvmeq->cq_vector = vector;
- nvme_init_queue(nvmeq, qid);
+ result = nvme_setup_io_queues_trylock(dev);
+ if (result)
+ return result;
+ nvme_init_queue(nvmeq, qid);
if (!polled) {
result = queue_request_irq(nvmeq);
if (result < 0)
@@ -1591,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
}
set_bit(NVMEQ_ENABLED, &nvmeq->flags);
+ mutex_unlock(&dev->shutdown_lock);
return result;
release_sq:
dev->online_queues--;
+ mutex_unlock(&dev->shutdown_lock);
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
@@ -2167,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- clear_bit(NVMEQ_ENABLED, &adminq->flags);
+ /*
+ * Free IRQ resources as soon as NVMEQ_ENABLED bit transitions
+ * from set to unset. If there is a window to it is truely freed,
+ * pci_free_irq_vectors() jumping into this window will crash.
+ * And take lock to avoid racing with pci_free_irq_vectors() in
+ * nvme_dev_disable() path.
+ */
+ result = nvme_setup_io_queues_trylock(dev);
+ if (result)
+ return result;
+ if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
+ pci_free_irq(pdev, 0, adminq);
if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -2183,14 +2221,17 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
result = nvme_remap_bar(dev, size);
if (!result)
break;
- if (!--nr_io_queues)
- return -ENOMEM;
+ if (!--nr_io_queues) {
+ result = -ENOMEM;
+ goto out_unlock;
+ }
} while (1);
adminq->q_db = dev->dbs;
retry:
/* Deregister the admin queue's interrupt */
- pci_free_irq(pdev, 0, adminq);
+ if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
+ pci_free_irq(pdev, 0, adminq);
/*
* If we enable msix early due to not intx, disable it again before
@@ -2199,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
pci_free_irq_vectors(pdev);
result = nvme_setup_irqs(dev, nr_io_queues);
- if (result <= 0)
- return -EIO;
+ if (result <= 0) {
+ result = -EIO;
+ goto out_unlock;
+ }
dev->num_vecs = result;
result = max(result - 1, 1);
@@ -2214,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
result = queue_request_irq(adminq);
if (result)
- return result;
+ goto out_unlock;
set_bit(NVMEQ_ENABLED, &adminq->flags);
+ mutex_unlock(&dev->shutdown_lock);
result = nvme_create_io_queues(dev);
if (result || dev->online_queues < 2)
@@ -2224,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (dev->online_queues - 1 < dev->max_qid) {
nr_io_queues = dev->online_queues - 1;
nvme_disable_io_queues(dev);
+ result = nvme_setup_io_queues_trylock(dev);
+ if (result)
+ return result;
nvme_suspend_io_queues(dev);
goto retry;
}
@@ -2232,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->io_queues[HCTX_TYPE_READ],
dev->io_queues[HCTX_TYPE_POLL]);
return 0;
+out_unlock:
+ mutex_unlock(&dev->shutdown_lock);
+ return result;
}
static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2962,7 +3012,6 @@ static void nvme_remove(struct pci_dev *pdev)
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
nvme_dev_disable(dev, true);
- nvme_dev_remove_admin(dev);
}
flush_work(&dev->ctrl.reset_work);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 12acfe05cd68fa..8cb15ee5b249ea 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -123,7 +123,6 @@ struct nvme_tcp_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct sockaddr_storage addr;
struct sockaddr_storage src_addr;
- struct net_device *ndev;
struct nvme_ctrl ctrl;
struct work_struct err_work;
@@ -2533,8 +2532,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
}
if (opts->mask & NVMF_OPT_HOST_IFACE) {
- ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface);
- if (!ctrl->ndev) {
+ if (!__dev_get_by_name(&init_net, opts->host_iface)) {
pr_err("invalid interface passed: %s\n",
opts->host_iface);
ret = -ENODEV;