From e719b4d156749f02eafed31a3c515f2aa9dcc72a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Sep 2023 11:34:07 +0200 Subject: block: Provide bdev_open_* functions Create struct bdev_handle that contains all parameters that need to be passed to blkdev_put() and provide bdev_open_* functions that return this structure instead of plain bdev pointer. This will eventually allow us to pass one more argument to blkdev_put() (renamed to bdev_release()) without too much hassle. Acked-by: Christoph Hellwig Reviewed-by: Christian Brauner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230927093442.25915-1-jack@suse.cz Signed-off-by: Christian Brauner --- block/bdev.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index f3b13aa1b7d42..bdc7d739882bb 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -829,6 +829,25 @@ put_blkdev: } EXPORT_SYMBOL(blkdev_get_by_dev); +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops) +{ + struct bdev_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL); + struct block_device *bdev; + + if (!handle) + return ERR_PTR(-ENOMEM); + bdev = blkdev_get_by_dev(dev, mode, holder, hops); + if (IS_ERR(bdev)) { + kfree(handle); + return ERR_CAST(bdev); + } + handle->bdev = bdev; + handle->holder = holder; + return handle; +} +EXPORT_SYMBOL(bdev_open_by_dev); + /** * blkdev_get_by_path - open a block device by name * @path: path to the block device to open @@ -867,6 +886,28 @@ struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); +struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, + void *holder, const struct blk_holder_ops *hops) +{ + struct bdev_handle *handle; + dev_t dev; + int error; + + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); + + handle = bdev_open_by_dev(dev, mode, 
holder, hops); + if (!IS_ERR(handle) && (mode & BLK_OPEN_WRITE) && + bdev_read_only(handle->bdev)) { + bdev_release(handle); + return ERR_PTR(-EACCES); + } + + return handle; +} +EXPORT_SYMBOL(bdev_open_by_path); + void blkdev_put(struct block_device *bdev, void *holder) { struct gendisk *disk = bdev->bd_disk; @@ -903,6 +944,13 @@ void blkdev_put(struct block_device *bdev, void *holder) } EXPORT_SYMBOL(blkdev_put); +void bdev_release(struct bdev_handle *handle) +{ + blkdev_put(handle->bdev, handle->holder); + kfree(handle); +} +EXPORT_SYMBOL(bdev_release); + /** * lookup_bdev() - Look up a struct block_device by name. * @pathname: Name of the block device in the filesystem. -- cgit 1.2.3-korg From 841dd789b8625eb9288aaa2be9f10872e6622033 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Sep 2023 11:34:08 +0200 Subject: block: Use bdev_open_by_dev() in blkdev_open() Convert blkdev_open() to use bdev_open_by_dev(). To be able to propagate handle from blkdev_open() to blkdev_release() we need to stop using existence of file->private_data to determine exclusive block device opens. Use bdev_handle->mode for this purpose since file->f_flags isn't usable for this (O_EXCL is cleared from the flags during open). 
Acked-by: Christoph Hellwig Reviewed-by: Christian Brauner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230927093442.25915-2-jack@suse.cz Signed-off-by: Christian Brauner --- block/bdev.c | 3 +++ block/fops.c | 44 ++++++++++++++++++++++++++++---------------- include/linux/blkdev.h | 1 + 3 files changed, 32 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index bdc7d739882bb..4628dcb1da8a5 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -844,6 +844,9 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, } handle->bdev = bdev; handle->holder = holder; + if (holder) + mode |= BLK_OPEN_EXCL; + handle->mode = mode; return handle; } EXPORT_SYMBOL(bdev_open_by_dev); diff --git a/block/fops.c b/block/fops.c index 73e42742543f6..0abaac705dafb 100644 --- a/block/fops.c +++ b/block/fops.c @@ -542,15 +542,31 @@ static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, return error; } +/** + * file_to_blk_mode - get block open flags from file flags + * @file: file whose open flags should be converted + * + * Look at file open flags and generate corresponding block open flags from + * them. The function works both for file just being open (e.g. during ->open + * callback) and for file that is already open. This is actually non-trivial + * (see comment in the function). + */ blk_mode_t file_to_blk_mode(struct file *file) { blk_mode_t mode = 0; + struct bdev_handle *handle = file->private_data; if (file->f_mode & FMODE_READ) mode |= BLK_OPEN_READ; if (file->f_mode & FMODE_WRITE) mode |= BLK_OPEN_WRITE; - if (file->private_data) + /* + * do_dentry_open() clears O_EXCL from f_flags, use handle->mode to + * determine whether the open was exclusive for already open files. 
+ */ + if (handle) + mode |= handle->mode & BLK_OPEN_EXCL; + else if (file->f_flags & O_EXCL) mode |= BLK_OPEN_EXCL; if (file->f_flags & O_NDELAY) mode |= BLK_OPEN_NDELAY; @@ -568,7 +584,8 @@ blk_mode_t file_to_blk_mode(struct file *file) static int blkdev_open(struct inode *inode, struct file *filp) { - struct block_device *bdev; + struct bdev_handle *handle; + blk_mode_t mode; /* * Preserve backwards compatibility and allow large file access @@ -579,29 +596,24 @@ static int blkdev_open(struct inode *inode, struct file *filp) filp->f_flags |= O_LARGEFILE; filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; - /* - * Use the file private data to store the holder for exclusive openes. - * file_to_blk_mode relies on it being present to set BLK_OPEN_EXCL. - */ - if (filp->f_flags & O_EXCL) - filp->private_data = filp; - - bdev = blkdev_get_by_dev(inode->i_rdev, file_to_blk_mode(filp), - filp->private_data, NULL); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + mode = file_to_blk_mode(filp); + handle = bdev_open_by_dev(inode->i_rdev, mode, + mode & BLK_OPEN_EXCL ? 
filp : NULL, NULL); + if (IS_ERR(handle)) + return PTR_ERR(handle); - if (bdev_nowait(bdev)) + if (bdev_nowait(handle->bdev)) filp->f_mode |= FMODE_NOWAIT; - filp->f_mapping = bdev->bd_inode->i_mapping; + filp->f_mapping = handle->bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); + filp->private_data = handle; return 0; } static int blkdev_release(struct inode *inode, struct file *filp) { - blkdev_put(I_BDEV(filp->f_mapping->host), filp->private_data); + bdev_release(filp->private_data); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 03d3adc3ff347..51fa7ffdee83b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1482,6 +1482,7 @@ extern const struct blk_holder_ops fs_holder_ops; struct bdev_handle { struct block_device *bdev; void *holder; + blk_mode_t mode; }; struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, -- cgit 1.2.3-korg From acb083b55597872dcaebe9e0352da7fdf1684def Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Sep 2023 11:34:09 +0200 Subject: block: Use bdev_open_by_dev() in disk_scan_partitions() and blkdev_bszset() Convert disk_scan_partitions() and blkdev_bszset() to use bdev_open_by_dev(). 
Acked-by: Christoph Hellwig Reviewed-by: Christian Brauner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230927093442.25915-3-jack@suse.cz Signed-off-by: Christian Brauner --- block/genhd.c | 12 ++++++------ block/ioctl.c | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index cc32a0c704eb8..4a16a424f57d4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(disk_uevent); int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) { - struct block_device *bdev; + struct bdev_handle *handle; int ret = 0; if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) @@ -366,12 +366,12 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) } set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL, - NULL); - if (IS_ERR(bdev)) - ret = PTR_ERR(bdev); + handle = bdev_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL, + NULL); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); else - blkdev_put(bdev, NULL); + bdev_release(handle); /* * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, diff --git a/block/ioctl.c b/block/ioctl.c index d5f5cd61efd7f..5d356c9643520 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -467,6 +467,7 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, int __user *argp) { int ret, n; + struct bdev_handle *handle; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -478,10 +479,11 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, if (mode & BLK_OPEN_EXCL) return set_blocksize(bdev, n); - if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode, &bdev, NULL))) + handle = bdev_open_by_dev(bdev->bd_dev, mode, &bdev, NULL); + if (IS_ERR(handle)) return -EBUSY; ret = set_blocksize(bdev, n); - blkdev_put(bdev, &bdev); + bdev_release(handle); return ret; } -- cgit 1.2.3-korg From fd1464105cb37a3b50a72c1d2902e97a71950af8 Mon 
Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Oct 2023 17:29:24 +0200 Subject: fs: Avoid grabbing sb->s_umount under bdev->bd_holder_lock The implementation of bdev holder operations such as fs_bdev_mark_dead() and fs_bdev_sync() grabs sb->s_umount semaphore under bdev->bd_holder_lock. This is problematic because it leads to disk->open_mutex -> sb->s_umount lock ordering which is counterintuitive (usually we grab higher level (e.g. filesystem) locks first and lower level (e.g. block layer) locks later) and indeed makes lockdep complain about possible locking cycles whenever we open a block device while holding sb->s_umount semaphore. Implement a function bdev_super_lock_shared() which safely transitions from holding bdev->bd_holder_lock to holding sb->s_umount on alive superblock without introducing the problematic lock dependency. We use this function in fs_bdev_sync() and fs_bdev_mark_dead(). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231018152924.3858-1-jack@suse.cz Link: https://lore.kernel.org/r/20231017184823.1383356-1-hch@lst.de Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- block/bdev.c | 5 +++-- block/ioctl.c | 5 +++-- fs/super.c | 50 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 4628dcb1da8a5..9838085102b3c 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1012,9 +1012,10 @@ void bdev_mark_dead(struct block_device *bdev, bool surprise) mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead) bdev->bd_holder_ops->mark_dead(bdev, surprise); - else + else { + mutex_unlock(&bdev->bd_holder_lock); sync_blockdev(bdev); - mutex_unlock(&bdev->bd_holder_lock); + } invalidate_bdev(bdev); } diff --git a/block/ioctl.c b/block/ioctl.c index 5d356c9643520..4160f4e6bd5b4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -370,9 +370,10 @@ static int blkdev_flushbuf(struct 
block_device *bdev, unsigned cmd, mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) bdev->bd_holder_ops->sync(bdev); - else + else { + mutex_unlock(&bdev->bd_holder_lock); sync_blockdev(bdev); - mutex_unlock(&bdev->bd_holder_lock); + } invalidate_bdev(bdev); return 0; diff --git a/fs/super.c b/fs/super.c index 26b96191e9b3c..799b8db1931ed 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1419,32 +1419,47 @@ EXPORT_SYMBOL(sget_dev); #ifdef CONFIG_BLOCK /* - * Lock a super block that the callers holds a reference to. + * Lock the superblock that is holder of the bdev. Returns the superblock + * pointer if we successfully locked the superblock and it is alive. Otherwise + * we return NULL and just unlock bdev->bd_holder_lock. * - * The caller needs to ensure that the super_block isn't being freed while - * calling this function, e.g. by holding a lock over the call to this function - * and the place that clears the pointer to the superblock used by this function - * before freeing the superblock. + * The function must be called with bdev->bd_holder_lock and releases it. */ -static bool super_lock_shared_active(struct super_block *sb) +static struct super_block *bdev_super_lock_shared(struct block_device *bdev) + __releases(&bdev->bd_holder_lock) { - bool born = super_lock_shared(sb); + struct super_block *sb = bdev->bd_holder; + bool born; + + lockdep_assert_held(&bdev->bd_holder_lock); + lockdep_assert_not_held(&sb->s_umount); + + /* Make sure sb doesn't go away from under us */ + spin_lock(&sb_lock); + sb->s_count++; + spin_unlock(&sb_lock); + mutex_unlock(&bdev->bd_holder_lock); + born = super_lock_shared(sb); if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) { super_unlock_shared(sb); - return false; + put_super(sb); + return NULL; } - return true; + /* + * The superblock is active and we hold s_umount, we can drop our + * temporary reference now. 
+ */ + put_super(sb); + return sb; } static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise) { - struct super_block *sb = bdev->bd_holder; - - /* bd_holder_lock ensures that the sb isn't freed */ - lockdep_assert_held(&bdev->bd_holder_lock); + struct super_block *sb; - if (!super_lock_shared_active(sb)) + sb = bdev_super_lock_shared(bdev); + if (!sb) return; if (!surprise) @@ -1459,11 +1474,10 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise) static void fs_bdev_sync(struct block_device *bdev) { - struct super_block *sb = bdev->bd_holder; - - lockdep_assert_held(&bdev->bd_holder_lock); + struct super_block *sb; - if (!super_lock_shared_active(sb)) + sb = bdev_super_lock_shared(bdev); + if (!sb) return; sync_filesystem(sb); super_unlock_shared(sb); -- cgit 1.2.3-korg From c30b9787a48118d2ed0283b6c8f2abee873a1d19 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 17 Oct 2023 20:48:19 +0200 Subject: block: simplify bdev_del_partition() BLKPG_DEL_PARTITION refuses to delete partitions that still have openers, i.e., that have an elevated @bdev->bd_openers count. If a device is claimed by setting @bdev->bd_holder and @bdev->bd_holder_ops, @bdev->bd_openers and @bdev->bd_holders are incremented. @bdev->bd_openers is effectively guaranteed to be >= @bdev->bd_holders. So as long as @bdev->bd_openers isn't zero we know that this partition is still in active use and that there might still be @bdev->bd_holder and @bdev->bd_holder_ops set. The only current example is @fs_holder_ops for filesystems. But that means bdev_mark_dead() which calls into bdev->bd_holder_ops->mark_dead::fs_bdev_mark_dead() is a nop. As long as there's an elevated @bdev->bd_openers count we can't delete the partition and if there isn't an elevated @bdev->bd_openers count then there's no @bdev->bd_holder or @bdev->bd_holder_ops. So simply open-code what we need to do. This gets rid of one more instance where we acquire s_umount under @disk->open_mutex. 
Link: https://lore.kernel.org/r/20231016-fototermin-umriss-59f1ea6c1fe6@brauner Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231017184823.1383356-2-hch@lst.de Reviewed-by: Ming Lei Signed-off-by: Christian Brauner --- block/partitions/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/partitions/core.c b/block/partitions/core.c index e137a87f4db0d..b0585536b407a 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -485,7 +485,18 @@ int bdev_del_partition(struct gendisk *disk, int partno) if (atomic_read(&part->bd_openers)) goto out_unlock; - delete_partition(part); + /* + * We verified that @part->bd_openers is zero above and so + * @part->bd_holder{_ops} can't be set. And since we hold + * @disk->open_mutex the device can't be claimed by anyone. + * + * So no need to call @part->bd_holder_ops->mark_dead() here. + * Just delete the partition and invalidate it. + */ + + remove_inode_hash(part->bd_inode); + invalidate_bdev(part); + drop_partition(part); ret = 0; out_unlock: mutex_unlock(&disk->open_mutex); -- cgit 1.2.3-korg From 51b4cb4f3e2265cf8303ffd9a4f239ee3805d3ca Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 17 Oct 2023 20:48:20 +0200 Subject: block: WARN_ON_ONCE() when we remove active partitions The logic for disk->open_partitions is: blkdev_get_by_*() -> bdev_is_partition() -> blkdev_get_part() -> blkdev_get_whole() // bdev_whole->bd_openers++ -> if (part->bd_openers == 0) disk->open_partitions++ part->bd_openers In other words, when we first claim/open a partition we increment disk->open_partitions and only when all part->bd_openers are closed will disk->open_partitions be zero. That should mean that disk->open_partitions is always > 0 as long as there's anyone that has an open partition. 
So the check for disk->open_partitions should mean that we can never remove an active partition that has a holder and holder ops set. Assert that in the code. The main disk isn't removed so that check doesn't work for disk->part0 which is what we want. After all we only care about partition not about the main disk. Link: https://lore.kernel.org/r/20231017184823.1383356-3-hch@lst.de Reviewed-by: Ming Lei Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/partitions/core.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/partitions/core.c b/block/partitions/core.c index b0585536b407a..f47ffcfdfcec2 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -274,17 +274,6 @@ void drop_partition(struct block_device *part) put_device(&part->bd_device); } -static void delete_partition(struct block_device *part) -{ - /* - * Remove the block device from the inode hash, so that it cannot be - * looked up any more even when openers still hold references. - */ - remove_inode_hash(part->bd_inode); - bdev_mark_dead(part, false); - drop_partition(part); -} - static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -674,8 +663,23 @@ rescan: sync_blockdev(disk->part0); invalidate_bdev(disk->part0); - xa_for_each_start(&disk->part_tbl, idx, part, 1) - delete_partition(part); + xa_for_each_start(&disk->part_tbl, idx, part, 1) { + /* + * Remove the block device from the inode hash, so that + * it cannot be looked up any more even when openers + * still hold references. + */ + remove_inode_hash(part->bd_inode); + + /* + * If @disk->open_partitions isn't elevated but there's + * still an active holder of that block device things + * are broken. 
+ */ + WARN_ON_ONCE(atomic_read(&part->bd_openers)); + invalidate_bdev(part); + drop_partition(part); + } clear_bit(GD_NEED_PART_SCAN, &disk->state); /* -- cgit 1.2.3-korg From 6e57236ed6e070607868da70fac3d52ae24e5417 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Oct 2023 20:48:21 +0200 Subject: block: move bdev_mark_dead out of disk_check_media_change disk_check_media_change is mostly called from ->open where it makes little sense to mark the file system on the device as dead, as we are just opening it. So instead of calling bdev_mark_dead from disk_check_media_change move it into the few callers that are not in an open instance. This avoids calling into bdev_mark_dead and thus taking s_umount with open_mutex held. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231017184823.1383356-4-hch@lst.de Reviewed-by: Ming Lei Reviewed-by: Christian Brauner Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/bdev.c | 9 ++++----- block/disk-events.c | 18 +++++++----------- drivers/block/ataflop.c | 4 +++- drivers/block/floppy.c | 4 +++- 4 files changed, 17 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 9838085102b3c..2018d250e1310 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1019,14 +1019,13 @@ void bdev_mark_dead(struct block_device *bdev, bool surprise) invalidate_bdev(bdev); } -#ifdef CONFIG_DASD_MODULE /* - * Drivers should not use this directly, but the DASD driver has historically - * had a shutdown to offline mode that doesn't actually remove the gendisk - * that otherwise looks a lot like a safe device removal. + * New drivers should not use this directly. There are some drivers however + * that needs this for historical reasons. For example, the DASD driver has + * historically had a shutdown to offline mode that doesn't actually remove the + * gendisk that otherwise looks a lot like a safe device removal. 
*/ EXPORT_SYMBOL_GPL(bdev_mark_dead); -#endif void sync_bdevs(bool wait) { diff --git a/block/disk-events.c b/block/disk-events.c index 13c3372c465a3..2f697224386aa 100644 --- a/block/disk-events.c +++ b/block/disk-events.c @@ -266,11 +266,8 @@ static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) * disk_check_media_change - check if a removable media has been changed * @disk: gendisk to check * - * Check whether a removable media has been changed, and attempt to free all - * dentries and inodes and invalidates all block device page cache entries in - * that case. - * - * Returns %true if the media has changed, or %false if not. + * Returns %true and marks the disk for a partition rescan whether a removable + * media has been changed, and %false if the media did not change. */ bool disk_check_media_change(struct gendisk *disk) { @@ -278,12 +275,11 @@ bool disk_check_media_change(struct gendisk *disk) events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST); - if (!(events & DISK_EVENT_MEDIA_CHANGE)) - return false; - - bdev_mark_dead(disk->part0, true); - set_bit(GD_NEED_PART_SCAN, &disk->state); - return true; + if (events & DISK_EVENT_MEDIA_CHANGE) { + set_bit(GD_NEED_PART_SCAN, &disk->state); + return true; + } + return false; } EXPORT_SYMBOL(disk_check_media_change); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index cd738cab725f3..50949207798d2 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1760,8 +1760,10 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode, /* invalidate the buffer track to force a reread */ BufferDrive = -1; set_bit(drive, &fake_change); - if (disk_check_media_change(disk)) + if (disk_check_media_change(disk)) { + bdev_mark_dead(disk->part0, true); floppy_revalidate(disk); + } return 0; default: return -EINVAL; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ea4eb88a2e45f..11114a5d9e5c4 100644 --- 
a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3215,8 +3215,10 @@ static int invalidate_drive(struct gendisk *disk) /* invalidate the buffer track to force a reread */ set_bit((long)disk->private_data, &fake_change); process_fd_request(); - if (disk_check_media_change(disk)) + if (disk_check_media_change(disk)) { + bdev_mark_dead(disk->part0, true); floppy_revalidate(disk); + } return 0; } -- cgit 1.2.3-korg From f61033390bc34cd22ad4b4c12619a1e7a8a75600 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 17 Oct 2023 20:48:22 +0200 Subject: block: assert that we're not holding open_mutex over blk_report_disk_dead blk_report_disk_dead() has the following major callers: (1) del_gendisk() (2) blk_mark_disk_dead() Since del_gendisk() acquires disk->open_mutex it's clear that all callers are assumed to be called without disk->open_mutex held. In turn, blk_report_disk_dead() is called without disk->open_mutex held in del_gendisk(). All callers of blk_mark_disk_dead() call it without disk->open_mutex as well. Ensure that it is clear that blk_report_disk_dead() is called without disk->open_mutex on purpose by asserting it and a comment in the code. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231017184823.1383356-5-hch@lst.de Reviewed-by: Ming Lei Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/genhd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index 4a16a424f57d4..c9d06f72c587e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -559,6 +559,13 @@ static void blk_report_disk_dead(struct gendisk *disk, bool surprise) struct block_device *bdev; unsigned long idx; + /* + * On surprise disk removal, bdev_mark_dead() may call into file + * systems below. Make it clear that we're expecting to not hold + * disk->open_mutex. 
+ */ + lockdep_assert_not_held(&disk->open_mutex); + rcu_read_lock(); xa_for_each(&disk->part_tbl, idx, bdev) { if (!kobject_get_unless_zero(&bdev->bd_device.kobj)) -- cgit 1.2.3-korg