aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bdev.c69
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-crypto-fallback.c36
-rw-r--r--block/blk-iocost.c9
-rw-r--r--block/blk-mq.c32
-rw-r--r--block/disk-events.c23
-rw-r--r--block/elevator.c3
-rw-r--r--block/fops.c7
-rw-r--r--block/genhd.c45
-rw-r--r--block/ioctl.c9
-rw-r--r--block/partitions/core.c5
12 files changed, 139 insertions, 110 deletions
diff --git a/block/bdev.c b/block/bdev.c
index 979e28a46b988..f3b13aa1b7d42 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -206,23 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
}
EXPORT_SYMBOL(sync_blockdev_range);
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
- struct super_block *sb = get_super(bdev);
- if (sb) {
- int res = sync_filesystem(sb);
- drop_super(sb);
- return res;
- }
- return sync_blockdev(bdev);
-}
-EXPORT_SYMBOL(fsync_bdev);
-
/**
* freeze_bdev - lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
@@ -248,9 +231,9 @@ int freeze_bdev(struct block_device *bdev)
if (!sb)
goto sync;
if (sb->s_op->freeze_super)
- error = sb->s_op->freeze_super(sb);
+ error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
else
- error = freeze_super(sb);
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
deactivate_super(sb);
if (error) {
@@ -291,9 +274,9 @@ int thaw_bdev(struct block_device *bdev)
goto out;
if (sb->s_op->thaw_super)
- error = sb->s_op->thaw_super(sb);
+ error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
else
- error = thaw_super(sb);
+ error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
if (error)
bdev->bd_fsfreeze_count++;
else
@@ -960,26 +943,38 @@ out_path_put:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+/**
+ * bdev_mark_dead - mark a block device as dead
+ * @bdev: block device to operate on
+ * @surprise: indicate a surprise removal
+ *
+ * Tell the file system that this devices or media is dead. If @surprise is set
+ * to %true the device or media is already gone, if not we are preparing for an
+ * orderly removal.
+ *
+ * This calls into the file system, which then typicall syncs out all dirty data
+ * and writes back inodes and then invalidates any cached data in the inodes on
+ * the file system. In addition we also invalidate the block device mapping.
+ */
+void bdev_mark_dead(struct block_device *bdev, bool surprise)
{
- struct super_block *sb = get_super(bdev);
- int res = 0;
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+ bdev->bd_holder_ops->mark_dead(bdev, surprise);
+ else
+ sync_blockdev(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
- if (sb) {
- /*
- * no need to lock the super, get_super holds the
- * read mutex so the filesystem cannot go away
- * under us (->put_super runs with the write lock
- * hold).
- */
- shrink_dcache_sb(sb);
- res = invalidate_inodes(sb, kill_dirty);
- drop_super(sb);
- }
invalidate_bdev(bdev);
- return res;
}
-EXPORT_SYMBOL(__invalidate_device);
+#ifdef CONFIG_DASD_MODULE
+/*
+ * Drivers should not use this directly, but the DASD driver has historically
+ * had a shutdown to offline mode that doesn't actually remove the gendisk
+ * that otherwise looks a lot like a safe device removal.
+ */
+EXPORT_SYMBOL_GPL(bdev_mark_dead);
+#endif
void sync_bdevs(bool wait)
{
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 638400bf049f2..4a42ea2972ad8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work)
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
if (blkg->parent)
blkg_put(blkg->parent);
+ spin_lock_irq(&q->queue_lock);
list_del_init(&blkg->q_node);
+ spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);
blk_put_queue(q);
diff --git a/block/blk-core.c b/block/blk-core.c
index 141e54545cc1d..9d51e9894ece7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -723,14 +723,9 @@ void submit_bio_noacct(struct bio *bio)
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
blk_status_t status = BLK_STS_IOERR;
- struct blk_plug *plug;
might_sleep();
- plug = blk_mq_plug(bio);
- if (plug && plug->nowait)
- bio->bi_opf |= REQ_NOWAIT;
-
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue does not support NOWAIT.
@@ -1060,7 +1055,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
plug->rq_count = 0;
plug->multiple_queues = false;
plug->has_elevator = false;
- plug->nowait = false;
INIT_LIST_HEAD(&plug->cb_list);
/*
@@ -1145,8 +1139,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
- if (!rq_list_empty(plug->mq_list))
- blk_mq_flush_plug_list(plug, from_schedule);
+ blk_mq_flush_plug_list(plug, from_schedule);
/*
* Unconditionally flush out cached requests, even if the unplug
* event came from schedule. Since we know hold references to the
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index ad9844c5b40cb..e6468eab2681e 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
} *blk_crypto_keyslots;
-static struct blk_crypto_profile blk_crypto_fallback_profile;
+static struct blk_crypto_profile *blk_crypto_fallback_profile;
static struct workqueue_struct *blk_crypto_wq;
static mempool_t *blk_crypto_bounce_page_pool;
static struct bio_set crypto_bio_split;
@@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
* Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
* this bio's algorithm and key.
*/
- blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
src_bio->bi_status = blk_st;
@@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
* Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
* this bio's algorithm and key.
*/
- blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
bio->bi_status = blk_st;
@@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
return false;
}
- if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+ if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile,
&bc->bc_key->crypto_cfg)) {
bio->bi_status = BLK_STS_NOTSUPP;
return false;
@@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
{
- return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
+ return __blk_crypto_evict_key(blk_crypto_fallback_profile, key);
}
static bool blk_crypto_fallback_inited;
@@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void)
{
int i;
int err;
- struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
if (blk_crypto_fallback_inited)
return 0;
@@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void)
if (err)
goto out;
- err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
- if (err)
+ /* Dynamic allocation is needed because of lockdep_register_key(). */
+ blk_crypto_fallback_profile =
+ kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL);
+ if (!blk_crypto_fallback_profile) {
+ err = -ENOMEM;
goto fail_free_bioset;
+ }
+
+ err = blk_crypto_profile_init(blk_crypto_fallback_profile,
+ blk_crypto_num_keyslots);
+ if (err)
+ goto fail_free_profile;
err = -ENOMEM;
- profile->ll_ops = blk_crypto_fallback_ll_ops;
- profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+ blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
+ blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
/* All blk-crypto modes have a crypto API fallback. */
for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
- profile->modes_supported[i] = 0xFFFFFFFF;
- profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+ blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF;
+ blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
WQ_UNBOUND | WQ_HIGHPRI |
@@ -597,7 +605,9 @@ fail_free_keyslots:
fail_free_wq:
destroy_workqueue(blk_crypto_wq);
fail_destroy_profile:
- blk_crypto_profile_destroy(profile);
+ blk_crypto_profile_destroy(blk_crypto_fallback_profile);
+fail_free_profile:
+ kfree(blk_crypto_fallback_profile);
fail_free_bioset:
bioset_exit(&crypto_bio_split);
out:
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9dfcf540f4001..089fcb9cfce37 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2516,6 +2516,10 @@ static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
u64 seek_pages = 0;
u64 cost = 0;
+ /* Can't calculate cost for empty bio */
+ if (!bio->bi_iter.bi_size)
+ goto out;
+
switch (bio_op(bio)) {
case REQ_OP_READ:
coef_seqio = ioc->params.lcoefs[LCOEF_RSEQIO];
@@ -3297,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
if (qos[QOS_MIN] > qos[QOS_MAX])
goto einval;
- if (enable) {
+ if (enable && !ioc->enabled) {
blk_stat_enable_accounting(disk->queue);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = true;
- } else {
+ } else if (!enable && ioc->enabled) {
+ blk_stat_disable_accounting(disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = false;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 84400157c5f4e..ec922c6bccbe2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -682,6 +682,21 @@ out_queue_exit:
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+static void blk_mq_finish_request(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+
+ if (rq->rq_flags & RQF_USE_SCHED) {
+ q->elevator->type->ops.finish_request(rq);
+ /*
+ * For postflush request that may need to be
+ * completed twice, we should clear this flag
+ * to avoid double finish_request() on the rq.
+ */
+ rq->rq_flags &= ~RQF_USE_SCHED;
+ }
+}
+
static void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -708,9 +723,7 @@ void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- if ((rq->rq_flags & RQF_USE_SCHED) &&
- q->elevator->type->ops.finish_request)
- q->elevator->type->ops.finish_request(rq);
+ blk_mq_finish_request(rq);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->disk->bdi);
@@ -1021,6 +1034,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
if (blk_mq_need_time_stamp(rq))
__blk_mq_end_request_acct(rq, ktime_get_ns());
+ blk_mq_finish_request(rq);
+
if (rq->end_io) {
rq_qos_done(rq->q, rq);
if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1075,6 +1090,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
if (iob->need_ts)
__blk_mq_end_request_acct(rq, now);
+ blk_mq_finish_request(rq);
+
rq_qos_done(rq->q, rq);
/*
@@ -2755,7 +2772,14 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request *rq;
- if (rq_list_empty(plug->mq_list))
+ /*
+ * We may have been called recursively midway through handling
+ * plug->mq_list via a schedule() in the driver's queue_rq() callback.
+ * To avoid mq_list changing under our feet, clear rq_count early and
+ * bail out specifically if rq_count is 0 rather than checking
+ * whether the mq_list is empty.
+ */
+ if (plug->rq_count == 0)
return;
plug->rq_count = 0;
diff --git a/block/disk-events.c b/block/disk-events.c
index 0cfac464e6d12..422db8292d099 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return false;
- if (__invalidate_device(disk->part0, true))
- pr_warn("VFS: busy inodes on changed media %s\n",
- disk->disk_name);
+ bdev_mark_dead(disk->part0, true);
set_bit(GD_NEED_PART_SCAN, &disk->state);
return true;
}
@@ -294,25 +292,16 @@ EXPORT_SYMBOL(disk_check_media_change);
* @disk: the disk which will raise the event
* @events: the events to raise
*
- * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
- * attempt to free all dentries and inodes and invalidates all block
+ * Should be called when the media changes for @disk. Generates a uevent
+ * and attempts to free all dentries and inodes and invalidates all block
* device page cache entries in that case.
- *
- * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
*/
-bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+void disk_force_media_change(struct gendisk *disk)
{
- disk_event_uevent(disk, events);
-
- if (!(events & DISK_EVENT_MEDIA_CHANGE))
- return false;
-
+ disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
inc_diskseq(disk);
- if (__invalidate_device(disk->part0, true))
- pr_warn("VFS: busy inodes on changed media %s\n",
- disk->disk_name);
+ bdev_mark_dead(disk->part0, true);
set_bit(GD_NEED_PART_SCAN, &disk->state);
- return true;
}
EXPORT_SYMBOL_GPL(disk_force_media_change);
diff --git a/block/elevator.c b/block/elevator.c
index 8400e303fbcbd..5ff093cb3cf8f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q)
int elv_register(struct elevator_type *e)
{
+ /* finish request is mandatory */
+ if (WARN_ON_ONCE(!e->ops.finish_request))
+ return -EINVAL;
/* insert_requests and dispatch_request are mandatory */
if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
return -EINVAL;
diff --git a/block/fops.c b/block/fops.c
index eaa98a987213d..a24a624d3bf71 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -350,13 +350,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
task_io_account_write(bio->bi_iter.bi_size);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
+
if (iocb->ki_flags & IOCB_HIPRI) {
- bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+ bio->bi_opf |= REQ_POLLED;
submit_bio(bio);
WRITE_ONCE(iocb->private, bio);
} else {
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio->bi_opf |= REQ_NOWAIT;
submit_bio(bio);
}
return -EIOCBQUEUED;
diff --git a/block/genhd.c b/block/genhd.c
index 3d287b32d50df..cc32a0c704eb8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -554,7 +554,7 @@ out_exit_elevator:
}
EXPORT_SYMBOL(device_add_disk);
-static void blk_report_disk_dead(struct gendisk *disk)
+static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
{
struct block_device *bdev;
unsigned long idx;
@@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
continue;
rcu_read_unlock();
- mutex_lock(&bdev->bd_holder_lock);
- if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
- bdev->bd_holder_ops->mark_dead(bdev);
- mutex_unlock(&bdev->bd_holder_lock);
+ bdev_mark_dead(bdev, surprise);
put_device(&bdev->bd_device);
rcu_read_lock();
@@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
rcu_read_unlock();
}
-/**
- * blk_mark_disk_dead - mark a disk as dead
- * @disk: disk to mark as dead
- *
- * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
- * to this disk.
- */
-void blk_mark_disk_dead(struct gendisk *disk)
+static void __blk_mark_disk_dead(struct gendisk *disk)
{
/*
* Fail any new I/O.
@@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk)
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(disk->queue);
+}
- blk_report_disk_dead(disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+ __blk_mark_disk_dead(disk);
+ blk_report_disk_dead(disk, true);
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
@@ -641,18 +642,20 @@ void del_gendisk(struct gendisk *disk)
disk_del_events(disk);
/*
- * Prevent new openers by unlinked the bdev inode, and write out
- * dirty data before marking the disk dead and stopping all I/O.
+ * Prevent new openers by unlinked the bdev inode.
*/
mutex_lock(&disk->open_mutex);
- xa_for_each(&disk->part_tbl, idx, part) {
+ xa_for_each(&disk->part_tbl, idx, part)
remove_inode_hash(part->bd_inode);
- fsync_bdev(part);
- __invalidate_device(part, true);
- }
mutex_unlock(&disk->open_mutex);
- blk_mark_disk_dead(disk);
+ /*
+ * Tell the file system to write back all dirty data and shut down if
+ * it hasn't been notified earlier.
+ */
+ if (!test_bit(GD_DEAD, &disk->state))
+ blk_report_disk_dead(disk, false);
+ __blk_mark_disk_dead(disk);
/*
* Drop all partitions now that the disk is marked dead.
diff --git a/block/ioctl.c b/block/ioctl.c
index 3be11941fb2dd..648670ddb164a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
{
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- fsync_bdev(bdev);
+
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
+ bdev->bd_holder_ops->sync(bdev);
+ else
+ sync_blockdev(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
+
invalidate_bdev(bdev);
return 0;
}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 13a7341299a91..e137a87f4db0d 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part)
* looked up any more even when openers still hold references.
*/
remove_inode_hash(part->bd_inode);
-
- fsync_bdev(part);
- __invalidate_device(part, true);
-
+ bdev_mark_dead(part, false);
drop_partition(part);
}