Diffstat (limited to 'block')
-rw-r--r--  block/bdev.c                 69
-rw-r--r--  block/blk-cgroup.c            2
-rw-r--r--  block/blk-core.c              9
-rw-r--r--  block/blk-crypto-fallback.c  36
-rw-r--r--  block/blk-iocost.c            9
-rw-r--r--  block/blk-mq.c               32
-rw-r--r--  block/disk-events.c          23
-rw-r--r--  block/elevator.c              3
-rw-r--r--  block/fops.c                  7
-rw-r--r--  block/genhd.c                45
-rw-r--r--  block/ioctl.c                 9
-rw-r--r--  block/partitions/core.c       5
12 files changed, 139 insertions, 110 deletions
diff --git a/block/bdev.c b/block/bdev.c
index 979e28a46b988..f3b13aa1b7d42 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -206,23 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
 }
 EXPORT_SYMBOL(sync_blockdev_range);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-        struct super_block *sb = get_super(bdev);
-        if (sb) {
-                int res = sync_filesystem(sb);
-                drop_super(sb);
-                return res;
-        }
-        return sync_blockdev(bdev);
-}
-EXPORT_SYMBOL(fsync_bdev);
-
 /**
  * freeze_bdev - lock a filesystem and force it into a consistent state
  * @bdev: blockdevice to lock
@@ -248,9 +231,9 @@ int freeze_bdev(struct block_device *bdev)
         if (!sb)
                 goto sync;
         if (sb->s_op->freeze_super)
-                error = sb->s_op->freeze_super(sb);
+                error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
         else
-                error = freeze_super(sb);
+                error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
         deactivate_super(sb);
 
         if (error) {
@@ -291,9 +274,9 @@ int thaw_bdev(struct block_device *bdev)
                 goto out;
 
         if (sb->s_op->thaw_super)
-                error = sb->s_op->thaw_super(sb);
+                error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
         else
-                error = thaw_super(sb);
+                error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
         if (error)
                 bdev->bd_fsfreeze_count++;
         else
@@ -960,26 +943,38 @@ out_path_put:
 }
 EXPORT_SYMBOL(lookup_bdev);
 
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+/**
+ * bdev_mark_dead - mark a block device as dead
+ * @bdev: block device to operate on
+ * @surprise: indicate a surprise removal
+ *
+ * Tell the file system that this device or media is dead. If @surprise is set
+ * to %true the device or media is already gone, if not we are preparing for an
+ * orderly removal.
+ *
+ * This calls into the file system, which then typically syncs out all dirty data
+ * and writes back inodes and then invalidates any cached data in the inodes on
+ * the file system. In addition we also invalidate the block device mapping.
+ */
+void bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
-        struct super_block *sb = get_super(bdev);
-        int res = 0;
+        mutex_lock(&bdev->bd_holder_lock);
+        if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+                bdev->bd_holder_ops->mark_dead(bdev, surprise);
+        else
+                sync_blockdev(bdev);
+        mutex_unlock(&bdev->bd_holder_lock);
 
-        if (sb) {
-                /*
-                 * no need to lock the super, get_super holds the
-                 * read mutex so the filesystem cannot go away
-                 * under us (->put_super runs with the write lock
-                 * hold).
-                 */
-                shrink_dcache_sb(sb);
-                res = invalidate_inodes(sb, kill_dirty);
-                drop_super(sb);
-        }
         invalidate_bdev(bdev);
-        return res;
 }
-EXPORT_SYMBOL(__invalidate_device);
+#ifdef CONFIG_DASD_MODULE
+/*
+ * Drivers should not use this directly, but the DASD driver has historically
+ * had a shutdown to offline mode that doesn't actually remove the gendisk
+ * that otherwise looks a lot like a safe device removal.
+ */
+EXPORT_SYMBOL_GPL(bdev_mark_dead);
+#endif
 
 void sync_bdevs(bool wait)
 {
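For context: bdev_mark_dead() dispatches through the bd_holder_ops that a filesystem registers when it opens the device, and the BLKFLSBUF change later in this series goes through the same table's ->sync hook. A minimal sketch of what a filesystem-side holder might look like; fs_bdev_mark_dead() and fs_bdev_sync() are hypothetical names, not part of this diff, and a real implementation must also pin the superblock against a concurrent unmount (the callbacks run under bd_holder_lock):

static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
{
        struct super_block *sb = bdev->bd_holder;       /* assumption: the sb is the holder */

        if (!surprise)
                sync_filesystem(sb);    /* orderly removal: data can still reach the disk */
        shrink_dcache_sb(sb);
        /* invalidate cached inode data here; bdev_mark_dead() already
         * handles the block device mapping itself */
}

static void fs_bdev_sync(struct block_device *bdev)
{
        struct super_block *sb = bdev->bd_holder;

        sync_filesystem(sb);
}

static const struct blk_holder_ops fs_holder_ops = {
        .mark_dead      = fs_bdev_mark_dead,
        .sync           = fs_bdev_sync,
};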
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 638400bf049f2..4a42ea2972ad8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work)
                         blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
         if (blkg->parent)
                 blkg_put(blkg->parent);
+        spin_lock_irq(&q->queue_lock);
         list_del_init(&blkg->q_node);
+        spin_unlock_irq(&q->queue_lock);
         mutex_unlock(&q->blkcg_mutex);
 
         blk_put_queue(q);
diff --git a/block/blk-core.c b/block/blk-core.c
index 141e54545cc1d..9d51e9894ece7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -723,14 +723,9 @@ void submit_bio_noacct(struct bio *bio)
         struct block_device *bdev = bio->bi_bdev;
         struct request_queue *q = bdev_get_queue(bdev);
         blk_status_t status = BLK_STS_IOERR;
-        struct blk_plug *plug;
 
         might_sleep();
 
-        plug = blk_mq_plug(bio);
-        if (plug && plug->nowait)
-                bio->bi_opf |= REQ_NOWAIT;
-
         /*
          * For a REQ_NOWAIT based request, return -EOPNOTSUPP
          * if queue does not support NOWAIT.
@@ -1060,7 +1055,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
         plug->rq_count = 0;
         plug->multiple_queues = false;
         plug->has_elevator = false;
-        plug->nowait = false;
         INIT_LIST_HEAD(&plug->cb_list);
 
         /*
@@ -1145,8 +1139,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 {
         if (!list_empty(&plug->cb_list))
                 flush_plug_callbacks(plug, from_schedule);
-        if (!rq_list_empty(plug->mq_list))
-                blk_mq_flush_plug_list(plug, from_schedule);
+        blk_mq_flush_plug_list(plug, from_schedule);
         /*
          * Unconditionally flush out cached requests, even if the unplug
          * event came from schedule. Since we know hold references to the
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index ad9844c5b40cb..e6468eab2681e 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
         struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
 } *blk_crypto_keyslots;
 
-static struct blk_crypto_profile blk_crypto_fallback_profile;
+static struct blk_crypto_profile *blk_crypto_fallback_profile;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
 static struct bio_set crypto_bio_split;
@@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
          * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
          * this bio's algorithm and key.
          */
-        blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+        blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                         bc->bc_key, &slot);
         if (blk_st != BLK_STS_OK) {
                 src_bio->bi_status = blk_st;
@@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
          * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
          * this bio's algorithm and key.
          */
-        blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+        blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                         bc->bc_key, &slot);
         if (blk_st != BLK_STS_OK) {
                 bio->bi_status = blk_st;
@@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
                 return false;
         }
 
-        if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+        if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile,
                                         &bc->bc_key->crypto_cfg)) {
                 bio->bi_status = BLK_STS_NOTSUPP;
                 return false;
@@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 
 int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
 {
-        return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
+        return __blk_crypto_evict_key(blk_crypto_fallback_profile, key);
 }
 
 static bool blk_crypto_fallback_inited;
@@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void)
 {
         int i;
         int err;
-        struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
 
         if (blk_crypto_fallback_inited)
                 return 0;
@@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void)
         if (err)
                 goto out;
 
-        err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
-        if (err)
+        /* Dynamic allocation is needed because of lockdep_register_key(). */
+        blk_crypto_fallback_profile =
+                kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL);
+        if (!blk_crypto_fallback_profile) {
+                err = -ENOMEM;
                 goto fail_free_bioset;
+        }
+
+        err = blk_crypto_profile_init(blk_crypto_fallback_profile,
+                                      blk_crypto_num_keyslots);
+        if (err)
+                goto fail_free_profile;
         err = -ENOMEM;
 
-        profile->ll_ops = blk_crypto_fallback_ll_ops;
-        profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+        blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
+        blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
 
         /* All blk-crypto modes have a crypto API fallback. */
         for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
-                profile->modes_supported[i] = 0xFFFFFFFF;
-        profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+                blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF;
+        blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
 
         blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
                                         WQ_UNBOUND | WQ_HIGHPRI |
@@ -597,7 +605,9 @@ fail_free_keyslots:
 fail_free_wq:
         destroy_workqueue(blk_crypto_wq);
 fail_destroy_profile:
-        blk_crypto_profile_destroy(profile);
+        blk_crypto_profile_destroy(blk_crypto_fallback_profile);
+fail_free_profile:
+        kfree(blk_crypto_fallback_profile);
 fail_free_bioset:
         bioset_exit(&crypto_bio_split);
 out:
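The "Dynamic allocation is needed because of lockdep_register_key()" comment refers to a lockdep rule: lockdep_register_key() is only for keys in dynamically allocated storage (keys in static storage are handled implicitly), so an object that embeds its own lock_class_key cannot live in .bss. A sketch of the pattern, using a hypothetical struct my_profile rather than the real blk_crypto_profile internals:

struct my_profile {
        struct lock_class_key lockdep_key;      /* must not be in static storage */
        struct rw_semaphore lock;
};

static struct my_profile *my_profile_alloc(void)
{
        struct my_profile *p = kzalloc(sizeof(*p), GFP_KERNEL);

        if (!p)
                return NULL;
        lockdep_register_key(&p->lockdep_key);
        __init_rwsem(&p->lock, "&p->lock", &p->lockdep_key);
        return p;
}

static void my_profile_free(struct my_profile *p)
{
        lockdep_unregister_key(&p->lockdep_key);
        kfree(p);
}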
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9dfcf540f4001..089fcb9cfce37 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2516,6 +2516,10 @@ static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
         u64 seek_pages = 0;
         u64 cost = 0;
 
+        /* Can't calculate cost for empty bio */
+        if (!bio->bi_iter.bi_size)
+                goto out;
+
         switch (bio_op(bio)) {
         case REQ_OP_READ:
                 coef_seqio = ioc->params.lcoefs[LCOEF_RSEQIO];
@@ -3297,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
         if (qos[QOS_MIN] > qos[QOS_MAX])
                 goto einval;
 
-        if (enable) {
+        if (enable && !ioc->enabled) {
                 blk_stat_enable_accounting(disk->queue);
                 blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                 ioc->enabled = true;
-        } else {
+        } else if (!enable && ioc->enabled) {
+                blk_stat_disable_accounting(disk->queue);
                 blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                 ioc->enabled = false;
         }
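The new empty-bio check in calc_vtime_cost_builtin() matters because zero-size bios do reach the cost model; a pure cache flush is the common case. A sketch of such a bio, modeled on what blkdev_issue_flush() does (issue_flush() is a hypothetical name, error handling trimmed):

static int issue_flush(struct block_device *bdev)
{
        struct bio *bio;
        int ret;

        /* no payload: bi_iter.bi_size stays 0 all the way into the cost model */
        bio = bio_alloc(bdev, 0, REQ_OP_WRITE | REQ_PREFLUSH, GFP_KERNEL);
        ret = submit_bio_wait(bio);
        bio_put(bio);
        return ret;
}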
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 84400157c5f4e..ec922c6bccbe2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -682,6 +682,21 @@ out_queue_exit:
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void blk_mq_finish_request(struct request *rq)
+{
+        struct request_queue *q = rq->q;
+
+        if (rq->rq_flags & RQF_USE_SCHED) {
+                q->elevator->type->ops.finish_request(rq);
+                /*
+                 * For postflush requests that may need to be
+                 * completed twice, we should clear this flag
+                 * to avoid double finish_request() on the rq.
+                 */
+                rq->rq_flags &= ~RQF_USE_SCHED;
+        }
+}
+
 static void __blk_mq_free_request(struct request *rq)
 {
         struct request_queue *q = rq->q;
@@ -708,9 +723,7 @@ void blk_mq_free_request(struct request *rq)
 {
         struct request_queue *q = rq->q;
 
-        if ((rq->rq_flags & RQF_USE_SCHED) &&
-            q->elevator->type->ops.finish_request)
-                q->elevator->type->ops.finish_request(rq);
+        blk_mq_finish_request(rq);
 
         if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                 laptop_io_completion(q->disk->bdi);
@@ -1021,6 +1034,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
         if (blk_mq_need_time_stamp(rq))
                 __blk_mq_end_request_acct(rq, ktime_get_ns());
 
+        blk_mq_finish_request(rq);
+
         if (rq->end_io) {
                 rq_qos_done(rq->q, rq);
                 if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1075,6 +1090,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
                 if (iob->need_ts)
                         __blk_mq_end_request_acct(rq, now);
 
+                blk_mq_finish_request(rq);
+
                 rq_qos_done(rq->q, rq);
 
                 /*
@@ -2755,7 +2772,14 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
         struct request *rq;
 
-        if (rq_list_empty(plug->mq_list))
+        /*
+         * We may have been called recursively midway through handling
+         * plug->mq_list via a schedule() in the driver's queue_rq() callback.
+         * To avoid mq_list changing under our feet, clear rq_count early and
+         * bail out specifically if rq_count is 0 rather than checking
+         * whether the mq_list is empty.
+         */
+        if (plug->rq_count == 0)
                 return;
         plug->rq_count = 0;
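The rq_count-versus-list-emptiness distinction in blk_mq_flush_plug_list() is subtle enough to restate in miniature. The sketch below (hypothetical types, not blk-mq's) shows the shape of the guard: consume the count before walking the list, so a recursive call triggered from the handler observes zero and bails out even though entries are still being drained:

struct item {
        struct item *next;
};

static void handle(struct item *item);  /* stand-in for the driver callout */

struct plug {
        struct item *head;
        unsigned short rq_count;
};

static void flush(struct plug *plug)
{
        if (plug->rq_count == 0)        /* also catches recursion into a half-drained plug */
                return;
        plug->rq_count = 0;

        while (plug->head) {
                struct item *item = plug->head;

                plug->head = item->next;
                handle(item);           /* may block and re-enter flush() via schedule() */
        }
}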
diff --git a/block/disk-events.c b/block/disk-events.c
index 0cfac464e6d12..422db8292d099 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
         if (!(events & DISK_EVENT_MEDIA_CHANGE))
                 return false;
 
-        if (__invalidate_device(disk->part0, true))
-                pr_warn("VFS: busy inodes on changed media %s\n",
-                        disk->disk_name);
+        bdev_mark_dead(disk->part0, true);
         set_bit(GD_NEED_PART_SCAN, &disk->state);
         return true;
 }
@@ -294,25 +292,16 @@ EXPORT_SYMBOL(disk_check_media_change);
  * @disk: the disk which will raise the event
  * @events: the events to raise
  *
- * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
- * attempt to free all dentries and inodes and invalidates all block
+ * Should be called when the media changes for @disk. Generates a uevent
+ * and attempts to free all dentries and inodes and invalidates all block
  * device page cache entries in that case.
- *
- * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
  */
-bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+void disk_force_media_change(struct gendisk *disk)
 {
-        disk_event_uevent(disk, events);
-
-        if (!(events & DISK_EVENT_MEDIA_CHANGE))
-                return false;
-
+        disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
         inc_diskseq(disk);
-        if (__invalidate_device(disk->part0, true))
-                pr_warn("VFS: busy inodes on changed media %s\n",
-                        disk->disk_name);
+        bdev_mark_dead(disk->part0, true);
         set_bit(GD_NEED_PART_SCAN, &disk->state);
-        return true;
 }
 EXPORT_SYMBOL_GPL(disk_force_media_change);
diff --git a/block/elevator.c b/block/elevator.c
index 8400e303fbcbd..5ff093cb3cf8f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+        /* finish_request is mandatory */
+        if (WARN_ON_ONCE(!e->ops.finish_request))
+                return -EINVAL;
         /* insert_requests and dispatch_request are mandatory */
         if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
                 return -EINVAL;
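With finish_request now mandatory, an elevator that previously omitted the hook must provide one before elv_register() succeeds. A registration-time sketch follows; the "sketch" names are hypothetical, and a functional scheduler needs many more ops and real bookkeeping:

static void sketch_insert_requests(struct blk_mq_hw_ctx *hctx,
                                   struct list_head *list, blk_insert_t flags)
{
        /* queue the requests on scheduler-private state */
}

static struct request *sketch_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
        return NULL;    /* nothing queued in this sketch */
}

static void sketch_finish_request(struct request *rq)
{
        /* release per-request scheduler state; called once per completion,
         * since blk_mq_finish_request() clears RQF_USE_SCHED afterwards */
}

static struct elevator_type sketch_sched = {
        .ops = {
                .insert_requests        = sketch_insert_requests,
                .dispatch_request       = sketch_dispatch_request,
                .finish_request         = sketch_finish_request, /* now mandatory */
        },
        .elevator_name = "noop-sketch",
        .elevator_owner = THIS_MODULE,
};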
diff --git a/block/fops.c b/block/fops.c
index eaa98a987213d..a24a624d3bf71 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -350,13 +350,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
                 task_io_account_write(bio->bi_iter.bi_size);
         }
 
+        if (iocb->ki_flags & IOCB_NOWAIT)
+                bio->bi_opf |= REQ_NOWAIT;
+
         if (iocb->ki_flags & IOCB_HIPRI) {
-                bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+                bio->bi_opf |= REQ_POLLED;
                 submit_bio(bio);
                 WRITE_ONCE(iocb->private, bio);
         } else {
-                if (iocb->ki_flags & IOCB_NOWAIT)
-                        bio->bi_opf |= REQ_NOWAIT;
                 submit_bio(bio);
         }
         return -EIOCBQUEUED;
diff --git a/block/genhd.c b/block/genhd.c
index 3d287b32d50df..cc32a0c704eb8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -554,7 +554,7 @@ out_exit_elevator:
 }
 EXPORT_SYMBOL(device_add_disk);
 
-static void blk_report_disk_dead(struct gendisk *disk)
+static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
 {
         struct block_device *bdev;
         unsigned long idx;
@@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
                         continue;
                 rcu_read_unlock();
 
-                mutex_lock(&bdev->bd_holder_lock);
-                if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
-                        bdev->bd_holder_ops->mark_dead(bdev);
-                mutex_unlock(&bdev->bd_holder_lock);
+                bdev_mark_dead(bdev, surprise);
 
                 put_device(&bdev->bd_device);
                 rcu_read_lock();
@@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
         rcu_read_unlock();
 }
 
-/**
- * blk_mark_disk_dead - mark a disk as dead
- * @disk: disk to mark as dead
- *
- * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
- * to this disk.
- */
-void blk_mark_disk_dead(struct gendisk *disk)
+static void __blk_mark_disk_dead(struct gendisk *disk)
 {
         /*
          * Fail any new I/O.
@@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk)
          * Prevent new I/O from crossing bio_queue_enter().
          */
         blk_queue_start_drain(disk->queue);
+}
 
-        blk_report_disk_dead(disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark the disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+        __blk_mark_disk_dead(disk);
+        blk_report_disk_dead(disk, true);
 }
 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
 
@@ -641,18 +642,20 @@ void del_gendisk(struct gendisk *disk)
         disk_del_events(disk);
 
         /*
-         * Prevent new openers by unlinked the bdev inode, and write out
-         * dirty data before marking the disk dead and stopping all I/O.
+         * Prevent new openers by unlinking the bdev inode.
          */
         mutex_lock(&disk->open_mutex);
-        xa_for_each(&disk->part_tbl, idx, part) {
+        xa_for_each(&disk->part_tbl, idx, part)
                 remove_inode_hash(part->bd_inode);
-                fsync_bdev(part);
-                __invalidate_device(part, true);
-        }
         mutex_unlock(&disk->open_mutex);
 
-        blk_mark_disk_dead(disk);
+        /*
+         * Tell the file system to write back all dirty data and shut down if
+         * it hasn't been notified earlier.
+         */
+        if (!test_bit(GD_DEAD, &disk->state))
+                blk_report_disk_dead(disk, false);
+        __blk_mark_disk_dead(disk);
 
         /*
          * Drop all partitions now that the disk is marked dead.
diff --git a/block/ioctl.c b/block/ioctl.c
index 3be11941fb2dd..648670ddb164a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
 {
         if (!capable(CAP_SYS_ADMIN))
                 return -EACCES;
-        fsync_bdev(bdev);
+
+        mutex_lock(&bdev->bd_holder_lock);
+        if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
+                bdev->bd_holder_ops->sync(bdev);
+        else
+                sync_blockdev(bdev);
+        mutex_unlock(&bdev->bd_holder_lock);
+
         invalidate_bdev(bdev);
         return 0;
 }
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 13a7341299a91..e137a87f4db0d 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part)
          * looked up any more even when openers still hold references.
          */
         remove_inode_hash(part->bd_inode);
-
-        fsync_bdev(part);
-        __invalidate_device(part, true);
-
+        bdev_mark_dead(part, false);
         drop_partition(part);
 }
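End-to-end, the BLKFLSBUF path above is what a plain userspace flush request now hits: sync via the holder (i.e. the filesystem) when one is registered, sync_blockdev() otherwise, then invalidate_bdev(). The userspace side is unchanged by this series; for reference:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <blockdev>\n", argv[0]);
                return 1;
        }
        fd = open(argv[1], O_RDONLY);
        if (fd < 0 || ioctl(fd, BLKFLSBUF, 0) < 0) {    /* needs CAP_SYS_ADMIN */
                perror("BLKFLSBUF");
                return 1;
        }
        close(fd);
        return 0;
}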