aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 20:21:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 20:21:42 -0700
commit3d3dfeb3aec7b612d266d500c82054f1fded4980 (patch)
tree11649eab5c74deb74e6e0879613e8053ae3b9970 /block
parentc1b7fcf3f6d94c2c3528bf77054bf174a5ef63d7 (diff)
parent146afeb235ccec10c17ad8ea26327c0c79dbd968 (diff)
downloadlinux-3d3dfeb3aec7b612d266d500c82054f1fded4980.tar.gz
Merge tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: "Pretty quiet round for this release. This contains: - Add support for zoned storage to ublk (Andreas, Ming) - Series improving performance for drivers that mark themselves as needing a blocking context for issue (Bart) - Cleanup the flush logic (Chengming) - sed opal keyring support (Greg) - Fixes and improvements to the integrity support (Jinyoung) - Add some exports for bcachefs that we can hopefully delete again in the future (Kent) - deadline throttling fix (Zhiguo) - Series allowing building the kernel without buffer_head support (Christoph) - Sanitize the bio page adding flow (Christoph) - Write back cache fixes (Christoph) - MD updates via Song: - Fix perf regression for raid0 large sequential writes (Jan) - Fix split bio iostat for raid0 (David) - Various raid1 fixes (Heinz, Xueshi) - raid6test build fixes (WANG) - Deprecate bitmap file support (Christoph) - Fix deadlock with md sync thread (Yu) - Refactor md io accounting (Yu) - Various non-urgent fixes (Li, Yu, Jack) - Various fixes and cleanups (Arnd, Azeem, Chengming, Damien, Li, Ming, Nitesh, Ruan, Tejun, Thomas, Xu)" * tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux: (113 commits) block: use strscpy() to instead of strncpy() block: sed-opal: keyring support for SED keys block: sed-opal: Implement IOC_OPAL_REVERT_LSP block: sed-opal: Implement IOC_OPAL_DISCOVERY blk-mq: prealloc tags when increase tagset nr_hw_queues blk-mq: delete redundant tagset map update when fallback blk-mq: fix tags leak when shrink nr_hw_queues ublk: zoned: support REQ_OP_ZONE_RESET_ALL md: raid0: account for split bio in iostat accounting md/raid0: Fix performance regression for large sequential writes md/raid0: Factor out helper for mapping and submitting a bio md raid1: allow writebehind to work on any leg device set WriteMostly md/raid1: hold the barrier until handle_read_error() finishes md/raid1: free the r1bio before waiting for blocked rdev md/raid1: call free_r1bio() before allow_barrier() in raid_end_bio_io() blk-cgroup: Fix NULL deref caused by blkg_policy_data being installed before init drivers/rnbd: restore sysfs interface to rnbd-client md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid() raid6: test: only check for Altivec if building on powerpc hosts raid6: test: make sure all intermediate and artifact files are .gitignored ...
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig3
-rw-r--r--block/bio-integrity.c59
-rw-r--r--block/bio.c142
-rw-r--r--block/blk-cgroup.c32
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-flush.c26
-rw-r--r--block/blk-iolatency.c35
-rw-r--r--block/blk-mq.c45
-rw-r--r--block/blk-settings.c7
-rw-r--r--block/blk-sysfs.c21
-rw-r--r--block/blk.h10
-rw-r--r--block/fops.c143
-rw-r--r--block/mq-deadline.c3
-rw-r--r--block/opal_proto.h4
-rw-r--r--block/partitions/cmdline.c12
-rw-r--r--block/sed-opal.c252
16 files changed, 579 insertions, 216 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 86122e459fe046..f1364d1c0d93eb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,6 +5,7 @@
menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
+ select FS_IOMAP
select SBITMAP
help
Provide block layer support for the kernel.
@@ -183,6 +184,8 @@ config BLK_DEBUG_FS_ZONED
config BLK_SED_OPAL
bool "Logic for interfacing with Opal enabled SEDs"
+ depends on KEYS
+ select PSERIES_PLPKS if PPC_PSERIES
help
Builds Logic for interfacing with Opal enabled controllers.
Enabling this option enables users to setup/unlock/lock
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4533eb49166109..ec8ac8cf6e1b98 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio)
int bio_integrity_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_integrity_payload *bip = bio_integrity(bio);
- if (bip->bip_vcnt >= bip->bip_max_vcnt) {
- printk(KERN_ERR "%s: bip_vec full\n", __func__);
+ if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
+ queue_max_hw_sectors(q))
return 0;
- }
- if (bip->bip_vcnt &&
- bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
- &bip->bip_vec[bip->bip_vcnt - 1], offset))
- return 0;
+ if (bip->bip_vcnt > 0) {
+ struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
+ bool same_page = false;
+
+ if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+ &same_page)) {
+ bip->bip_iter.bi_size += len;
+ return len;
+ }
+
+ if (bip->bip_vcnt >=
+ min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
+ return 0;
+
+ /*
+ * If the queue doesn't support SG gaps and adding this segment
+ * would create a gap, disallow it.
+ */
+ if (bvec_gap_to_prev(&q->limits, bv, offset))
+ return 0;
+ }
bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
bip->bip_vcnt++;
+ bip->bip_iter.bi_size += len;
return len;
}
@@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio)
unsigned long start, end;
unsigned int len, nr_pages;
unsigned int bytes, offset, i;
- unsigned int intervals;
- blk_status_t status;
if (!bi)
return true;
@@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio)
!(bi->flags & BLK_INTEGRITY_GENERATE))
return true;
}
- intervals = bio_integrity_intervals(bi, bio_sectors(bio));
/* Allocate kernel buffer for protection data */
- len = intervals * bi->tuple_size;
+ len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, GFP_NOIO);
- status = BLK_STS_RESOURCE;
if (unlikely(buf == NULL)) {
printk(KERN_ERR "could not allocate integrity buffer\n");
goto err_end_io;
@@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio)
if (IS_ERR(bip)) {
printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
- status = BLK_STS_RESOURCE;
goto err_end_io;
}
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
- bip->bip_iter.bi_size = len;
bip_set_seed(bip, bio->bi_iter.bi_sector);
if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
@@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio)
/* Map it */
offset = offset_in_page(buf);
- for (i = 0 ; i < nr_pages ; i++) {
- int ret;
+ for (i = 0; i < nr_pages && len > 0; i++) {
bytes = PAGE_SIZE - offset;
- if (len <= 0)
- break;
-
if (bytes > len)
bytes = len;
- ret = bio_integrity_add_page(bio, virt_to_page(buf),
- bytes, offset);
-
- if (ret == 0) {
+ if (bio_integrity_add_page(bio, virt_to_page(buf),
+ bytes, offset) < bytes) {
printk(KERN_ERR "could not attach integrity payload\n");
- status = BLK_STS_RESOURCE;
goto err_end_io;
}
- if (ret < bytes)
- break;
-
buf += bytes;
len -= bytes;
offset = 0;
@@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio)
return true;
err_end_io:
- bio->bi_status = status;
+ bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return false;
-
}
EXPORT_SYMBOL(bio_integrity_prep);
diff --git a/block/bio.c b/block/bio.c
index 8672179213b939..816d412c06e9b4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
}
EXPORT_SYMBOL(bio_kmalloc);
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter)
+ __bio_for_each_segment(bv, bio, iter, start)
memzero_bvec(&bv);
}
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
/**
* bio_truncate - truncate the bio to small size of @new_size
@@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len)
return false;
}
-static inline bool page_is_mergeable(const struct bio_vec *bv,
- struct page *page, unsigned int len, unsigned int off,
- bool *same_page)
+static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
+ unsigned int len, unsigned int off, bool *same_page)
{
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -919,49 +918,15 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return false;
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
- if (*same_page)
- return true;
- else if (IS_ENABLED(CONFIG_KMSAN))
- return false;
- return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
-}
-
-/**
- * __bio_try_merge_page - try appending data to an existing bvec.
- * @bio: destination bio
- * @page: start page to add
- * @len: length of the data to add
- * @off: offset of the data relative to @page
- * @same_page: return if the segment has been merged inside the same page
- *
- * Try to add the data at @page + @off to the last bvec of @bio. This is a
- * useful optimisation for file systems with a block size smaller than the
- * page size.
- *
- * Warn if (@len, @off) crosses pages in case that @same_page is true.
- *
- * Return %true on success or %false on failure.
- */
-static bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off, bool *same_page)
-{
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return false;
-
- if (bio->bi_vcnt > 0) {
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (page_is_mergeable(bv, page, len, off, same_page)) {
- if (bio->bi_iter.bi_size > UINT_MAX - len) {
- *same_page = false;
- return false;
- }
- bv->bv_len += len;
- bio->bi_iter.bi_size += len;
- return true;
- }
+ if (!*same_page) {
+ if (IS_ENABLED(CONFIG_KMSAN))
+ return false;
+ if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
+ return false;
}
- return false;
+
+ bv->bv_len += len;
+ return true;
}
/*
@@ -969,11 +934,10 @@ static bool __bio_try_merge_page(struct bio *bio, struct page *page,
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
*/
-static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned len,
- unsigned offset, bool *same_page)
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+ struct page *page, unsigned len, unsigned offset,
+ bool *same_page)
{
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
@@ -982,7 +946,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
return false;
if (bv->bv_len + len > queue_max_segment_size(q))
return false;
- return __bio_try_merge_page(bio, page, len, offset, same_page);
+ return bvec_try_merge_page(bv, page, len, offset, same_page);
}
/**
@@ -1002,33 +966,33 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
{
- struct bio_vec *bvec;
-
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
- if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
+ if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
return 0;
if (bio->bi_vcnt > 0) {
- if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+ if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+ same_page)) {
+ bio->bi_iter.bi_size += len;
return len;
+ }
+
+ if (bio->bi_vcnt >=
+ min(bio->bi_max_vecs, queue_max_segments(q)))
+ return 0;
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
- bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (bvec_gap_to_prev(&q->limits, bvec, offset))
+ if (bvec_gap_to_prev(&q->limits, bv, offset))
return 0;
}
- if (bio_full(bio, len))
- return 0;
-
- if (bio->bi_vcnt >= queue_max_segments(q))
- return 0;
-
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
bio->bi_vcnt++;
bio->bi_iter.bi_size += len;
@@ -1129,11 +1093,21 @@ int bio_add_page(struct bio *bio, struct page *page,
{
bool same_page = false;
- if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
- if (bio_full(bio, len))
- return 0;
- __bio_add_page(bio, page, len, offset);
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+ return 0;
+ if (bio->bi_iter.bi_size > UINT_MAX - len)
+ return 0;
+
+ if (bio->bi_vcnt > 0 &&
+ bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
+ page, len, offset, &same_page)) {
+ bio->bi_iter.bi_size += len;
+ return len;
}
+
+ if (bio->bi_vcnt >= bio->bi_max_vecs)
+ return 0;
+ __bio_add_page(bio, page, len, offset);
return len;
}
EXPORT_SYMBOL(bio_add_page);
@@ -1207,13 +1181,18 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
{
bool same_page = false;
- if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
- __bio_add_page(bio, page, len, offset);
+ if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
+ return -EIO;
+
+ if (bio->bi_vcnt > 0 &&
+ bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
+ page, len, offset, &same_page)) {
+ bio->bi_iter.bi_size += len;
+ if (same_page)
+ bio_release_page(bio, page);
return 0;
}
-
- if (same_page)
- bio_release_page(bio, page);
+ __bio_add_page(bio, page, len, offset);
return 0;
}
@@ -1252,7 +1231,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct page **pages = (struct page **)bv;
ssize_t size, left;
unsigned len, i = 0;
- size_t offset, trim;
+ size_t offset;
int ret = 0;
/*
@@ -1281,10 +1260,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
- trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
- iov_iter_revert(iter, trim);
+ if (bio->bi_bdev) {
+ size_t trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
+ iov_iter_revert(iter, trim);
+ size -= trim;
+ }
- size -= trim;
if (unlikely(!size)) {
ret = -EFAULT;
goto out;
@@ -1337,6 +1318,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
int ret = 0;
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+ return -EIO;
+
if (iov_iter_is_bvec(iter)) {
bio_iov_bvec_set(bio, iter);
iov_iter_advance(iter, bio->bi_iter.bi_size);
@@ -1490,6 +1474,7 @@ void bio_set_pages_dirty(struct bio *bio)
set_page_dirty_lock(bvec->bv_page);
}
}
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
/*
* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
@@ -1549,6 +1534,7 @@ defer:
spin_unlock_irqrestore(&bio_dirty_lock, flags);
schedule_work(&bio_dirty_work);
}
+EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
static inline bool bio_remaining_done(struct bio *bio)
{
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9faafcd10e1775..4a42ea2972ad85 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1511,7 +1511,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
retry:
spin_lock_irq(&q->queue_lock);
- /* blkg_list is pushed at the head, reverse walk to allocate parents first */
+ /* blkg_list is pushed at the head, reverse walk to initialize parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
@@ -1549,21 +1549,20 @@ retry:
goto enomem;
}
- blkg->pd[pol->plid] = pd;
+ spin_lock(&blkg->blkcg->lock);
+
pd->blkg = blkg;
pd->plid = pol->plid;
- pd->online = false;
- }
+ blkg->pd[pol->plid] = pd;
- /* all allocated, init in the same order */
- if (pol->pd_init_fn)
- list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
- pol->pd_init_fn(blkg->pd[pol->plid]);
+ if (pol->pd_init_fn)
+ pol->pd_init_fn(pd);
- list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
if (pol->pd_online_fn)
- pol->pd_online_fn(blkg->pd[pol->plid]);
- blkg->pd[pol->plid]->online = true;
+ pol->pd_online_fn(pd);
+ pd->online = true;
+
+ spin_unlock(&blkg->blkcg->lock);
}
__set_bit(pol->plid, q->blkcg_pols);
@@ -1580,14 +1579,19 @@ out:
return ret;
enomem:
- /* alloc failed, nothing's initialized yet, free everything */
+ /* alloc failed, take down everything */
spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
+ struct blkg_policy_data *pd;
spin_lock(&blkcg->lock);
- if (blkg->pd[pol->plid]) {
- pol->pd_free_fn(blkg->pd[pol->plid]);
+ pd = blkg->pd[pol->plid];
+ if (pd) {
+ if (pd->online && pol->pd_offline_fn)
+ pol->pd_offline_fn(pd);
+ pd->online = false;
+ pol->pd_free_fn(pd);
blkg->pd[pol->plid] = NULL;
}
spin_unlock(&blkcg->lock);
diff --git a/block/blk-core.c b/block/blk-core.c
index 9866468c72a2a6..9d51e9894ece78 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -208,6 +208,7 @@ const char *blk_status_to_str(blk_status_t status)
return "<null>";
return blk_errors[idx].name;
}
+EXPORT_SYMBOL_GPL(blk_status_to_str);
/**
* blk_sync_queue - cancel any pending callbacks on a queue
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8220517c2d67da..e73dc22d05c1d1 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -183,13 +183,13 @@ static void blk_flush_complete_seq(struct request *rq,
/* queue for flush */
if (list_empty(pending))
fq->flush_pending_since = jiffies;
- list_move_tail(&rq->flush.list, pending);
+ list_move_tail(&rq->queuelist, pending);
break;
case REQ_FSEQ_DATA:
- list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
+ fq->flush_data_in_flight++;
spin_lock(&q->requeue_lock);
- list_add(&rq->queuelist, &q->requeue_list);
+ list_move(&rq->queuelist, &q->requeue_list);
spin_unlock(&q->requeue_lock);
blk_mq_kick_requeue_list(q);
break;
@@ -201,7 +201,7 @@ static void blk_flush_complete_seq(struct request *rq,
* flush data request completion path. Restore @rq for
* normal completion and end it.
*/
- list_del_init(&rq->flush.list);
+ list_del_init(&rq->queuelist);
blk_flush_restore_request(rq);
blk_mq_end_request(rq, error);
break;
@@ -257,7 +257,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
fq->flush_running_idx ^= 1;
/* and push the waiting requests to the next stage */
- list_for_each_entry_safe(rq, n, running, flush.list) {
+ list_for_each_entry_safe(rq, n, running, queuelist) {
unsigned int seq = blk_flush_cur_seq(rq);
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
@@ -291,7 +291,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
{
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
struct request *first_rq =
- list_first_entry(pending, struct request, flush.list);
+ list_first_entry(pending, struct request, queuelist);
struct request *flush_rq = fq->flush_rq;
/* C1 described at the top of this file */
@@ -299,7 +299,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
return;
/* C2 and C3 */
- if (!list_empty(&fq->flush_data_in_flight) &&
+ if (fq->flush_data_in_flight &&
time_before(jiffies,
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
return;
@@ -374,6 +374,12 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
* the comment in flush_end_io().
*/
spin_lock_irqsave(&fq->mq_flush_lock, flags);
+ fq->flush_data_in_flight--;
+ /*
+ * May have been corrupted by rq->rq_next reuse, we need to
+ * re-initialize rq->queuelist before reusing it here.
+ */
+ INIT_LIST_HEAD(&rq->queuelist);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
@@ -384,7 +390,6 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
static void blk_rq_init_flush(struct request *rq)
{
rq->flush.seq = 0;
- INIT_LIST_HEAD(&rq->flush.list);
rq->rq_flags |= RQF_FLUSH_SEQ;
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
rq->end_io = mq_flush_data_end_io;
@@ -443,9 +448,9 @@ bool blk_insert_flush(struct request *rq)
* the post flush, and then just pass the command on.
*/
blk_rq_init_flush(rq);
- rq->flush.seq |= REQ_FSEQ_POSTFLUSH;
+ rq->flush.seq |= REQ_FSEQ_PREFLUSH;
spin_lock_irq(&fq->mq_flush_lock);
- list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
+ fq->flush_data_in_flight++;
spin_unlock_irq(&fq->mq_flush_lock);
return false;
default:
@@ -496,7 +501,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
INIT_LIST_HEAD(&fq->flush_queue[0]);
INIT_LIST_HEAD(&fq->flush_queue[1]);
- INIT_LIST_HEAD(&fq->flush_data_in_flight);
return fq;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index fd5fec989e3906..c1a6aba1d59e4d 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -824,29 +824,6 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
}
}
-static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
-{
- static DEFINE_MUTEX(init_mutex);
- int ret;
-
- ret = blkg_conf_open_bdev(ctx);
- if (ret)
- return ret;
-
- /*
- * blk_iolatency_init() may fail after rq_qos_add() succeeds which can
- * confuse iolat_rq_qos() test. Make the test and init atomic.
- */
- mutex_lock(&init_mutex);
-
- if (!iolat_rq_qos(ctx->bdev->bd_queue))
- ret = blk_iolatency_init(ctx->bdev->bd_disk);
-
- mutex_unlock(&init_mutex);
-
- return ret;
-}
-
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@@ -861,7 +838,17 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg_conf_init(&ctx, buf);
- ret = blk_iolatency_try_init(&ctx);
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto out;
+
+ /*
+ * blk_iolatency_init() may fail after rq_qos_add() succeeds which can
+ * confuse iolat_rq_qos() test. Make the test and init atomic.
+ */
+ lockdep_assert_held(&ctx.bdev->bd_queue->rq_qos_mutex);
+ if (!iolat_rq_qos(ctx.bdev->bd_queue))
+ ret = blk_iolatency_init(ctx.bdev->bd_disk);
if (ret)
goto out;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 953f08354c8c36..ec922c6bccbe20 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -43,6 +43,7 @@
#include "blk-ioprio.h"
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
+static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_request_bypass_insert(struct request *rq,
@@ -1174,15 +1175,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
static void blk_mq_complete_send_ipi(struct request *rq)
{
- struct llist_head *list;
unsigned int cpu;
cpu = rq->mq_ctx->cpu;
- list = &per_cpu(blk_cpu_done, cpu);
- if (llist_add(&rq->ipi_list, list)) {
- INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
- smp_call_function_single_async(cpu, &rq->csd);
- }
+ if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu)))
+ smp_call_function_single_async(cpu, &per_cpu(blk_cpu_csd, cpu));
}
static void blk_mq_raise_softirq(struct request *rq)
@@ -1343,7 +1340,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
}
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
- blk_mq_run_hw_queue(hctx, false);
+ blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
@@ -2242,6 +2239,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
*/
WARN_ON_ONCE(!async && in_interrupt());
+ might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
+
/*
* When queue is quiesced, we may be switching io scheduler, or
* updating nr_hw_queues, or other things, and we can't run queue
@@ -2257,8 +2256,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
if (!need_run)
return;
- if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
- !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
+ if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
blk_mq_delay_run_hw_queue(hctx, 0);
return;
}
@@ -2393,7 +2391,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
- blk_mq_run_hw_queue(hctx, false);
+ blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
@@ -2423,7 +2421,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
- blk_mq_start_stopped_hw_queue(hctx, async);
+ blk_mq_start_stopped_hw_queue(hctx, async ||
+ (hctx->flags & BLK_MQ_F_BLOCKING));
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -2481,6 +2480,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
list_for_each_entry(rq, list, queuelist) {
BUG_ON(rq->mq_ctx != ctx);
trace_block_rq_insert(rq);
+ if (rq->cmd_flags & REQ_NOWAIT)
+ run_queue_async = true;
}
spin_lock(&ctx->lock);
@@ -2641,7 +2642,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0);
- blk_mq_run_hw_queue(hctx, false);
+ blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT);
return;
}
@@ -4402,9 +4403,13 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
int new_nr_hw_queues)
{
struct blk_mq_tags **new_tags;
+ int i;
- if (set->nr_hw_queues >= new_nr_hw_queues)
+ if (set->nr_hw_queues >= new_nr_hw_queues) {
+ for (i = new_nr_hw_queues; i < set->nr_hw_queues; i++)
+ __blk_mq_free_map_and_rqs(set, i);
goto done;
+ }
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
@@ -4416,6 +4421,16 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
+
+ for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
+ if (!__blk_mq_alloc_map_and_rqs(set, i)) {
+ while (--i >= set->nr_hw_queues)
+ __blk_mq_free_map_and_rqs(set, i);
+ return -ENOMEM;
+ }
+ cond_resched();
+ }
+
done:
set->nr_hw_queues = new_nr_hw_queues;
return 0;
@@ -4749,7 +4764,6 @@ fallback:
__blk_mq_free_map_and_rqs(set, i);
set->nr_hw_queues = prev_nr_hw_queues;
- blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
goto fallback;
}
blk_mq_map_swqueue(q);
@@ -4853,6 +4867,9 @@ static int __init blk_mq_init(void)
for_each_possible_cpu(i)
init_llist_head(&per_cpu(blk_cpu_done, i));
+ for_each_possible_cpu(i)
+ INIT_CSD(&per_cpu(blk_cpu_csd, i),
+ __blk_mq_complete_request_remote, NULL);
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4dd59059b788eb..0046b447268f91 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -830,10 +830,13 @@ EXPORT_SYMBOL(blk_set_queue_depth);
*/
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
{
- if (wc)
+ if (wc) {
+ blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
blk_queue_flag_set(QUEUE_FLAG_WC, q);
- else
+ } else {
+ blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
+ }
if (fua)
blk_queue_flag_set(QUEUE_FLAG_FUA, q);
else
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index afc797fb0dfc48..63e4812623361d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -449,21 +449,16 @@ static ssize_t queue_wc_show(struct request_queue *q, char *page)
static ssize_t queue_wc_store(struct request_queue *q, const char *page,
size_t count)
{
- int set = -1;
-
- if (!strncmp(page, "write back", 10))
- set = 1;
- else if (!strncmp(page, "write through", 13) ||
- !strncmp(page, "none", 4))
- set = 0;
-
- if (set == -1)
- return -EINVAL;
-
- if (set)
+ if (!strncmp(page, "write back", 10)) {
+ if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags))
+ return -EINVAL;
blk_queue_flag_set(QUEUE_FLAG_WC, q);
- else
+ } else if (!strncmp(page, "write through", 13) ||
+ !strncmp(page, "none", 4)) {
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
+ } else {
+ return -EINVAL;
+ }
return count;
}
diff --git a/block/blk.h b/block/blk.h
index 608c5dcc516b55..08a358bc0919e2 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -15,15 +15,14 @@ struct elevator_type;
extern struct dentry *blk_debugfs_root;
struct blk_flush_queue {
+ spinlock_t mq_flush_lock;
unsigned int flush_pending_idx:1;
unsigned int flush_running_idx:1;
blk_status_t rq_status;
unsigned long flush_pending_since;
struct list_head flush_queue[2];
- struct list_head flush_data_in_flight;
+ unsigned long flush_data_in_flight;
struct request *flush_rq;
-
- spinlock_t mq_flush_lock;
};
bool is_flush_rq(struct request *req);
@@ -76,6 +75,10 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+ struct page *page, unsigned len, unsigned offset,
+ bool *same_page);
+
static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
@@ -251,7 +254,6 @@ static inline void bio_integrity_free(struct bio *bio)
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
-const char *blk_status_to_str(blk_status_t status);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs);
diff --git a/block/fops.c b/block/fops.c
index 838ffada53413f..a24a624d3bf71a 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -15,6 +15,7 @@
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
+#include <linux/iomap.h>
#include <linux/module.h>
#include "blk.h"
@@ -23,15 +24,6 @@ static inline struct inode *bdev_file_inode(struct file *file)
return file->f_mapping->host;
}
-static int blkdev_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- bh->b_bdev = I_BDEV(inode);
- bh->b_blocknr = iblock;
- set_buffer_mapped(bh);
- return 0;
-}
-
static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
{
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
@@ -387,6 +379,37 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}
+static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+ struct block_device *bdev = I_BDEV(inode);
+ loff_t isize = i_size_read(inode);
+
+ iomap->bdev = bdev;
+ iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
+ if (iomap->offset >= isize)
+ return -EIO;
+ iomap->type = IOMAP_MAPPED;
+ iomap->addr = iomap->offset;
+ iomap->length = isize - iomap->offset;
+ iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */
+ return 0;
+}
+
+static const struct iomap_ops blkdev_iomap_ops = {
+ .iomap_begin = blkdev_iomap_begin,
+};
+
+#ifdef CONFIG_BUFFER_HEAD
+static int blkdev_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ bh->b_bdev = I_BDEV(inode);
+ bh->b_blocknr = iblock;
+ set_buffer_mapped(bh);
+ return 0;
+}
+
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, blkdev_get_block, wbc);
@@ -429,10 +452,58 @@ const struct address_space_operations def_blk_aops = {
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
- .direct_IO = blkdev_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
+#else /* CONFIG_BUFFER_HEAD */
+static int blkdev_read_folio(struct file *file, struct folio *folio)
+{
+ return iomap_read_folio(folio, &blkdev_iomap_ops);
+}
+
+static void blkdev_readahead(struct readahead_control *rac)
+{
+ iomap_readahead(rac, &blkdev_iomap_ops);
+}
+
+static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
+ struct inode *inode, loff_t offset)
+{
+ loff_t isize = i_size_read(inode);
+
+ if (WARN_ON_ONCE(offset >= isize))
+ return -EIO;
+ if (offset >= wpc->iomap.offset &&
+ offset < wpc->iomap.offset + wpc->iomap.length)
+ return 0;
+ return blkdev_iomap_begin(inode, offset, isize - offset,
+ IOMAP_WRITE, &wpc->iomap, NULL);
+}
+
+static const struct iomap_writeback_ops blkdev_writeback_ops = {
+ .map_blocks = blkdev_map_blocks,
+};
+
+static int blkdev_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct iomap_writepage_ctx wpc = { };
+
+ return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
+}
+
+const struct address_space_operations def_blk_aops = {
+ .dirty_folio = filemap_dirty_folio,
+ .release_folio = iomap_release_folio,
+ .invalidate_folio = iomap_invalidate_folio,
+ .read_folio = blkdev_read_folio,
+ .readahead = blkdev_readahead,
+ .writepages = blkdev_writepages,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
+ .migrate_folio = filemap_migrate_folio,
+};
+#endif /* CONFIG_BUFFER_HEAD */
/*
* for a block special file file_inode(file)->i_size is zero
@@ -506,7 +577,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
* during an unstable branch.
*/
filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
/*
* Use the file private data to store the holder for exclusive openes.
@@ -534,6 +605,35 @@ static int blkdev_release(struct inode *inode, struct file *filp)
return 0;
}
+static ssize_t
+blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ size_t count = iov_iter_count(from);
+ ssize_t written;
+
+ written = kiocb_invalidate_pages(iocb, count);
+ if (written) {
+ if (written == -EBUSY)
+ return 0;
+ return written;
+ }
+
+ written = blkdev_direct_IO(iocb, from);
+ if (written > 0) {
+ kiocb_invalidate_post_direct_write(iocb, count);
+ iocb->ki_pos += written;
+ count -= written;
+ }
+ if (written != -EIOCBQUEUED)
+ iov_iter_revert(from, count - iov_iter_count(from));
+ return written;
+}
+
+static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
+}
+
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
@@ -543,7 +643,8 @@ static int blkdev_release(struct inode *inode, struct file *filp)
*/
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
+ struct file *file = iocb->ki_filp;
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct inode *bd_inode = bdev->bd_inode;
loff_t size = bdev_nr_bytes(bdev);
size_t shorted = 0;
@@ -570,7 +671,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
iov_iter_truncate(from, size);
}
- ret = __generic_file_write_iter(iocb, from);
+ ret = file_remove_privs(file);
+ if (ret)
+ return ret;
+
+ ret = file_update_time(file);
+ if (ret)
+ return ret;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = blkdev_direct_write(iocb, from);
+ if (ret >= 0 && iov_iter_count(from))
+ ret = direct_write_fallback(iocb, from, ret,
+ blkdev_buffered_write(iocb, from));
+ } else {
+ ret = blkdev_buffered_write(iocb, from);
+ }
+
if (ret > 0)
ret = generic_write_sync(iocb, ret);
iov_iter_reexpand(from, iov_iter_count(from) + shorted);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 02a916ba62ee75..f958e79277b8bc 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -646,8 +646,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
+ unsigned int shift = tags->bitmap_tags.sb.shift;
- dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
+ dd->async_depth = max(1U, 3 * (1U << shift) / 4);
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
}
diff --git a/block/opal_proto.h b/block/opal_proto.h
index a4e56845dd8267..dec7ce3a3edb70 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -225,6 +225,10 @@ enum opal_parameter {
OPAL_SUM_SET_LIST = 0x060000,
};
+enum opal_revertlsp {
+ OPAL_KEEP_GLOBAL_RANGE_KEY = 0x060000,
+};
+
/* Packets derived from:
* TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Secion: 3.2.3 ComPackets, Packets & Subpackets
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index 1af610f0ba8c6c..c03bc105e57539 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -81,8 +81,7 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
length = min_t(int, next - partdef,
sizeof(new_subpart->name) - 1);
- strncpy(new_subpart->name, partdef, length);
- new_subpart->name[length] = '\0';
+ strscpy(new_subpart->name, partdef, length);
partdef = ++next;
} else
@@ -140,8 +139,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
}
length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
- strncpy(newparts->name, bdevdef, length);
- newparts->name[length] = '\0';
+ strscpy(newparts->name, bdevdef, length);
newparts->nr_subparts = 0;
next_subpart = &newparts->subpart;
@@ -153,8 +151,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
length = (!next) ? (sizeof(buf) - 1) :
min_t(int, next - bdevdef, sizeof(buf) - 1);
- strncpy(buf, bdevdef, length);
- buf[length] = '\0';
+ strscpy(buf, bdevdef, length);
ret = parse_subpart(next_subpart, buf);
if (ret)
@@ -267,8 +264,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
label_min = min_t(int, sizeof(info->volname) - 1,
sizeof(subpart->name));
- strncpy(info->volname, subpart->name, label_min);
- info->volname[label_min] = '\0';
+ strscpy(info->volname, subpart->name, label_min);
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
strlcat(state->pp_buf, tmp, PAGE_SIZE);
diff --git a/block/sed-opal.c b/block/sed-opal.c
index c18339446ef379..6d7f25d1711ba7 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -20,6 +20,9 @@
#include <linux/sed-opal.h>
#include <linux/string.h>
#include <linux/kdev_t.h>
+#include <linux/key.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
#include "opal_proto.h"
@@ -29,6 +32,8 @@
/* Number of bytes needed by cmd_finalize. */
#define CMD_FINALIZE_BYTES_NEEDED 7
+static struct key *sed_opal_keyring;
+
struct opal_step {
int (*fn)(struct opal_dev *dev, void *data);
void *data;
@@ -269,6 +274,101 @@ static void print_buffer(const u8 *ptr, u32 length)
#endif
}
+/*
+ * Allocate/update a SED Opal key and add it to the SED Opal keyring.
+ */
+static int update_sed_opal_key(const char *desc, u_char *key_data, int keylen)
+{
+ key_ref_t kr;
+
+ if (!sed_opal_keyring)
+ return -ENOKEY;
+
+ kr = key_create_or_update(make_key_ref(sed_opal_keyring, true), "user",
+ desc, (const void *)key_data, keylen,
+ KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_WRITE,
+ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(kr)) {
+ pr_err("Error adding SED key (%ld)\n", PTR_ERR(kr));
+ return PTR_ERR(kr);
+ }
+
+ return 0;
+}
+
+/*
+ * Read a SED Opal key from the SED Opal keyring.
+ */
+static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen)
+{
+ int ret;
+ key_ref_t kref;
+ struct key *key;
+
+ if (!sed_opal_keyring)
+ return -ENOKEY;
+
+ kref = keyring_search(make_key_ref(sed_opal_keyring, true),
+ &key_type_user, key_name, true);
+
+ if (IS_ERR(kref))
+ ret = PTR_ERR(kref);
+
+ key = key_ref_to_ptr(kref);
+ down_read(&key->sem);
+ ret = key_validate(key);
+ if (ret == 0) {
+ if (buflen > key->datalen)
+ buflen = key->datalen;
+
+ ret = key->type->read(key, (char *)buffer, buflen);
+ }
+ up_read(&key->sem);
+
+ key_ref_put(kref);
+
+ return ret;
+}
+
+static int opal_get_key(struct opal_dev *dev, struct opal_key *key)
+{
+ int ret = 0;
+
+ switch (key->key_type) {
+ case OPAL_INCLUDED:
+ /* the key is ready to use */
+ break;
+ case OPAL_KEYRING:
+ /* the key is in the keyring */
+ ret = read_sed_opal_key(OPAL_AUTH_KEY, key->key, OPAL_KEY_MAX);
+ if (ret > 0) {
+ if (ret > U8_MAX) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ key->key_len = ret;
+ key->key_type = OPAL_INCLUDED;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ if (ret < 0)
+ goto error;
+
+ /* must have a PEK by now or it's an error */
+ if (key->key_type != OPAL_INCLUDED || key->key_len == 0) {
+ ret = -EINVAL;
+ goto error;
+ }
+ return 0;
+error:
+ pr_debug("Error getting password: %d\n", ret);
+ return ret;
+}
+
static bool check_tper(const void *data)
{
const struct d0_tper_features *tper = data;
@@ -463,8 +563,11 @@ out_error:
return error;
}
-static int opal_discovery0_end(struct opal_dev *dev)
+static int opal_discovery0_end(struct opal_dev *dev, void *data)
{
+ struct opal_discovery *discv_out = data; /* may be NULL */
+ u8 __user *buf_out;
+ u64 len_out;
bool found_com_id = false, supported = true, single_user = false;
const struct d0_header *hdr = (struct d0_header *)dev->resp;
const u8 *epos = dev->resp, *cpos = dev->resp;
@@ -480,6 +583,15 @@ static int opal_discovery0_end(struct opal_dev *dev)
return -EFAULT;
}
+ if (discv_out) {
+ buf_out = (u8 __user *)(uintptr_t)discv_out->data;
+ len_out = min_t(u64, discv_out->size, hlen);
+ if (buf_out && copy_to_user(buf_out, dev->resp, len_out))
+ return -EFAULT;
+
+ discv_out->size = hlen; /* actual size of data */
+ }
+
epos += hlen; /* end of buffer */
cpos += sizeof(*hdr); /* current position on buffer */
@@ -565,13 +677,13 @@ static int opal_discovery0(struct opal_dev *dev, void *data)
if (ret)
return ret;
- return opal_discovery0_end(dev);
+ return opal_discovery0_end(dev, data);
}
static int opal_discovery0_step(struct opal_dev *dev)
{
const struct opal_step discovery0_step = {
- opal_discovery0,
+ opal_discovery0, NULL
};
return execute_step(dev, &discovery0_step, 0);
@@ -1757,6 +1869,26 @@ static int internal_activate_user(struct opal_dev *dev, void *data)
return finalize_and_send(dev, parse_and_check_status);
}
+static int revert_lsp(struct opal_dev *dev, void *data)
+{
+ struct opal_revert_lsp *rev = data;
+ int err;
+
+ err = cmd_start(dev, opaluid[OPAL_THISSP_UID],
+ opalmethod[OPAL_REVERTSP]);
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u64(&err, dev, OPAL_KEEP_GLOBAL_RANGE_KEY);
+ add_token_u8(&err, dev, (rev->options & OPAL_PRESERVE) ?
+ OPAL_TRUE : OPAL_FALSE);
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+ if (err) {
+ pr_debug("Error building REVERT SP command.\n");
+ return err;
+ }
+
+ return finalize_and_send(dev, parse_and_check_status);
+}
+
static int erase_locking_range(struct opal_dev *dev, void *data)
{
struct opal_session_info *session = data;
@@ -2427,6 +2559,9 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
};
int ret;
+ ret = opal_get_key(dev, &opal_session->opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
@@ -2435,6 +2570,42 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
return ret;
}
+static int opal_get_discv(struct opal_dev *dev, struct opal_discovery *discv)
+{
+ const struct opal_step discovery0_step = {
+ opal_discovery0, discv
+ };
+ int ret = 0;
+
+ mutex_lock(&dev->dev_lock);
+ setup_opal_dev(dev);
+ ret = execute_step(dev, &discovery0_step, 0);
+ mutex_unlock(&dev->dev_lock);
+ if (ret)
+ return ret;
+ return discv->size; /* modified to actual length of data */
+}
+
+static int opal_revertlsp(struct opal_dev *dev, struct opal_revert_lsp *rev)
+{
+ /* controller will terminate session */
+ const struct opal_step steps[] = {
+ { start_admin1LSP_opal_session, &rev->key },
+ { revert_lsp, rev }
+ };
+ int ret;
+
+ ret = opal_get_key(dev, &rev->key);
+ if (ret)
+ return ret;
+ mutex_lock(&dev->dev_lock);
+ setup_opal_dev(dev);
+ ret = execute_steps(dev, steps, ARRAY_SIZE(steps));
+ mutex_unlock(&dev->dev_lock);
+
+ return ret;
+}
+
static int opal_erase_locking_range(struct opal_dev *dev,
struct opal_session_info *opal_session)
{
@@ -2445,6 +2616,9 @@ static int opal_erase_locking_range(struct opal_dev *dev,
};
int ret;
+ ret = opal_get_key(dev, &opal_session->opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
@@ -2473,6 +2647,9 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
opal_mbr->enable_disable != OPAL_MBR_DISABLE)
return -EINVAL;
+ ret = opal_get_key(dev, &opal_mbr->key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2498,6 +2675,9 @@ static int opal_set_mbr_done(struct opal_dev *dev,
mbr_done->done_flag != OPAL_MBR_NOT_DONE)
return -EINVAL;
+ ret = opal_get_key(dev, &mbr_done->key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2519,6 +2699,9 @@ static int opal_write_shadow_mbr(struct opal_dev *dev,
if (info->size == 0)
return 0;
+ ret = opal_get_key(dev, &info->key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2576,6 +2759,9 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
return -EINVAL;
}
+ ret = opal_get_key(dev, &lk_unlk->session.opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, steps, ARRAY_SIZE(steps));
@@ -2598,6 +2784,10 @@ static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal, bool psi
int ret;
+ ret = opal_get_key(dev, opal);
+
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
if (psid)
@@ -2698,6 +2888,9 @@ static int opal_lock_unlock(struct opal_dev *dev,
if (lk_unlk->session.who > OPAL_USER9)
return -EINVAL;
+ ret = opal_get_key(dev, &lk_unlk->session.opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
opal_lock_check_for_saved_key(dev, lk_unlk);
ret = __opal_lock_unlock(dev, lk_unlk);
@@ -2721,6 +2914,9 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
if (!dev)
return -ENODEV;
+ ret = opal_get_key(dev, opal);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, owner_steps, ARRAY_SIZE(owner_steps));
@@ -2743,6 +2939,9 @@ static int opal_activate_lsp(struct opal_dev *dev,
if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS)
return -EINVAL;
+ ret = opal_get_key(dev, &opal_lr_act->key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps));
@@ -2761,6 +2960,9 @@ static int opal_setup_locking_range(struct opal_dev *dev,
};
int ret;
+ ret = opal_get_key(dev, &opal_lrs->session.opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
@@ -2814,6 +3016,14 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps));
mutex_unlock(&dev->dev_lock);
+ if (ret)
+ return ret;
+
+ /* update keyring with new password */
+ ret = update_sed_opal_key(OPAL_AUTH_KEY,
+ opal_pw->new_user_pw.opal_key.key,
+ opal_pw->new_user_pw.opal_key.key_len);
+
return ret;
}
@@ -2834,6 +3044,9 @@ static int opal_activate_user(struct opal_dev *dev,
return -EINVAL;
}
+ ret = opal_get_key(dev, &opal_session->opal_key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, act_steps, ARRAY_SIZE(act_steps));
@@ -2920,6 +3133,9 @@ static int opal_generic_read_write_table(struct opal_dev *dev,
{
int ret, bit_set;
+ ret = opal_get_key(dev, &rw_tbl->key);
+ if (ret)
+ return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
@@ -2988,9 +3204,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (!dev)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (!(dev->flags & OPAL_FL_SUPPORTED))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (cmd & IOC_IN) {
p = memdup_user(arg, _IOC_SIZE(cmd));
@@ -3056,6 +3272,13 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
case IOC_OPAL_GET_GEOMETRY:
ret = opal_get_geometry(dev, arg);
break;
+ case IOC_OPAL_REVERT_LSP:
+ ret = opal_revertlsp(dev, p);
+ break;
+ case IOC_OPAL_DISCOVERY:
+ ret = opal_get_discv(dev, p);
+ break;
+
default:
break;
}
@@ -3065,3 +3288,22 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
return ret;
}
EXPORT_SYMBOL_GPL(sed_ioctl);
+
+static int __init sed_opal_init(void)
+{
+ struct key *kr;
+
+ kr = keyring_alloc(".sed_opal",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW |
+ KEY_USR_READ | KEY_USR_SEARCH | KEY_USR_WRITE,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(kr))
+ return PTR_ERR(kr);
+
+ sed_opal_keyring = kr;
+
+ return 0;
+}
+late_initcall(sed_opal_init);