aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2022-08-01 11:21:43 -0600
committerJens Axboe <axboe@kernel.dk>2022-08-01 11:21:43 -0600
commitfb0c50f4fedf8b654c2bf33f8c03fa7469cbd4ea (patch)
treebc9c2f26a1ed339164209c6f21a79a201087b494
parentc9ca8dcc66a99d1123f0fdc2dc161436b93d194b (diff)
parentae0a80935d6a65764b0db00c8b03d3807b4110a6 (diff)
downloadlinux-block-for-5.20/drivers-post.tar.gz
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.20/drivers-postfor-5.20/drivers-post
Pull MD fixes from Song: "The major change is: "1. Fix potential deadlock with raid5_quiesce and raid5_get_active_stripe, by Logan Gunthorpe." * 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: drivers:md:fix a potential use-after-free bug md/raid5: Ensure batch_last is released before sleeping for quiesce md/raid5: Move stripe_request_ctx up md/raid5: Drop unnecessary call to r5c_check_stripe_cache_usage() md/raid5: Make is_inactive_blocked() helper md/raid5: Refactor raid5_get_active_stripe()
-rw-r--r--drivers/md/raid5.c164
-rw-r--r--drivers/md/raid5.h2
2 files changed, 102 insertions, 64 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3b7887428cd0e8..4456ac51f7c536 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -755,9 +755,54 @@ static bool has_failed(struct r5conf *conf)
return degraded > conf->max_degraded;
}
-struct stripe_head *
-raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
- int previous, int noblock, int noquiesce)
+enum stripe_result {
+ STRIPE_SUCCESS = 0,
+ STRIPE_RETRY,
+ STRIPE_SCHEDULE_AND_RETRY,
+ STRIPE_FAIL,
+};
+
+struct stripe_request_ctx {
+ /* a reference to the last stripe_head for batching */
+ struct stripe_head *batch_last;
+
+ /* first sector in the request */
+ sector_t first_sector;
+
+ /* last sector in the request */
+ sector_t last_sector;
+
+ /*
+ * bitmap to track stripe sectors that have been added to stripes
+ * add one to account for unaligned requests
+ */
+ DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1);
+
+ /* the request had REQ_PREFLUSH, cleared after the first stripe_head */
+ bool do_flush;
+};
+
+/*
+ * Block until another thread clears R5_INACTIVE_BLOCKED or
+ * there are fewer than 3/4 the maximum number of active stripes
+ * and there is an inactive stripe available.
+ */
+static bool is_inactive_blocked(struct r5conf *conf, int hash)
+{
+ int active = atomic_read(&conf->active_stripes);
+
+ if (list_empty(conf->inactive_list + hash))
+ return false;
+
+ if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+ return true;
+
+ return active < (conf->max_nr_stripes * 3 / 4);
+}
+
+static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf,
+ struct stripe_request_ctx *ctx, sector_t sector,
+ bool previous, bool noblock, bool noquiesce)
{
struct stripe_head *sh;
int hash = stripe_hash_locks_hash(conf, sector);
@@ -766,45 +811,65 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
spin_lock_irq(conf->hash_locks + hash);
- do {
- wait_event_lock_irq(conf->wait_for_quiescent,
- conf->quiesce == 0 || noquiesce,
+retry:
+ if (!noquiesce && conf->quiesce) {
+ /*
+ * Must release the reference to batch_last before waiting,
+ * on quiesce, otherwise the batch_last will hold a reference
+ * to a stripe and raid5_quiesce() will deadlock waiting for
+ * active_stripes to go to zero.
+ */
+ if (ctx && ctx->batch_last) {
+ raid5_release_stripe(ctx->batch_last);
+ ctx->batch_last = NULL;
+ }
+
+ wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce,
*(conf->hash_locks + hash));
- sh = find_get_stripe(conf, sector, conf->generation - previous,
- hash);
- if (sh)
- break;
+ }
- if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
- sh = get_free_stripe(conf, hash);
- if (!sh && !test_bit(R5_DID_ALLOC, &conf->cache_state))
- set_bit(R5_ALLOC_MORE, &conf->cache_state);
- }
- if (noblock && !sh)
- break;
+ sh = find_get_stripe(conf, sector, conf->generation - previous, hash);
+ if (sh)
+ goto out;
+
+ if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+ goto wait_for_stripe;
+ sh = get_free_stripe(conf, hash);
+ if (sh) {
r5c_check_stripe_cache_usage(conf);
- if (!sh) {
- set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
- r5l_wake_reclaim(conf->log, 0);
- wait_event_lock_irq(conf->wait_for_stripe,
- !list_empty(conf->inactive_list + hash) &&
- (atomic_read(&conf->active_stripes)
- < (conf->max_nr_stripes * 3 / 4)
- || !test_bit(R5_INACTIVE_BLOCKED,
- &conf->cache_state)),
- *(conf->hash_locks + hash));
- clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
- } else {
- init_stripe(sh, sector, previous);
- atomic_inc(&sh->count);
- }
- } while (sh == NULL);
+ init_stripe(sh, sector, previous);
+ atomic_inc(&sh->count);
+ goto out;
+ }
+
+ if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
+ set_bit(R5_ALLOC_MORE, &conf->cache_state);
+wait_for_stripe:
+ if (noblock)
+ goto out;
+
+ set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+ r5l_wake_reclaim(conf->log, 0);
+ wait_event_lock_irq(conf->wait_for_stripe,
+ is_inactive_blocked(conf, hash),
+ *(conf->hash_locks + hash));
+ clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+ goto retry;
+
+out:
spin_unlock_irq(conf->hash_locks + hash);
return sh;
}
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
+ sector_t sector, bool previous, bool noblock, bool noquiesce)
+{
+ return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock,
+ noquiesce);
+}
+
static bool is_full_stripe_write(struct stripe_head *sh)
{
BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
@@ -2887,10 +2952,10 @@ static void raid5_end_write_request(struct bio *bi)
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
- raid5_release_stripe(sh);
if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head);
+ raid5_release_stripe(sh);
}
static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
@@ -5856,33 +5921,6 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
return ret;
}
-enum stripe_result {
- STRIPE_SUCCESS = 0,
- STRIPE_RETRY,
- STRIPE_SCHEDULE_AND_RETRY,
- STRIPE_FAIL,
-};
-
-struct stripe_request_ctx {
- /* a reference to the last stripe_head for batching */
- struct stripe_head *batch_last;
-
- /* first sector in the request */
- sector_t first_sector;
-
- /* last sector in the request */
- sector_t last_sector;
-
- /*
- * bitmap to track stripe sectors that have been added to stripes
- * add one to account for unaligned requests
- */
- DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1);
-
- /* the request had REQ_PREFLUSH, cleared after the first stripe_head */
- bool do_flush;
-};
-
static int add_all_stripe_bios(struct r5conf *conf,
struct stripe_request_ctx *ctx, struct stripe_head *sh,
struct bio *bi, int forwrite, int previous)
@@ -5974,8 +6012,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
new_sector, logical_sector);
- sh = raid5_get_active_stripe(conf, new_sector, previous,
- (bi->bi_opf & REQ_RAHEAD), 0);
+ sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous,
+ (bi->bi_opf & REQ_RAHEAD), 0);
if (unlikely(!sh)) {
/* cannot get stripe, just give-up */
bi->bi_status = BLK_STS_IOERR;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 638d29863503b8..a5082bed83c863 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -812,7 +812,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
struct stripe_head *sh);
extern struct stripe_head *
raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
- int previous, int noblock, int noquiesce);
+ bool previous, bool noblock, bool noquiesce);
extern int raid5_calc_degraded(struct r5conf *conf);
extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
#endif