From: Neil Brown

I've made a bunch of changes to the 'md' bits - largely moving the
unplugging into the individual personalities which know more about which
drives are actually in use.

---

 25-akpm/drivers/md/linear.c       |   15 ++++++++++
 25-akpm/drivers/md/md.c           |   35 +++--------------------
 25-akpm/drivers/md/multipath.c    |   23 +++++++++++++++
 25-akpm/drivers/md/raid0.c        |   17 +++++++++++
 25-akpm/drivers/md/raid1.c        |   56 +++++++++++++++++++++++++++++++-------
 25-akpm/drivers/md/raid5.c        |   36 +++++++++++++++++++++---
 25-akpm/drivers/md/raid6main.c    |   36 ++++++++++++++++++++++--
 25-akpm/include/linux/raid/md_k.h |    7 ++--
 8 files changed, 175 insertions(+), 50 deletions(-)

diff -puN drivers/md/linear.c~md-unplug-update drivers/md/linear.c
--- 25/drivers/md/linear.c~md-unplug-update	2004-04-06 19:04:25.157284248 -0700
+++ 25-akpm/drivers/md/linear.c	2004-04-06 19:04:25.172281968 -0700
@@ -80,6 +80,20 @@ static int linear_mergeable_bvec(request
 	return maxsectors << 9;
 }
 
+static void linear_unplug(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+	linear_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i < mddev->raid_disks; i++) {
+		request_queue_t *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
+		if (r_queue->unplug_fn)
+			r_queue->unplug_fn(r_queue);
+	}
+}
+
+
 static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
@@ -185,6 +199,7 @@ static int linear_run (mddev_t *mddev)
 		BUG();
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
+	mddev->queue->unplug_fn = linear_unplug;
 	return 0;
 
 out:
diff -puN drivers/md/md.c~md-unplug-update drivers/md/md.c
--- 25/drivers/md/md.c~md-unplug-update	2004-04-06 19:04:25.159283944 -0700
+++ 25-akpm/drivers/md/md.c	2004-04-06 19:04:25.174281664 -0700
@@ -160,30 +160,6 @@ static int md_fail_request (request_queu
 	return 0;
 }
 
-void md_unplug_mddev(mddev_t *mddev)
-{
-	struct list_head *tmp;
-	mdk_rdev_t *rdev;
-
-	/*
-	 * this list iteration is done without any locking in md?!
-	 */
-	ITERATE_RDEV(mddev, rdev, tmp) {
-		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
-
-		if (r_queue->unplug_fn)
-			r_queue->unplug_fn(r_queue);
-	}
-}
-EXPORT_SYMBOL(md_unplug_mddev);
-
-static void md_unplug_all(request_queue_t *q)
-{
-	mddev_t *mddev = q->queuedata;
-
-	md_unplug_mddev(mddev);
-}
-
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
@@ -1669,7 +1645,6 @@ static int do_md_run(mddev_t * mddev)
 	 */
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
-	mddev->queue->unplug_fn = md_unplug_all;
 
 	mddev->changed = 1;
 	return 0;
@@ -2742,10 +2717,9 @@ int md_thread(void * arg)
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 
 		run = thread->run;
-		if (run) {
+		if (run)
 			run(thread->mddev);
-			md_unplug_mddev(thread->mddev);
-		}
+
 		if (signal_pending(current))
 			flush_signals(current);
 	}
@@ -3313,8 +3287,6 @@ static void md_do_sync(mddev_t *mddev)
 		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
 			break;
 
-		md_unplug_mddev(mddev);
-
 	repeat:
 		if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) {
 			/* step marks */
@@ -3347,6 +3319,7 @@ static void md_do_sync(mddev_t *mddev)
 		 * about not overloading the IO subsystem. (things like an
 		 * e2fsck being done on the RAID array should execute fast)
 		 */
+		mddev->queue->unplug_fn(mddev->queue);
 		cond_resched();
 
 		currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
@@ -3365,6 +3338,8 @@ static void md_do_sync(mddev_t *mddev)
 	 * this also signals 'finished resyncing' to md_stop
 	 */
  out:
+	mddev->queue->unplug_fn(mddev->queue);
+
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
 	/* tell personality that we are finished */
diff -puN drivers/md/multipath.c~md-unplug-update drivers/md/multipath.c
--- 25/drivers/md/multipath.c~md-unplug-update	2004-04-06 19:04:25.160283792 -0700
+++ 25-akpm/drivers/md/multipath.c	2004-04-06 19:04:25.175281512 -0700
@@ -155,6 +155,27 @@ static int multipath_read_balance (multi
 	return 0;
 }
 
+static void unplug_slaves(mddev_t *mddev)
+{
+	multipath_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+		if (rdev && !rdev->faulty) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			if (r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+		}
+	}
+}
+static void multipath_unplug(request_queue_t *q)
+{
+	unplug_slaves(q->queuedata);
+}
+
+
 static int multipath_make_request (request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
@@ -419,6 +440,8 @@ static int multipath_run (mddev_t *mddev
 	}
 	memset(conf->multipaths, 0, sizeof(struct multipath_info)*mddev->raid_disks);
 
+	mddev->queue->unplug_fn = multipath_unplug;
+
 	conf->working_disks = 0;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		disk_idx = rdev->raid_disk;
diff -puN drivers/md/raid0.c~md-unplug-update drivers/md/raid0.c
--- 25/drivers/md/raid0.c~md-unplug-update	2004-04-06 19:04:25.162283488 -0700
+++ 25-akpm/drivers/md/raid0.c	2004-04-06 19:04:25.176281360 -0700
@@ -25,6 +25,21 @@
 #define MD_DRIVER
 #define MD_PERSONALITY
 
+static void raid0_unplug(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+	raid0_conf_t *conf = mddev_to_conf(mddev);
+	mdk_rdev_t **devlist = conf->strip_zone[0].dev;
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		request_queue_t *r_queue = bdev_get_queue(devlist[i]->bdev);
+
+		if (r_queue->unplug_fn)
+			r_queue->unplug_fn(r_queue);
+	}
+}
+
 static int create_strip_zones (mddev_t *mddev)
 {
 	int i, c, j;
@@ -202,6 +217,8 @@ static int create_strip_zones (mddev_t *
 		conf->hash_spacing = sz;
 	}
 
+	mddev->queue->unplug_fn = raid0_unplug;
+
 	printk("raid0: done.\n");
 	return 0;
 abort:
diff -puN drivers/md/raid1.c~md-unplug-update drivers/md/raid1.c
--- 25/drivers/md/raid1.c~md-unplug-update	2004-04-06 19:04:25.164283184 -0700
+++ 25-akpm/drivers/md/raid1.c	2004-04-06 19:04:25.178281056 -0700
@@ -37,6 +37,9 @@ static mdk_personality_t raid1_personali
 static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
 static LIST_HEAD(retry_list_head);
 
+static void unplug_slaves(mddev_t *mddev);
+
+
 static void * r1bio_pool_alloc(int gfp_flags, void *data)
 {
 	mddev_t *mddev = data;
@@ -47,6 +50,8 @@ static void * r1bio_pool_alloc(int gfp_f
 			 gfp_flags);
 	if (r1_bio)
 		memset(r1_bio, 0, sizeof(*r1_bio) + sizeof(struct bio*)*mddev->raid_disks);
+	else
+		unplug_slaves(mddev);
 
 	return r1_bio;
 }
@@ -71,8 +76,10 @@ static void * r1buf_pool_alloc(int gfp_f
 	int i, j;
 
 	r1_bio = r1bio_pool_alloc(gfp_flags, conf->mddev);
-	if (!r1_bio)
+	if (!r1_bio) {
+		unplug_slaves(conf->mddev);
 		return NULL;
+	}
 
 	/*
 	 * Allocate bios : 1 for reading, n-1 for writing
@@ -443,6 +450,29 @@ rb_out:
 	return new_disk;
 }
 
+static void unplug_slaves(mddev_t *mddev)
+{
+	conf_t *conf = mddev_to_conf(mddev);
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&conf->device_lock, flags);
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		if (rdev && !rdev->faulty) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			if (r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+		}
+	}
+	spin_unlock_irqrestore(&conf->device_lock, flags);
+}
+static void raid1_unplug(request_queue_t *q)
+{
+	unplug_slaves(q->queuedata);
+}
+
 /*
  * Throttle resync depth, so that we can both get proper overlapping of
  * requests, but are still able to handle normal requests quickly.
@@ -451,16 +481,18 @@ rb_out:
 
 static void device_barrier(conf_t *conf, sector_t sect)
 {
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);
+	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 
 	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, conf->resync_lock);
+		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
+				    conf->resync_lock, unplug_slaves(conf->mddev));
 		if (conf->nr_pending)
 			BUG();
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 	conf->next_resync = sect;
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -479,9 +511,8 @@ static int make_request(request_queue_t
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
 	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 
@@ -646,9 +677,9 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 	spin_unlock_irq(&conf->resync_lock);
 
 	if (conf->barrier) BUG();
@@ -862,6 +893,7 @@ static void raid1d(mddev_t *mddev)
 	struct bio *bio;
 	unsigned long flags;
 	conf_t *conf = mddev_to_conf(mddev);
+	int unplug=0;
 	mdk_rdev_t *rdev;
 
 	md_check_recovery(mddev);
@@ -881,6 +913,7 @@ static void raid1d(mddev_t *mddev)
 		bio = r1_bio->master_bio;
 		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 			sync_request_write(mddev, r1_bio);
+			unplug = 1;
 		} else {
 			if (map(mddev, &rdev) == -1) {
 				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -896,12 +929,14 @@ static void raid1d(mddev_t *mddev)
 				bio->bi_bdev = rdev->bdev;
 				bio->bi_sector = r1_bio->sector + rdev->data_offset;
 				bio->bi_rw = READ;
-
+				unplug = 1;
 				generic_make_request(bio);
 			}
 		}
 	}
 	spin_unlock_irqrestore(&retry_list_lock, flags);
+	if (unplug)
+		unplug_slaves(mddev);
 }
 
 
@@ -1104,6 +1139,7 @@ static int run(mddev_t *mddev)
 			mdname(mddev));
 		goto out_free_conf;
 	}
+	mddev->queue->unplug_fn = raid1_unplug;
 
 
 	ITERATE_RDEV(mddev, rdev, tmp) {
diff -puN drivers/md/raid5.c~md-unplug-update drivers/md/raid5.c
--- 25/drivers/md/raid5.c~md-unplug-update	2004-04-06 19:04:25.166282880 -0700
+++ 25-akpm/drivers/md/raid5.c	2004-04-06 19:04:25.179280904 -0700
@@ -231,6 +231,8 @@ static struct stripe_head *__find_stripe
 	return NULL;
 }
 
+static void unplug_slaves(mddev_t *mddev);
+
 static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector,
 					     int pd_idx, int noblock)
 {
@@ -249,12 +251,13 @@ static struct stripe_head *get_active_st
 				break;
 			if (!sh) {
 				conf->inactive_blocked = 1;
-				md_unplug_mddev(conf->mddev);
 				wait_event_lock_irq(conf->wait_for_stripe,
 						    !list_empty(&conf->inactive_list) &&
 						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock);
+						    conf->device_lock,
+						    unplug_slaves(conf->mddev);
+					);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, pd_idx);
@@ -1293,6 +1296,25 @@ static inline void raid5_activate_delaye
 		}
 	}
 }
+
+static void unplug_slaves(mddev_t *mddev)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		if (rdev && !rdev->faulty) {
+			struct block_device *bdev = rdev->bdev;
+			if (bdev) {
+				request_queue_t *r_queue = bdev_get_queue(bdev);
+				if (r_queue && r_queue->unplug_fn)
+					r_queue->unplug_fn(r_queue);
+			}
+		}
+	}
+}
+
 static void raid5_unplug_device(request_queue_t *q)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1306,6 +1328,8 @@ static void raid5_unplug_device(request_
 		md_wakeup_thread(mddev->thread);
 
 	spin_unlock_irqrestore(&conf->device_lock, flags);
+
+	unplug_slaves(mddev);
 }
 
 static inline void raid5_plug_device(raid5_conf_t *conf)
@@ -1392,9 +1416,11 @@ static int sync_request (mddev_t *mddev,
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks-1;
 
-	if (sector_nr >= mddev->size <<1)
-		/* just being told to finish up .. nothing to do */
+	if (sector_nr >= mddev->size <<1) {
+		/* just being told to finish up .. nothing much to do */
+		unplug_slaves(mddev);
 		return 0;
+	}
 
 	x = sector_nr;
 	chunk_offset = sector_div(x, sectors_per_chunk);
@@ -1474,6 +1500,8 @@ static void raid5d (mddev_t *mddev)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	unplug_slaves(mddev);
+
 	PRINTK("--- raid5d inactive\n");
 }
 
diff -puN drivers/md/raid6main.c~md-unplug-update drivers/md/raid6main.c
--- 25/drivers/md/raid6main.c~md-unplug-update	2004-04-06 19:04:25.167282728 -0700
+++ 25-akpm/drivers/md/raid6main.c	2004-04-06 19:04:25.181280600 -0700
@@ -250,6 +250,8 @@ static struct stripe_head *__find_stripe
 	return NULL;
 }
 
+static void unplug_slaves(mddev_t *mddev);
+
 static struct stripe_head *get_active_stripe(raid6_conf_t *conf, sector_t sector,
 					     int pd_idx, int noblock)
 {
@@ -272,7 +274,9 @@ static struct stripe_head *get_active_st
 						    !list_empty(&conf->inactive_list) &&
 						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock);
+						    conf->device_lock,
+						    unplug_slaves(conf->mddev);
+					);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, pd_idx);
@@ -1454,6 +1458,26 @@ static inline void raid6_activate_delaye
 		}
 	}
 }
+
+static void unplug_slaves(mddev_t *mddev)
+{
+	/* note: this is always called with device_lock held */
+	raid6_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		if (rdev && !rdev->faulty) {
+			struct block_device *bdev = rdev->bdev;
+			if (bdev) {
+				request_queue_t *r_queue = bdev_get_queue(bdev);
+				if (r_queue && r_queue->unplug_fn)
+					r_queue->unplug_fn(r_queue);
+			}
+		}
+	}
+}
+
 static void raid6_unplug_device(request_queue_t *q)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1467,6 +1491,8 @@ static void raid6_unplug_device(request_
 		md_wakeup_thread(mddev->thread);
 
 	spin_unlock_irqrestore(&conf->device_lock, flags);
+
+	unplug_slaves(mddev);
 }
 
 static inline void raid6_plug_device(raid6_conf_t *conf)
@@ -1553,9 +1579,11 @@ static int sync_request (mddev_t *mddev,
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks - 2;
 
-	if (sector_nr >= mddev->size <<1)
-		/* just being told to finish up .. nothing to do */
+	if (sector_nr >= mddev->size <<1) {
+		/* just being told to finish up .. nothing much to do */
+		unplug_slaves(mddev);
 		return 0;
+	}
 
 	x = sector_nr;
 	chunk_offset = sector_div(x, sectors_per_chunk);
@@ -1635,6 +1663,8 @@ static void raid6d (mddev_t *mddev)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	unplug_slaves(mddev);
+
 	PRINTK("--- raid6d inactive\n");
 }
 
diff -puN include/linux/raid/md_k.h~md-unplug-update include/linux/raid/md_k.h
--- 25/include/linux/raid/md_k.h~md-unplug-update	2004-04-06 19:04:25.168282576 -0700
+++ 25-akpm/include/linux/raid/md_k.h	2004-04-06 19:04:25.182280448 -0700
@@ -315,7 +315,7 @@ typedef struct mdk_thread_s {
 
 #define THREAD_WAKEUP 0
 
-#define __wait_event_lock_irq(wq, condition, lock)			\
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
 do {									\
 	wait_queue_t __wait;						\
 	init_waitqueue_entry(&__wait, current);				\
@@ -326,6 +326,7 @@ do {									\
 		if (condition)						\
 			break;						\
 		spin_unlock_irq(&lock);					\
+		cmd;							\
 		schedule();						\
 		spin_lock_irq(&lock);					\
 	}								\
@@ -333,11 +334,11 @@ do {									\
 	remove_wait_queue(&wq, &__wait);				\
 } while (0)
 
-#define wait_event_lock_irq(wq, condition, lock)			\
+#define wait_event_lock_irq(wq, condition, lock, cmd)			\
 do {									\
 	if (condition)							\
 		break;							\
-	__wait_event_lock_irq(wq, condition, lock);			\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
 } while (0)
 
 #endif
_
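
The pattern every personality implements above is the same: the md device's
queue gets a personality-specific unplug_fn, and that function forwards the
unplug only to the queues of the member devices actually in use (skipping
faulty ones), instead of md.c blindly unplugging every rdev.  A minimal
user-space sketch of that delegation - every name in it (struct queue,
struct mirror_set, member_unplug, mirror_unplug) is invented for
illustration and is not a kernel API:

#include <stdio.h>

/* Stand-in for request_queue_t: an unplug hook plus a back-pointer. */
struct queue {
	void (*unplug_fn)(struct queue *q);
	void *queuedata;		/* owning device's private data */
	const char *name;
};

/* A member device's own unplug; in the kernel this would kick the
 * elevator, here it just reports that it ran. */
static void member_unplug(struct queue *q)
{
	printf("unplugged %s\n", q->name);
}

/* Per-personality state: two mirrors, each possibly faulty. */
struct mirror_set {
	struct queue *member[2];
	int faulty[2];
};

/* The raid1-style unplug_fn: forward only to healthy members, in the
 * spirit of the unplug_slaves() functions in the patch. */
static void mirror_unplug(struct queue *q)
{
	struct mirror_set *conf = q->queuedata;
	int i;

	for (i = 0; i < 2; i++) {
		struct queue *m = conf->member[i];
		if (!conf->faulty[i] && m->unplug_fn)
			m->unplug_fn(m);
	}
}

int main(void)
{
	struct queue sda = { member_unplug, NULL, "sda" };
	struct queue sdb = { member_unplug, NULL, "sdb" };
	struct mirror_set conf = { { &sda, &sdb }, { 0, 1 } };	/* sdb faulty */
	struct queue md0 = { mirror_unplug, &conf, "md0" };

	md0.unplug_fn(&md0);	/* only sda gets unplugged */
	return 0;
}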