From: Jens Axboe --- 25-akpm/drivers/block/ll_rw_blk.c | 101 ++++++++++++----------------------- 25-akpm/drivers/block/loop.c | 15 ++++- 25-akpm/drivers/block/umem.c | 3 - 25-akpm/drivers/md/dm-crypt.c | 2 25-akpm/drivers/md/dm-table.c | 18 ++++++ 25-akpm/drivers/md/dm.c | 24 +++++++- 25-akpm/drivers/md/dm.h | 1 25-akpm/drivers/md/md.c | 30 +++++++++- 25-akpm/drivers/md/raid1.c | 3 + 25-akpm/drivers/md/raid5.c | 4 - 25-akpm/drivers/md/raid6main.c | 3 - 25-akpm/drivers/mtd/devices/blkmtd.c | 6 -- 25-akpm/fs/buffer.c | 5 - 25-akpm/fs/direct-io.c | 4 - 25-akpm/fs/jfs/jfs_logmgr.c | 6 -- 25-akpm/fs/ntfs/compress.c | 3 - 25-akpm/fs/ufs/truncate.c | 3 - 25-akpm/fs/xfs/linux/xfs_buf.c | 24 ++------ 25-akpm/include/linux/backing-dev.h | 2 25-akpm/include/linux/bio.h | 2 25-akpm/include/linux/blkdev.h | 23 +++++-- 25-akpm/include/linux/fs.h | 2 25-akpm/include/linux/raid/md_k.h | 26 --------- 25-akpm/kernel/power/disk.c | 1 25-akpm/kernel/power/pmdisk.c | 3 - 25-akpm/kernel/power/swsusp.c | 5 - 25-akpm/mm/mempool.c | 2 25-akpm/mm/readahead.c | 5 + 28 files changed, 172 insertions(+), 154 deletions(-) diff -puN drivers/block/ll_rw_blk.c~per-backing_dev-unplugging drivers/block/ll_rw_blk.c --- 25/drivers/block/ll_rw_blk.c~per-backing_dev-unplugging 2004-03-14 13:55:17.301447008 -0800 +++ 25-akpm/drivers/block/ll_rw_blk.c 2004-03-14 13:55:17.343440624 -0800 @@ -42,12 +42,6 @@ static void blk_unplug_timeout(unsigned */ static kmem_cache_t *request_cachep; -/* - * plug management - */ -static LIST_HEAD(blk_plug_list); -static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; - static wait_queue_head_t congestion_wqh[2]; /* @@ -234,9 +228,13 @@ void blk_queue_make_request(request_queu blk_queue_dma_alignment(q, 511); q->unplug_thresh = 4; /* hmm */ +#if 0 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ if (q->unplug_delay == 0) q->unplug_delay = 1; +#else + q->unplug_delay = HZ; +#endif INIT_WORK(&q->unplug_work, blk_unplug_work, q); @@ -248,8 
+246,6 @@ void blk_queue_make_request(request_queu */ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - INIT_LIST_HEAD(&q->plug_list); - blk_queue_activity_fn(q, NULL, NULL); } @@ -1101,13 +1097,11 @@ void blk_plug_device(request_queue_t *q) * don't plug a stopped queue, it must be paired with blk_start_queue() * which will restart the queueing */ - if (!blk_queue_plugged(q) - && !test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) { - spin_lock(&blk_plug_lock); - list_add_tail(&q->plug_list, &blk_plug_list); + if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) + return; + + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - spin_unlock(&blk_plug_lock); - } } EXPORT_SYMBOL(blk_plug_device); @@ -1119,15 +1113,12 @@ EXPORT_SYMBOL(blk_plug_device); int blk_remove_plug(request_queue_t *q) { WARN_ON(!irqs_disabled()); - if (blk_queue_plugged(q)) { - spin_lock(&blk_plug_lock); - list_del_init(&q->plug_list); - del_timer(&q->unplug_timer); - spin_unlock(&blk_plug_lock); - return 1; - } - return 0; + if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + return 0; + + del_timer(&q->unplug_timer); + return 1; } EXPORT_SYMBOL(blk_remove_plug); @@ -1158,14 +1149,11 @@ static inline void __generic_unplug_devi * Linux uses plugging to build bigger requests queues before letting * the device have at them. If a queue is plugged, the I/O scheduler * is still adding and merging requests on the queue. Once the queue - * gets unplugged (either by manually calling this function, or by - * calling blk_run_queues()), the request_fn defined for the - * queue is invoked and transfers started. + * gets unplugged, the request_fn defined for the queue is invoked and + * transfers started. 
**/ -void generic_unplug_device(void *data) +void generic_unplug_device(request_queue_t *q) { - request_queue_t *q = data; - spin_lock_irq(q->queue_lock); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -1173,9 +1161,23 @@ void generic_unplug_device(void *data) EXPORT_SYMBOL(generic_unplug_device); +static inline void blk_backing_dev_unplug(struct backing_dev_info *bdi) +{ + request_queue_t *q = bdi->unplug_io_data; + + /* + * devices don't necessarily have an ->unplug_fn defined + */ + if (q->unplug_fn) + q->unplug_fn(q); +} + +EXPORT_SYMBOL(blk_backing_dev_unplug); + static void blk_unplug_work(void *data) { request_queue_t *q = data; + q->unplug_fn(q); } @@ -1253,42 +1255,6 @@ void blk_run_queue(struct request_queue EXPORT_SYMBOL(blk_run_queue); /** - * blk_run_queues - fire all plugged queues - * - * Description: - * Start I/O on all plugged queues known to the block layer. Queues that - * are currently stopped are ignored. This is equivalent to the older - * tq_disk task queue run. 
- **/ -#define blk_plug_entry(entry) list_entry((entry), request_queue_t, plug_list) -void blk_run_queues(void) -{ - LIST_HEAD(local_plug_list); - - spin_lock_irq(&blk_plug_lock); - - /* - * this will happen fairly often - */ - if (list_empty(&blk_plug_list)) - goto out; - - list_splice_init(&blk_plug_list, &local_plug_list); - - while (!list_empty(&local_plug_list)) { - request_queue_t *q = blk_plug_entry(local_plug_list.next); - - spin_unlock_irq(&blk_plug_lock); - q->unplug_fn(q); - spin_lock_irq(&blk_plug_lock); - } -out: - spin_unlock_irq(&blk_plug_lock); -} - -EXPORT_SYMBOL(blk_run_queues); - -/** * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed * @q: the request queue to be released * @@ -1393,6 +1359,10 @@ request_queue_t *blk_alloc_queue(int gfp memset(q, 0, sizeof(*q)); init_timer(&q->unplug_timer); atomic_set(&q->refcnt, 1); + + q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; + q->backing_dev_info.unplug_io_data = q; + return q; } @@ -2053,7 +2023,6 @@ long blk_congestion_wait(int rw, long ti DEFINE_WAIT(wait); wait_queue_head_t *wqh = &congestion_wqh[rw]; - blk_run_queues(); prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = io_schedule_timeout(timeout); finish_wait(wqh, &wait); @@ -2309,7 +2278,7 @@ out: if (blk_queue_plugged(q)) { int nr_queued = q->rq.count[READ] + q->rq.count[WRITE]; - if (nr_queued == q->unplug_thresh) + if (nr_queued == q->unplug_thresh || bio_sync(bio)) __generic_unplug_device(q); } spin_unlock_irq(q->queue_lock); diff -puN drivers/block/loop.c~per-backing_dev-unplugging drivers/block/loop.c --- 25/drivers/block/loop.c~per-backing_dev-unplugging 2004-03-14 13:55:17.303446704 -0800 +++ 25-akpm/drivers/block/loop.c 2004-03-14 13:55:17.345440320 -0800 @@ -434,6 +434,17 @@ inactive: goto out; } +/* + * kick off io on the underlying address space + */ +static void loop_unplug(request_queue_t *q) +{ + struct loop_device *lo = q->queuedata; + + clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); 
+ blk_run_address_space(lo->lo_backing_file->f_mapping); +} + struct switch_request { struct file *file; struct completion wait; @@ -614,7 +625,6 @@ static int loop_set_fd(struct loop_devic { struct file *file; struct inode *inode; - struct block_device *lo_device = NULL; struct address_space *mapping; unsigned lo_blocksize; int lo_flags = 0; @@ -671,7 +681,7 @@ static int loop_set_fd(struct loop_devic set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->lo_blocksize = lo_blocksize; - lo->lo_device = lo_device; + lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; lo->transfer = NULL; @@ -689,6 +699,7 @@ static int loop_set_fd(struct loop_devic */ blk_queue_make_request(lo->lo_queue, loop_make_request); lo->lo_queue->queuedata = lo; + lo->lo_queue->unplug_fn = loop_unplug; set_capacity(disks[lo->lo_number], size); diff -puN drivers/block/umem.c~per-backing_dev-unplugging drivers/block/umem.c --- 25/drivers/block/umem.c~per-backing_dev-unplugging 2004-03-14 13:55:17.304446552 -0800 +++ 25-akpm/drivers/block/umem.c 2004-03-14 13:55:17.346440168 -0800 @@ -368,9 +368,8 @@ static inline void reset_page(struct mm_ page->biotail = & page->bio; } -static void mm_unplug_device(void *data) +static void mm_unplug_device(request_queue_t *q) { - request_queue_t *q = data; struct cardinfo *card = q->queuedata; unsigned long flags; diff -puN drivers/md/dm.c~per-backing_dev-unplugging drivers/md/dm.c --- 25/drivers/md/dm.c~per-backing_dev-unplugging 2004-03-14 13:55:17.306446248 -0800 +++ 25-akpm/drivers/md/dm.c 2004-03-14 13:55:17.346440168 -0800 @@ -575,6 +575,18 @@ static int dm_request(request_queue_t *q return 0; } +static void dm_unplug_all(request_queue_t *q) +{ + struct mapped_device *md = q->queuedata; + struct dm_table *map = dm_get_table(md); + + clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + if (map) { + dm_table_unplug_all(map); + dm_table_put(map); + } +} + static int dm_any_congested(void *congested_data, int bdi_bits) { int r; @@ 
-672,6 +684,7 @@ static struct mapped_device *alloc_dev(u md->queue->backing_dev_info.congested_fn = dm_any_congested; md->queue->backing_dev_info.congested_data = md; blk_queue_make_request(md->queue, dm_request); + md->queue->unplug_fn = dm_unplug_all; md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, mempool_free_slab, _io_cache); @@ -896,11 +909,17 @@ int dm_suspend(struct mapped_device *md) add_wait_queue(&md->wait, &wait); up_write(&md->lock); + /* unplug */ + map = dm_get_table(md); + if (map) { + dm_table_unplug_all(map); + dm_table_put(map); + } + /* * Then we wait for the already mapped ios to * complete. */ - blk_run_queues(); while (1) { set_current_state(TASK_INTERRUPTIBLE); @@ -945,10 +964,9 @@ int dm_resume(struct mapped_device *md) def = bio_list_get(&md->deferred); __flush_deferred_io(md, def); up_write(&md->lock); + dm_table_unplug_all(map); dm_table_put(map); - blk_run_queues(); - return 0; } diff -puN drivers/md/dm-crypt.c~per-backing_dev-unplugging drivers/md/dm-crypt.c --- 25/drivers/md/dm-crypt.c~per-backing_dev-unplugging 2004-03-14 13:55:17.308445944 -0800 +++ 25-akpm/drivers/md/dm-crypt.c 2004-03-14 13:55:17.347440016 -0800 @@ -668,7 +668,7 @@ static int crypt_map(struct dm_target *t /* out of memory -> run queues */ if (remaining) - blk_run_queues(); + blk_congestion_wait(bio_data_dir(clone), HZ/100); } /* drop reference, clones could have returned before we reach this */ diff -puN drivers/md/dm.h~per-backing_dev-unplugging drivers/md/dm.h --- 25/drivers/md/dm.h~per-backing_dev-unplugging 2004-03-14 13:55:17.309445792 -0800 +++ 25-akpm/drivers/md/dm.h 2004-03-14 13:55:17.348439864 -0800 @@ -116,6 +116,7 @@ int dm_table_get_mode(struct dm_table *t void dm_table_suspend_targets(struct dm_table *t); void dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); +void dm_table_unplug_all(struct dm_table *t); /*----------------------------------------------------------------- * A
registry of target types. diff -puN drivers/md/dm-table.c~per-backing_dev-unplugging drivers/md/dm-table.c --- 25/drivers/md/dm-table.c~per-backing_dev-unplugging 2004-03-14 13:55:17.311445488 -0800 +++ 25-akpm/drivers/md/dm-table.c 2004-03-14 13:55:17.348439864 -0800 @@ -885,8 +885,26 @@ int dm_table_any_congested(struct dm_tab return r; } +void dm_table_unplug_all(struct dm_table *t) +{ + struct list_head *d, *devices = dm_table_get_devices(t); + + for (d = devices->next; d != devices; d = d->next) { + struct dm_dev *dd = list_entry(d, struct dm_dev, list); + request_queue_t *q = bdev_get_queue(dd->bdev); + + if (q->unplug_fn) { + set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + q->unplug_fn(q); + } + } +} + EXPORT_SYMBOL(dm_vcalloc); EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_put_device); EXPORT_SYMBOL(dm_table_event); EXPORT_SYMBOL(dm_table_get_mode); +EXPORT_SYMBOL(dm_table_put); +EXPORT_SYMBOL(dm_table_get); +EXPORT_SYMBOL(dm_table_unplug_all); diff -puN drivers/md/md.c~per-backing_dev-unplugging drivers/md/md.c --- 25/drivers/md/md.c~per-backing_dev-unplugging 2004-03-14 13:55:17.312445336 -0800 +++ 25-akpm/drivers/md/md.c 2004-03-14 13:55:17.351439408 -0800 @@ -160,6 +160,28 @@ static int md_fail_request (request_queu return 0; } +static void md_unplug_all(request_queue_t *q) +{ + mddev_t *mddev = q->queuedata; + struct list_head *tmp; + mdk_rdev_t *rdev; + + clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + + /* + * this list iteration is done without any locking in md?! 
+ */ + ITERATE_RDEV(mddev, rdev, tmp) { + request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + + if (r_queue->unplug_fn) { + set_bit(QUEUE_FLAG_PLUGGED, &r_queue->queue_flags); + r_queue->unplug_fn(r_queue); + } + } + +} + static inline mddev_t *mddev_get(mddev_t *mddev) { atomic_inc(&mddev->active); @@ -335,6 +357,8 @@ static int sync_page_io(struct block_dev struct bio_vec vec; struct completion event; + rw |= (1 << BIO_RW_SYNC); + bio_init(&bio); bio.bi_io_vec = &vec; vec.bv_page = page; @@ -349,7 +373,6 @@ static int sync_page_io(struct block_dev bio.bi_private = &event; bio.bi_end_io = bi_complete; submit_bio(rw, &bio); - blk_run_queues(); wait_for_completion(&event); return test_bit(BIO_UPTODATE, &bio.bi_flags); @@ -1644,6 +1667,7 @@ static int do_md_run(mddev_t * mddev) */ mddev->queue->queuedata = mddev; mddev->queue->make_request_fn = mddev->pers->make_request; + mddev->queue->unplug_fn = md_unplug_all; mddev->changed = 1; return 0; @@ -2718,7 +2742,7 @@ int md_thread(void * arg) run = thread->run; if (run) { run(thread->mddev); - blk_run_queues(); + blk_run_queue(thread->mddev->queue); } if (signal_pending(current)) flush_signals(current); @@ -3286,7 +3310,7 @@ static void md_do_sync(mddev_t *mddev) test_bit(MD_RECOVERY_ERR, &mddev->recovery)) break; - blk_run_queues(); + blk_run_queue(mddev->queue); repeat: if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) { diff -puN drivers/md/raid1.c~per-backing_dev-unplugging drivers/md/raid1.c --- 25/drivers/md/raid1.c~per-backing_dev-unplugging 2004-03-14 13:55:17.314445032 -0800 +++ 25-akpm/drivers/md/raid1.c 2004-03-14 13:55:17.352439256 -0800 @@ -451,6 +451,7 @@ rb_out: static void device_barrier(conf_t *conf, sector_t sect) { + blk_run_queue(conf->mddev->queue); spin_lock_irq(&conf->resync_lock); wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock); @@ -478,6 +479,7 @@ static int make_request(request_queue_t * thread has put up a bar for new requests. 
* Continue immediately if no resync is active currently. */ + blk_run_queue(conf->mddev->queue); spin_lock_irq(&conf->resync_lock); wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock); conf->nr_pending++; @@ -644,6 +646,7 @@ static void print_conf(conf_t *conf) static void close_sync(conf_t *conf) { + blk_run_queue(conf->mddev->queue); spin_lock_irq(&conf->resync_lock); wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock); spin_unlock_irq(&conf->resync_lock); diff -puN drivers/md/raid5.c~per-backing_dev-unplugging drivers/md/raid5.c --- 25/drivers/md/raid5.c~per-backing_dev-unplugging 2004-03-14 13:55:17.315444880 -0800 +++ 25-akpm/drivers/md/raid5.c 2004-03-14 13:55:17.353439104 -0800 @@ -249,6 +249,7 @@ static struct stripe_head *get_active_st break; if (!sh) { conf->inactive_blocked = 1; + blk_run_queue(conf->mddev->queue); wait_event_lock_irq(conf->wait_for_stripe, !list_empty(&conf->inactive_list) && (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4) @@ -1292,9 +1293,8 @@ static inline void raid5_activate_delaye } } } -static void raid5_unplug_device(void *data) +static void raid5_unplug_device(request_queue_t *q) { - request_queue_t *q = data; mddev_t *mddev = q->queuedata; raid5_conf_t *conf = mddev_to_conf(mddev); unsigned long flags; diff -puN drivers/md/raid6main.c~per-backing_dev-unplugging drivers/md/raid6main.c --- 25/drivers/md/raid6main.c~per-backing_dev-unplugging 2004-03-14 13:55:17.316444728 -0800 +++ 25-akpm/drivers/md/raid6main.c 2004-03-14 13:55:17.355438800 -0800 @@ -1454,9 +1454,8 @@ static inline void raid6_activate_delaye } } } -static void raid6_unplug_device(void *data) +static void raid6_unplug_device(request_queue_t *q) { - request_queue_t *q = data; mddev_t *mddev = q->queuedata; raid6_conf_t *conf = mddev_to_conf(mddev); unsigned long flags; diff -puN drivers/mtd/devices/blkmtd.c~per-backing_dev-unplugging drivers/mtd/devices/blkmtd.c --- 
25/drivers/mtd/devices/blkmtd.c~per-backing_dev-unplugging 2004-03-14 13:55:17.318444424 -0800 +++ 25-akpm/drivers/mtd/devices/blkmtd.c 2004-03-14 13:55:17.356438648 -0800 @@ -147,8 +147,7 @@ static int blkmtd_readpage(struct blkmtd bio->bi_private = &event; bio->bi_end_io = bi_read_complete; if(bio_add_page(bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { - submit_bio(READ, bio); - blk_run_queues(); + submit_bio(READ_SYNC, bio); wait_for_completion(&event); err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO; bio_put(bio); @@ -179,8 +178,7 @@ static int blkmtd_write_out(struct bio * init_completion(&event); bio->bi_private = &event; bio->bi_end_io = bi_write_complete; - submit_bio(WRITE, bio); - blk_run_queues(); + submit_bio(WRITE_SYNC, bio); wait_for_completion(&event); DEBUG(3, "submit_bio completed, bi_vcnt = %d\n", bio->bi_vcnt); err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO; diff -puN fs/buffer.c~per-backing_dev-unplugging fs/buffer.c --- 25/fs/buffer.c~per-backing_dev-unplugging 2004-03-14 13:55:17.319444272 -0800 +++ 25-akpm/fs/buffer.c 2004-03-14 13:55:17.358438344 -0800 @@ -132,7 +132,7 @@ void __wait_on_buffer(struct buffer_head do { prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); if (buffer_locked(bh)) { - blk_run_queues(); + blk_run_address_space(bh->b_bdev->bd_inode->i_mapping); io_schedule(); } } while (buffer_locked(bh)); @@ -491,7 +491,6 @@ static void free_more_memory(void) pg_data_t *pgdat; wakeup_bdflush(1024); - blk_run_queues(); yield(); for_each_pgdat(pgdat) { @@ -2929,7 +2928,7 @@ EXPORT_SYMBOL(try_to_free_buffers); int block_sync_page(struct page *page) { - blk_run_queues(); + blk_run_address_space(page->mapping); return 0; } diff -puN fs/direct-io.c~per-backing_dev-unplugging fs/direct-io.c --- 25/fs/direct-io.c~per-backing_dev-unplugging 2004-03-14 13:55:17.321443968 -0800 +++ 25-akpm/fs/direct-io.c 2004-03-14 13:55:17.359438192 -0800 @@ -329,7 +329,7 @@ static struct bio *dio_await_one(struct if (dio->bio_list == NULL) { 
dio->waiter = current; spin_unlock_irqrestore(&dio->bio_list_lock, flags); - blk_run_queues(); + blk_run_address_space(dio->inode->i_mapping); io_schedule(); spin_lock_irqsave(&dio->bio_list_lock, flags); dio->waiter = NULL; @@ -960,7 +960,7 @@ direct_io_worker(int rw, struct kiocb *i if (ret == 0) ret = dio->result; /* Bytes written */ finished_one_bio(dio); /* This can free the dio */ - blk_run_queues(); + blk_run_address_space(inode->i_mapping); } else { finished_one_bio(dio); ret2 = dio_await_completion(dio); diff -puN fs/jfs/jfs_logmgr.c~per-backing_dev-unplugging fs/jfs/jfs_logmgr.c --- 25/fs/jfs/jfs_logmgr.c~per-backing_dev-unplugging 2004-03-14 13:55:17.322443816 -0800 +++ 25-akpm/fs/jfs/jfs_logmgr.c 2004-03-14 13:55:17.361437888 -0800 @@ -1972,8 +1972,7 @@ static int lbmRead(struct jfs_log * log, bio->bi_end_io = lbmIODone; bio->bi_private = bp; - submit_bio(READ, bio); - blk_run_queues(); + submit_bio(READ_SYNC, bio); wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); @@ -2117,9 +2116,8 @@ static void lbmStartIO(struct lbuf * bp) /* check if journaling to disk has been disabled */ if (!log->no_integrity) { - submit_bio(WRITE, bio); + submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); - blk_run_queues(); } else { bio->bi_size = 0; diff -puN fs/ntfs/compress.c~per-backing_dev-unplugging fs/ntfs/compress.c --- 25/fs/ntfs/compress.c~per-backing_dev-unplugging 2004-03-14 13:55:17.323443664 -0800 +++ 25-akpm/fs/ntfs/compress.c 2004-03-14 13:55:17.362437736 -0800 @@ -23,6 +23,7 @@ #include #include +#include #include "ntfs.h" @@ -668,7 +669,7 @@ lock_retry_remap: "uptodate! 
Unplugging the disk queue " "and rescheduling."); get_bh(tbh); - blk_run_queues(); + blk_run_address_space(mapping); schedule(); put_bh(tbh); if (unlikely(!buffer_uptodate(tbh))) diff -puN fs/ufs/truncate.c~per-backing_dev-unplugging fs/ufs/truncate.c --- 25/fs/ufs/truncate.c~per-backing_dev-unplugging 2004-03-14 13:55:17.325443360 -0800 +++ 25-akpm/fs/ufs/truncate.c 2004-03-14 13:55:17.363437584 -0800 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "swab.h" @@ -456,7 +457,7 @@ void ufs_truncate (struct inode * inode) break; if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) ufs_sync_inode (inode); - blk_run_queues(); + blk_run_address_space(inode->i_mapping); yield(); } offset = inode->i_size & uspi->s_fshift; diff -puN fs/xfs/linux/xfs_buf.c~per-backing_dev-unplugging fs/xfs/linux/xfs_buf.c --- 25/fs/xfs/linux/xfs_buf.c~per-backing_dev-unplugging 2004-03-14 13:55:17.326443208 -0800 +++ 25-akpm/fs/xfs/linux/xfs_buf.c 2004-03-14 13:55:17.364437432 -0800 @@ -1013,7 +1013,7 @@ pagebuf_lock( { PB_TRACE(pb, "lock", 0); if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); + blk_run_address_space(pb->pb_target->pbr_mapping); down(&pb->pb_sema); PB_SET_OWNER(pb); PB_TRACE(pb, "locked", 0); @@ -1109,7 +1109,7 @@ _pagebuf_wait_unpin( if (atomic_read(&pb->pb_pin_count) == 0) break; if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); + blk_run_address_space(pb->pb_target->pbr_mapping); schedule(); } remove_wait_queue(&pb->pb_waiters, &wait); @@ -1407,7 +1407,7 @@ submit_io: if (pb->pb_flags & PBF_RUN_QUEUES) { pb->pb_flags &= ~PBF_RUN_QUEUES; if (atomic_read(&pb->pb_io_remaining) > 1) - blk_run_queues(); + blk_run_address_space(pb->pb_target->pbr_mapping); } } @@ -1471,7 +1471,7 @@ pagebuf_iowait( { PB_TRACE(pb, "iowait", 0); if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); + blk_run_address_space(pb->pb_target->pbr_mapping); down(&pb->pb_iodonesema); PB_TRACE(pb, "iowaited", (long)pb->pb_error); return pb->pb_error; @@ -1617,7 
+1617,6 @@ STATIC int pagebuf_daemon( void *data) { - int count; page_buf_t *pb; struct list_head *curr, *next, tmp; @@ -1640,7 +1639,6 @@ pagebuf_daemon( spin_lock(&pbd_delwrite_lock); - count = 0; list_for_each_safe(curr, next, &pbd_delwrite_queue) { pb = list_entry(curr, page_buf_t, pb_list); @@ -1657,7 +1655,6 @@ pagebuf_daemon( pb->pb_flags &= ~PBF_DELWRI; pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); - count++; } } @@ -1667,12 +1664,11 @@ pagebuf_daemon( list_del_init(&pb->pb_list); pagebuf_iostrategy(pb); + blk_run_address_space(pb->pb_target->pbr_mapping); } if (as_list_len > 0) purge_addresses(); - if (count) - blk_run_queues(); force_flush = 0; } while (pagebuf_daemon_active); @@ -1689,7 +1685,6 @@ pagebuf_delwri_flush( page_buf_t *pb; struct list_head *curr, *next, tmp; int pincount = 0; - int flush_cnt = 0; pagebuf_runall_queues(pagebuf_dataio_workqueue); pagebuf_runall_queues(pagebuf_logio_workqueue); @@ -1733,14 +1728,8 @@ pagebuf_delwri_flush( pagebuf_lock(pb); pagebuf_iostrategy(pb); - if (++flush_cnt > 32) { - blk_run_queues(); - flush_cnt = 0; - } } - blk_run_queues(); - while (!list_empty(&tmp)) { pb = list_entry(tmp.next, page_buf_t, pb_list); @@ -1751,6 +1740,9 @@ pagebuf_delwri_flush( pagebuf_rele(pb); } + if (flags & PBDF_WAIT) + blk_run_address_space(target->pbr_mapping); + if (pinptr) *pinptr = pincount; } diff -puN include/linux/backing-dev.h~per-backing_dev-unplugging include/linux/backing-dev.h --- 25/include/linux/backing-dev.h~per-backing_dev-unplugging 2004-03-14 13:55:17.327443056 -0800 +++ 25-akpm/include/linux/backing-dev.h 2004-03-14 13:55:17.365437280 -0800 @@ -28,6 +28,8 @@ struct backing_dev_info { int memory_backed; /* Cannot clean pages with writepage */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ + void (*unplug_io_fn)(struct backing_dev_info *); + void *unplug_io_data; }; extern struct backing_dev_info 
default_backing_dev_info; diff -puN include/linux/bio.h~per-backing_dev-unplugging include/linux/bio.h --- 25/include/linux/bio.h~per-backing_dev-unplugging 2004-03-14 13:55:17.329442752 -0800 +++ 25-akpm/include/linux/bio.h 2004-03-14 13:55:17.365437280 -0800 @@ -124,6 +124,7 @@ struct bio { #define BIO_RW_AHEAD 1 #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 +#define BIO_RW_SYNC 4 /* * various member access, note that bio_data should of course not be used @@ -138,6 +139,7 @@ struct bio { #define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) #define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) +#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) /* * will die diff -puN include/linux/blkdev.h~per-backing_dev-unplugging include/linux/blkdev.h --- 25/include/linux/blkdev.h~per-backing_dev-unplugging 2004-03-14 13:55:17.330442600 -0800 +++ 25-akpm/include/linux/blkdev.h 2004-03-14 13:55:17.366437128 -0800 @@ -243,7 +243,7 @@ typedef int (merge_requests_fn) (request typedef void (request_fn_proc) (request_queue_t *q); typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef int (prep_rq_fn) (request_queue_t *, struct request *); -typedef void (unplug_fn) (void *q); +typedef void (unplug_fn) (request_queue_t *); struct bio_vec; typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); @@ -315,8 +315,6 @@ struct request_queue unsigned long bounce_pfn; int bounce_gfp; - struct list_head plug_list; - /* * various queue flags, see QUEUE_* below */ @@ -370,8 +368,9 @@ struct request_queue #define QUEUE_FLAG_WRITEFULL 4 /* read queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ +#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ -#define blk_queue_plugged(q) !list_empty(&(q)->plug_list) +#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, 
&(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -515,7 +514,7 @@ extern int scsi_cmd_ioctl(struct gendisk extern void blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); extern void __blk_stop_queue(request_queue_t *q); -extern void blk_run_queue(request_queue_t *q); +extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern int blk_rq_unmap_user(struct request *, void __user *, unsigned int); @@ -526,6 +525,18 @@ static inline request_queue_t *bdev_get_ return bdev->bd_disk->queue; } +static inline void blk_run_backing_dev(struct backing_dev_info *bdi) +{ + if (bdi && bdi->unplug_io_fn) + bdi->unplug_io_fn(bdi); +} + +static inline void blk_run_address_space(struct address_space *mapping) +{ + if (mapping) + blk_run_backing_dev(mapping->backing_dev_info); +} + /* * end_request() and friends. Must be called with the request queue spinlock * acquired. All functions called within end_request() _must_be_ atomic. 
@@ -572,7 +583,7 @@ extern struct backing_dev_info *blk_get_ extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -extern void generic_unplug_device(void *); +extern void generic_unplug_device(request_queue_t *); extern long nr_blockdev_pages(void); int blk_get_queue(request_queue_t *); diff -puN include/linux/fs.h~per-backing_dev-unplugging include/linux/fs.h --- 25/include/linux/fs.h~per-backing_dev-unplugging 2004-03-14 13:55:17.331442448 -0800 +++ 25-akpm/include/linux/fs.h 2004-03-14 13:55:17.368436824 -0800 @@ -83,6 +83,8 @@ extern int leases_enable, dir_notify_ena #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ #define SPECIAL 4 /* For non-blockdevice requests in request queue */ +#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SEL_IN 1 #define SEL_OUT 2 diff -puN include/linux/raid/md_k.h~per-backing_dev-unplugging include/linux/raid/md_k.h --- 25/include/linux/raid/md_k.h~per-backing_dev-unplugging 2004-03-14 13:55:17.332442296 -0800 +++ 25-akpm/include/linux/raid/md_k.h 2004-03-14 13:55:17.368436824 -0800 @@ -326,7 +326,6 @@ do { \ if (condition) \ break; \ spin_unlock_irq(&lock); \ - blk_run_queues(); \ schedule(); \ spin_lock_irq(&lock); \ } \ @@ -341,30 +340,5 @@ do { \ __wait_event_lock_irq(wq, condition, lock); \ } while (0) - -#define __wait_disk_event(wq, condition) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - blk_run_queues(); \ - schedule(); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_disk_event(wq, condition) \ -do { \ - if (condition) \ - break; \ - __wait_disk_event(wq, condition); \ -} while (0) - #endif diff -puN kernel/power/disk.c~per-backing_dev-unplugging 
kernel/power/disk.c --- 25/kernel/power/disk.c~per-backing_dev-unplugging 2004-03-14 13:55:17.334441992 -0800 +++ 25-akpm/kernel/power/disk.c 2004-03-14 13:55:17.369436672 -0800 @@ -84,7 +84,6 @@ static void free_some_memory(void) while (shrink_all_memory(10000)) printk("."); printk("|\n"); - blk_run_queues(); } diff -puN kernel/power/pmdisk.c~per-backing_dev-unplugging kernel/power/pmdisk.c --- 25/kernel/power/pmdisk.c~per-backing_dev-unplugging 2004-03-14 13:55:17.335441840 -0800 +++ 25-akpm/kernel/power/pmdisk.c 2004-03-14 13:55:17.370436520 -0800 @@ -859,7 +859,6 @@ static int end_io(struct bio * bio, unsi static void wait_io(void) { - blk_run_queues(); while(atomic_read(&io_done)) io_schedule(); } @@ -898,7 +897,7 @@ static int submit(int rw, pgoff_t page_o if (rw == WRITE) bio_set_pages_dirty(bio); start_io(); - submit_bio(rw,bio); + submit_bio(rw | (1 << BIO_RW_SYNC), bio); wait_io(); Done: bio_put(bio); diff -puN kernel/power/swsusp.c~per-backing_dev-unplugging kernel/power/swsusp.c --- 25/kernel/power/swsusp.c~per-backing_dev-unplugging 2004-03-14 13:55:17.337441536 -0800 +++ 25-akpm/kernel/power/swsusp.c 2004-03-14 13:55:17.370436520 -0800 @@ -707,11 +707,6 @@ int software_suspend(void) free_some_memory(); - /* No need to invalidate any vfsmnt list -- - * they will be valid after resume, anyway. - */ - blk_run_queues(); - /* Save state of all device drivers, and stop them. 
*/ if ((res = device_suspend(4))==0) /* If stopping device drivers worked, we proceed basically into diff -puN mm/mempool.c~per-backing_dev-unplugging mm/mempool.c --- 25/mm/mempool.c~per-backing_dev-unplugging 2004-03-14 13:55:17.338441384 -0800 +++ 25-akpm/mm/mempool.c 2004-03-14 13:55:17.371436368 -0800 @@ -234,8 +234,6 @@ repeat_alloc: if (!(gfp_mask & __GFP_WAIT)) return NULL; - blk_run_queues(); - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); mb(); if (!pool->curr_nr) diff -puN mm/readahead.c~per-backing_dev-unplugging mm/readahead.c --- 25/mm/readahead.c~per-backing_dev-unplugging 2004-03-14 13:55:17.339441232 -0800 +++ 25-akpm/mm/readahead.c 2004-03-14 13:55:17.371436368 -0800 @@ -15,9 +15,14 @@ #include #include +static void default_unplug_io_fn(struct backing_dev_info *bdi) +{ +} + struct backing_dev_info default_backing_dev_info = { .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE, .state = 0, + .unplug_io_fn = default_unplug_io_fn, }; EXPORT_SYMBOL_GPL(default_backing_dev_info); _