From: Jens Axboe

This patch makes the io schedulers fully modular, so that each of them
can be built as a loadable module. It also enables online switching of
io schedulers. See also http://lwn.net/Articles/102593/ .

Signed-off-by: Jens Axboe
Signed-off-by: Andrew Morton
---

 25-akpm/drivers/block/Kconfig.iosched    |    8
 25-akpm/drivers/block/as-iosched.c       |  109 ++++++----
 25-akpm/drivers/block/cfq-iosched.c      |  113 +++++++----
 25-akpm/drivers/block/deadline-iosched.c |   88 +++++---
 25-akpm/drivers/block/elevator.c         |  316 +++++++++++++++++++++++++------
 25-akpm/drivers/block/ll_rw_blk.c        |  140 ++++++++-----
 25-akpm/drivers/block/noop-iosched.c     |   33 ++-
 25-akpm/drivers/s390/block/dasd.c        |    4
 25-akpm/drivers/s390/char/tape_block.c   |    4
 25-akpm/include/linux/blkdev.h           |   10
 25-akpm/include/linux/elevator.h         |   55 +++--
 11 files changed, 620 insertions(+), 260 deletions(-)

diff -puN drivers/block/as-iosched.c~switchable-and-modular-io-schedulers drivers/block/as-iosched.c --- 25/drivers/block/as-iosched.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/as-iosched.c Thu Sep 30 15:39:51 2004 @@ -614,7 +614,7 @@ static void as_antic_stop(struct as_data static void as_antic_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -945,7 +945,7 @@ static void update_write_batch(struct as */ static void as_completed_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); WARN_ON(!list_empty(&rq->queuelist)); @@ -1030,7 +1030,7 @@ static void as_remove_queued_request(req { struct as_rq *arq = RQ_DATA(rq); const int data_dir = arq->is_sync; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; WARN_ON(arq->state != AS_RQ_QUEUED); @@ -1361,7 +1361,7 @@ fifo_expired: static struct request *as_next_request(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct request *rq = NULL; /* @@ -1469,7 +1469,7 @@ static void as_add_request(struct as_dat */ static void as_requeue_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (arq) { @@ -1509,7 +1509,7 @@ static void as_account_queued_request(st static void as_insert_request(request_queue_t *q, struct request *rq, int where) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (arq) { @@ -1562,7 +1562,7 @@ as_insert_request(request_queue_t *q, st */ static int as_queue_empty(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; if (!list_empty(&ad->fifo_list[REQ_ASYNC]) || !list_empty(&ad->fifo_list[REQ_SYNC]) @@ -1601,7 +1601,7 @@ as_latter_request(request_queue_t *q, st static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; int ret; @@ -1656,7 +1656,7 @@ out_insert: static void as_merged_request(request_queue_t *q, struct request
*req) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); /* @@ -1701,7 +1701,7 @@ static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); struct as_rq *anext = RQ_DATA(next); @@ -1788,7 +1788,7 @@ static void as_work_handler(void *data) static void as_put_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (!arq) { @@ -1807,7 +1807,7 @@ static void as_put_request(request_queue static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); if (arq) { @@ -1829,7 +1829,7 @@ static int as_set_request(request_queue_ static int as_may_queue(request_queue_t *q, int rw) { int ret = 0; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { @@ -1842,7 +1842,7 @@ static int as_may_queue(request_queue_t return ret; } -static void as_exit(request_queue_t *q, elevator_t *e) +static void as_exit_queue(elevator_t *e) { struct as_data *ad = e->elevator_data; @@ -1862,7 +1862,7 @@ static void as_exit(request_queue_t *q, * initialize elevator private data (as_data), and alloc a arq for * each request on the free lists */ -static int as_init(request_queue_t *q, elevator_t *e) +static int as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; int i; @@ -2070,39 +2070,64 @@ static struct kobj_type as_ktype = { .default_attrs = default_attrs, }; -static int __init as_slab_setup(void) +static struct elevator_type iosched_as = { + .ops = { + .elevator_merge_fn = as_merge, + .elevator_merged_fn = as_merged_request, + .elevator_merge_req_fn = as_merged_requests, + .elevator_next_req_fn = as_next_request, + .elevator_add_req_fn = as_insert_request, + .elevator_remove_req_fn = as_remove_request, + .elevator_requeue_req_fn = as_requeue_request, + .elevator_queue_empty_fn = as_queue_empty, + .elevator_completed_req_fn = as_completed_request, + .elevator_former_req_fn = as_former_request, + .elevator_latter_req_fn = as_latter_request, + .elevator_set_req_fn = as_set_request, + .elevator_put_req_fn = as_put_request, + .elevator_may_queue_fn = as_may_queue, + .elevator_init_fn = as_init_queue, + .elevator_exit_fn = as_exit_queue, + }, + + .elevator_ktype = &as_ktype, + .elevator_name = "anticipatory", + .elevator_owner = THIS_MODULE, +}; + +int as_init(void) { + int ret; + arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), 0, 0, NULL, NULL); - if (!arq_pool) - panic("as: can't init slab pool\n"); + return -ENOMEM; - return 0; -} + ret = elv_register(&iosched_as); + if (!ret) { + /* + * don't allow AS to get unregistered, since we would have + * to browse all tasks in the system and release their + * as_io_context first + */ + __module_get(THIS_MODULE); + return 0; + } -subsys_initcall(as_slab_setup); + kmem_cache_destroy(arq_pool); + return ret; +} -elevator_t iosched_as = { - .elevator_merge_fn = as_merge, - .elevator_merged_fn = as_merged_request, - .elevator_merge_req_fn = 
as_merged_requests, - .elevator_next_req_fn = as_next_request, - .elevator_add_req_fn = as_insert_request, - .elevator_remove_req_fn = as_remove_request, - .elevator_requeue_req_fn = as_requeue_request, - .elevator_queue_empty_fn = as_queue_empty, - .elevator_completed_req_fn = as_completed_request, - .elevator_former_req_fn = as_former_request, - .elevator_latter_req_fn = as_latter_request, - .elevator_set_req_fn = as_set_request, - .elevator_put_req_fn = as_put_request, - .elevator_may_queue_fn = as_may_queue, - .elevator_init_fn = as_init, - .elevator_exit_fn = as_exit, +void as_exit(void) +{ + kmem_cache_destroy(arq_pool); + elv_unregister(&iosched_as); +} - .elevator_ktype = &as_ktype, - .elevator_name = "anticipatory", -}; +module_init(as_init); +module_exit(as_exit); -EXPORT_SYMBOL(iosched_as); +MODULE_AUTHOR("Nick Piggin"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("anticipatory IO scheduler"); diff -puN drivers/block/cfq-iosched.c~switchable-and-modular-io-schedulers drivers/block/cfq-iosched.c --- 25/drivers/block/cfq-iosched.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/cfq-iosched.c Thu Sep 30 15:39:51 2004 @@ -246,7 +246,7 @@ out: static void cfq_remove_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { @@ -267,7 +267,7 @@ static void cfq_remove_request(request_q static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -305,7 +305,7 @@ out_insert: static void cfq_merged_request(request_queue_t *q, struct request *req) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(req); cfq_del_crq_hash(crq); @@ -404,7 +404,7 @@ restart: static struct request *cfq_next_request(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *rq; if (!list_empty(cfqd->dispatch)) { @@ -531,7 +531,7 @@ static void cfq_enqueue(struct cfq_data static void cfq_insert_request(request_queue_t *q, struct request *rq, int where) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); switch (where) { @@ -562,7 +562,7 @@ cfq_insert_request(request_queue_t *q, s static int cfq_queue_empty(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list)) return 1; @@ -596,7 +596,7 @@ cfq_latter_request(request_queue_t *q, s static int cfq_may_queue(request_queue_t *q, int rw) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; int ret = 1; @@ -621,7 +621,7 @@ out: static void cfq_put_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); struct request_list *rl; int other_rw; @@ -654,7 +654,7 @@ static void cfq_put_request(request_queu static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct cfq_data *cfqd = 
q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; struct cfq_rq *crq; @@ -679,7 +679,7 @@ static int cfq_set_request(request_queue return 1; } -static void cfq_exit(request_queue_t *q, elevator_t *e) +static void cfq_exit_queue(elevator_t *e) { struct cfq_data *cfqd = e->elevator_data; @@ -690,7 +690,7 @@ static void cfq_exit(request_queue_t *q, kfree(cfqd); } -static int cfq_init(request_queue_t *q, elevator_t *e) +static int cfq_init_queue(request_queue_t *q, elevator_t *e) { struct cfq_data *cfqd; int i; @@ -732,7 +732,6 @@ static int cfq_init(request_queue_t *q, cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; - return 0; out_crqpool: kfree(cfqd->cfq_hash); @@ -743,30 +742,38 @@ out_crqhash: return -ENOMEM; } -static int __init cfq_slab_setup(void) +static void cfq_slab_kill(void) +{ + if (crq_pool) + kmem_cache_destroy(crq_pool); + if (cfq_mpool) + mempool_destroy(cfq_mpool); + if (cfq_pool) + kmem_cache_destroy(cfq_pool); +} + +static int cfq_slab_setup(void) { crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, NULL, NULL); - if (!crq_pool) - panic("cfq_iosched: can't init crq pool\n"); + goto fail; cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, NULL, NULL); - if (!cfq_pool) - panic("cfq_iosched: can't init cfq pool\n"); + goto fail; cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool); - if (!cfq_mpool) - panic("cfq_iosched: can't init cfq mpool\n"); + goto fail; return 0; +fail: + cfq_slab_kill(); + return -ENOMEM; } -subsys_initcall(cfq_slab_setup); - /* * sysfs parts below --> */ @@ -868,23 +875,51 @@ struct kobj_type cfq_ktype = { .default_attrs = default_attrs, }; -elevator_t iosched_cfq = { - .elevator_name = "cfq", - .elevator_ktype = &cfq_ktype, - .elevator_merge_fn = cfq_merge, - .elevator_merged_fn = cfq_merged_request, - .elevator_merge_req_fn = cfq_merged_requests, - .elevator_next_req_fn = cfq_next_request, - .elevator_add_req_fn = cfq_insert_request, - .elevator_remove_req_fn = cfq_remove_request, - .elevator_queue_empty_fn = cfq_queue_empty, - .elevator_former_req_fn = cfq_former_request, - .elevator_latter_req_fn = cfq_latter_request, - .elevator_set_req_fn = cfq_set_request, - .elevator_put_req_fn = cfq_put_request, - .elevator_may_queue_fn = cfq_may_queue, - .elevator_init_fn = cfq_init, - .elevator_exit_fn = cfq_exit, +static struct elevator_type iosched_cfq = { + .ops = { + .elevator_merge_fn = cfq_merge, + .elevator_merged_fn = cfq_merged_request, + .elevator_merge_req_fn = cfq_merged_requests, + .elevator_next_req_fn = cfq_next_request, + .elevator_add_req_fn = cfq_insert_request, + .elevator_remove_req_fn = cfq_remove_request, + .elevator_queue_empty_fn = cfq_queue_empty, + .elevator_former_req_fn = cfq_former_request, + .elevator_latter_req_fn = cfq_latter_request, + .elevator_set_req_fn = cfq_set_request, + .elevator_put_req_fn = cfq_put_request, + .elevator_may_queue_fn = cfq_may_queue, + .elevator_init_fn = cfq_init_queue, + .elevator_exit_fn = cfq_exit_queue, + }, + .elevator_ktype = &cfq_ktype, + .elevator_name = "cfq", + .elevator_owner = THIS_MODULE, }; -EXPORT_SYMBOL(iosched_cfq); +int cfq_init(void) +{ + int ret; + + if (cfq_slab_setup()) + return -ENOMEM; + + ret = elv_register(&iosched_cfq); + if (ret) + cfq_slab_kill(); + + return ret; +} + +void cfq_exit(void) +{ + cfq_slab_kill(); + elv_unregister(&iosched_cfq); +} + +module_init(cfq_init); +module_exit(cfq_exit); + +MODULE_AUTHOR("Jens Axboe"); 
+MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); diff -puN drivers/block/deadline-iosched.c~switchable-and-modular-io-schedulers drivers/block/deadline-iosched.c --- 25/drivers/block/deadline-iosched.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/deadline-iosched.c Thu Sep 30 15:39:51 2004 @@ -289,7 +289,7 @@ deadline_find_first_drq(struct deadline_ static inline void deadline_add_request(struct request_queue *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); const int data_dir = rq_data_dir(drq->request); @@ -317,7 +317,7 @@ static void deadline_remove_request(requ struct deadline_rq *drq = RQ_DATA(rq); if (drq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; list_del_init(&drq->fifo); deadline_remove_merge_hints(q, drq); @@ -328,7 +328,7 @@ static void deadline_remove_request(requ static int deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -383,7 +383,7 @@ out_insert: static void deadline_merged_request(request_queue_t *q, struct request *req) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); /* @@ -407,7 +407,7 @@ static void deadline_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); struct deadline_rq *dnext = RQ_DATA(next); @@ -604,7 +604,7 @@ dispatch_request: static struct request *deadline_next_request(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *rq; /* @@ -625,7 +625,7 @@ dispatch: static void deadline_insert_request(request_queue_t *q, struct request *rq, int where) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; /* barriers must flush the reorder queue */ if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) @@ -653,7 +653,7 @@ deadline_insert_request(request_queue_t static int deadline_queue_empty(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; if (!list_empty(&dd->fifo_list[WRITE]) || !list_empty(&dd->fifo_list[READ]) @@ -687,7 +687,7 @@ deadline_latter_request(request_queue_t return NULL; } -static void deadline_exit(request_queue_t *q, elevator_t *e) +static void deadline_exit_queue(elevator_t *e) { struct deadline_data *dd = e->elevator_data; @@ -703,7 +703,7 @@ static void deadline_exit(request_queue_ * initialize elevator private data (deadline_data), and alloc a drq for * each request on the free lists */ -static int deadline_init(request_queue_t *q, elevator_t *e) +static int deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; int i; @@ -748,7 +748,7 @@ static int deadline_init(request_queue_t static void deadline_put_request(request_queue_t *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = 
q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); if (drq) { @@ -760,7 +760,7 @@ static void deadline_put_request(request static int deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; drq = mempool_alloc(dd->drq_pool, gfp_mask); @@ -906,36 +906,54 @@ struct kobj_type deadline_ktype = { .default_attrs = default_attrs, }; -static int __init deadline_slab_setup(void) +static struct elevator_type iosched_deadline = { + .ops = { + .elevator_merge_fn = deadline_merge, + .elevator_merged_fn = deadline_merged_request, + .elevator_merge_req_fn = deadline_merged_requests, + .elevator_next_req_fn = deadline_next_request, + .elevator_add_req_fn = deadline_insert_request, + .elevator_remove_req_fn = deadline_remove_request, + .elevator_queue_empty_fn = deadline_queue_empty, + .elevator_former_req_fn = deadline_former_request, + .elevator_latter_req_fn = deadline_latter_request, + .elevator_set_req_fn = deadline_set_request, + .elevator_put_req_fn = deadline_put_request, + .elevator_init_fn = deadline_init_queue, + .elevator_exit_fn = deadline_exit_queue, + }, + + .elevator_ktype = &deadline_ktype, + .elevator_name = "deadline", + .elevator_owner = THIS_MODULE, +}; + +int deadline_init(void) { + int ret; + drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), 0, 0, NULL, NULL); if (!drq_pool) - panic("deadline: can't init slab pool\n"); + return -ENOMEM; - return 0; -} + ret = elv_register(&iosched_deadline); + if (ret) + kmem_cache_destroy(drq_pool); -subsys_initcall(deadline_slab_setup); + return ret; +} -elevator_t iosched_deadline = { - .elevator_merge_fn = deadline_merge, - .elevator_merged_fn = deadline_merged_request, - .elevator_merge_req_fn = deadline_merged_requests, - .elevator_next_req_fn = deadline_next_request, - .elevator_add_req_fn = deadline_insert_request, - .elevator_remove_req_fn = deadline_remove_request, - .elevator_queue_empty_fn = deadline_queue_empty, - .elevator_former_req_fn = deadline_former_request, - .elevator_latter_req_fn = deadline_latter_request, - .elevator_set_req_fn = deadline_set_request, - .elevator_put_req_fn = deadline_put_request, - .elevator_init_fn = deadline_init, - .elevator_exit_fn = deadline_exit, +void deadline_exit(void) +{ + kmem_cache_destroy(drq_pool); + elv_unregister(&iosched_deadline); +} - .elevator_ktype = &deadline_ktype, - .elevator_name = "deadline", -}; +module_init(deadline_init); +module_exit(deadline_exit); -EXPORT_SYMBOL(iosched_deadline); +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("deadline IO scheduler"); diff -puN drivers/block/elevator.c~switchable-and-modular-io-schedulers drivers/block/elevator.c --- 25/drivers/block/elevator.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/elevator.c Thu Sep 30 15:39:51 2004 @@ -37,6 +37,9 @@ #include +static spinlock_t elv_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(elv_list); + /* * can we safely merge with this request? 
*/ @@ -60,6 +63,7 @@ inline int elv_rq_merge_ok(struct reques return 0; } +EXPORT_SYMBOL(elv_rq_merge_ok); inline int elv_try_merge(struct request *__rq, struct bio *bio) { @@ -77,6 +81,7 @@ inline int elv_try_merge(struct request return ret; } +EXPORT_SYMBOL(elv_try_merge); inline int elv_try_last_merge(request_queue_t *q, struct bio *bio) { @@ -85,31 +90,117 @@ inline int elv_try_last_merge(request_qu return ELEVATOR_NO_MERGE; } +EXPORT_SYMBOL(elv_try_last_merge); -/* - * general block -> elevator interface starts here - */ -int elevator_init(request_queue_t *q, elevator_t *type) +struct elevator_type *elevator_find(const char *name) +{ + struct elevator_type *e = NULL; + struct list_head *entry; + + spin_lock_irq(&elv_list_lock); + list_for_each(entry, &elv_list) { + struct elevator_type *__e; + + __e = list_entry(entry, struct elevator_type, list); + + if (!strcmp(__e->elevator_name, name)) { + e = __e; + break; + } + } + spin_unlock_irq(&elv_list_lock); + + return e; +} + +static int elevator_attach(request_queue_t *q, struct elevator_type *e, + struct elevator_queue *eq) { - elevator_t *e = &q->elevator; + int ret = 0; - memcpy(e, type, sizeof(*e)); + if (!try_module_get(e->elevator_owner)) + return -EINVAL; + + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; + q->elevator = eq; + + if (eq->ops->elevator_init_fn) + ret = eq->ops->elevator_init_fn(q, eq); + + return ret; +} + +static char chosen_elevator[16]; + +static void elevator_setup_default(void) +{ + /* + * check if default is set and exists + */ + if (chosen_elevator[0] && elevator_find(chosen_elevator)) + return; - if (e->elevator_init_fn) - return e->elevator_init_fn(q, e); +#if defined(CONFIG_IOSCHED_AS) + strcpy(chosen_elevator, "anticipatory"); +#elif defined(CONFIG_IOSCHED_DEADLINE) + strcpy(chosen_elevator, "deadline"); +#elif defined(CONFIG_IOSCHED_CFQ) + strcpy(chosen_elevator, "cfq"); +#elif defined(CONFIG_IOSCHED_NOOP) + strcpy(chosen_elevator, "noop"); +#else +#error "You must build at least 1 IO scheduler into the kernel" +#endif + printk("elevator: using %s as default io scheduler\n", chosen_elevator); +} +static int __init elevator_setup(char *str) +{ + strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); return 0; } -void elevator_exit(request_queue_t *q) +__setup("elevator=", elevator_setup); + +int elevator_init(request_queue_t *q, char *name) +{ + struct elevator_type *e = NULL; + struct elevator_queue *eq; + int ret = 0; + + elevator_setup_default(); + + if (!name) + name = chosen_elevator; + + e = elevator_find(name); + if (!e) + return -EINVAL; + + eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); + if (!eq) + return -ENOMEM; + + ret = elevator_attach(q, e, eq); + if (ret) + kfree(eq); + + return ret; +} + +void elevator_exit(elevator_t *e) { - elevator_t *e = &q->elevator; + if (e->ops->elevator_exit_fn) + e->ops->elevator_exit_fn(e); - if (e->elevator_exit_fn) - e->elevator_exit_fn(q, e); + module_put(e->elevator_type->elevator_owner); + e->elevator_type = NULL; + kfree(e); } int elevator_global_init(void) @@ -119,32 +210,32 @@ int elevator_global_init(void) int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_merge_fn) - return e->elevator_merge_fn(q, req, bio); + if (e->ops->elevator_merge_fn) + return e->ops->elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } void elv_merged_request(request_queue_t 
*q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_merged_fn) - e->elevator_merged_fn(q, rq); + if (e->ops->elevator_merged_fn) + e->ops->elevator_merged_fn(q, rq); } void elv_merge_requests(request_queue_t *q, struct request *rq, struct request *next) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (q->last_merge == next) q->last_merge = NULL; - if (e->elevator_merge_req_fn) - e->elevator_merge_req_fn(q, rq, next); + if (e->ops->elevator_merge_req_fn) + e->ops->elevator_merge_req_fn(q, rq, next); } void elv_requeue_request(request_queue_t *q, struct request *rq) @@ -160,8 +251,8 @@ void elv_requeue_request(request_queue_t * if iosched has an explicit requeue hook, then use that. otherwise * just put the request at the front of the queue */ - if (q->elevator.elevator_requeue_req_fn) - q->elevator.elevator_requeue_req_fn(q, rq); + if (q->elevator->ops->elevator_requeue_req_fn) + q->elevator->ops->elevator_requeue_req_fn(q, rq); else __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } @@ -180,7 +271,7 @@ void __elv_add_request(request_queue_t * blk_plug_device(q); rq->q = q; - q->elevator.elevator_add_req_fn(q, rq, where); + q->elevator->ops->elevator_add_req_fn(q, rq, where); if (blk_queue_plugged(q)) { int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight; @@ -203,7 +294,7 @@ void elv_add_request(request_queue_t *q, static inline struct request *__elv_next_request(request_queue_t *q) { - return q->elevator.elevator_next_req_fn(q); + return q->elevator->ops->elevator_next_req_fn(q); } struct request *elv_next_request(request_queue_t *q) @@ -252,7 +343,7 @@ struct request *elv_next_request(request void elv_remove_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; /* * the time frame between a request being removed from the lists @@ -274,16 +365,16 @@ void elv_remove_request(request_queue_t if (rq == q->last_merge) q->last_merge = NULL; - if (e->elevator_remove_req_fn) - e->elevator_remove_req_fn(q, rq); + if (e->ops->elevator_remove_req_fn) + e->ops->elevator_remove_req_fn(q, rq); } int elv_queue_empty(request_queue_t *q) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_queue_empty_fn) - return e->elevator_queue_empty_fn(q); + if (e->ops->elevator_queue_empty_fn) + return e->ops->elevator_queue_empty_fn(q); return list_empty(&q->queue_head); } @@ -292,10 +383,10 @@ struct request *elv_latter_request(reque { struct list_head *next; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_latter_req_fn) - return e->elevator_latter_req_fn(q, rq); + if (e->ops->elevator_latter_req_fn) + return e->ops->elevator_latter_req_fn(q, rq); next = rq->queuelist.next; if (next != &q->queue_head && next != &rq->queuelist) @@ -308,10 +399,10 @@ struct request *elv_former_request(reque { struct list_head *prev; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_former_req_fn) - return e->elevator_former_req_fn(q, rq); + if (e->ops->elevator_former_req_fn) + return e->ops->elevator_former_req_fn(q, rq); prev = rq->queuelist.prev; if (prev != &q->queue_head && prev != &rq->queuelist) @@ -322,10 +413,10 @@ struct request *elv_former_request(reque int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_set_req_fn) - return e->elevator_set_req_fn(q, rq, gfp_mask); + if 
(e->ops->elevator_set_req_fn) + return e->ops->elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private = NULL; return 0; @@ -333,25 +424,25 @@ int elv_set_request(request_queue_t *q, void elv_put_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_put_req_fn) - e->elevator_put_req_fn(q, rq); + if (e->ops->elevator_put_req_fn) + e->ops->elevator_put_req_fn(q, rq); } int elv_may_queue(request_queue_t *q, int rw) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_may_queue_fn) - return e->elevator_may_queue_fn(q, rw); + if (e->ops->elevator_may_queue_fn) + return e->ops->elevator_may_queue_fn(q, rw); return 0; } void elv_completed_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; /* * request is released from the driver, io must be done @@ -359,22 +450,20 @@ void elv_completed_request(request_queue if (blk_account_rq(rq)) q->in_flight--; - if (e->elevator_completed_req_fn) - e->elevator_completed_req_fn(q, rq); + if (e->ops->elevator_completed_req_fn) + e->ops->elevator_completed_req_fn(q, rq); } int elv_register_queue(struct request_queue *q) { - elevator_t *e; - - e = &q->elevator; + elevator_t *e = q->elevator; e->kobj.parent = kobject_get(&q->kobj); if (!e->kobj.parent) return -EBUSY; snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - e->kobj.ktype = e->elevator_ktype; + e->kobj.ktype = e->elevator_type->elevator_ktype; return kobject_register(&e->kobj); } @@ -382,12 +471,131 @@ int elv_register_queue(struct request_qu void elv_unregister_queue(struct request_queue *q) { if (q) { - elevator_t * e = &q->elevator; + elevator_t *e = q->elevator; kobject_unregister(&e->kobj); kobject_put(&q->kobj); } } +int elv_register(struct elevator_type *e) +{ + if (elevator_find(e->elevator_name)) + BUG(); + + spin_lock_irq(&elv_list_lock); + list_add_tail(&e->list, &elv_list); + spin_unlock_irq(&elv_list_lock); + + printk("io scheduler %s registered\n", e->elevator_name); + return 0; +} +EXPORT_SYMBOL_GPL(elv_register); + +void elv_unregister(struct elevator_type *e) +{ + spin_lock_irq(&elv_list_lock); + list_del_init(&e->list); + spin_unlock_irq(&elv_list_lock); +} +EXPORT_SYMBOL_GPL(elv_unregister); + +/* + * switch to new_e io scheduler. be careful not to introduce deadlocks - + * we don't free the old io scheduler, before we have allocated what we + * need for the new one. 
this way we have a chance of going back to the old + * one, if the new one fails init for some reason + */ +static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) +{ + elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); + elevator_t *old_elevator; + + if (!e) { + printk("elevator: out of memory\n"); + return; + } + + blk_wait_queue_drained(q); + + /* + * unregister old elevator data + */ + elv_unregister_queue(q); + old_elevator = q->elevator; + + /* + * attach and start new elevator + */ + if (elevator_attach(q, new_e, e)) + goto fail; + + if (elv_register_queue(q)) + goto fail_register; + + /* + * finally exit old elevator and start queue again + */ + elevator_exit(old_elevator); + blk_finish_queue_drain(q); + return; + +fail_register: + /* + * switch failed, exit the new io scheduler and reattach the old + * one again (along with re-adding the sysfs dir) + */ + elevator_exit(e); +fail: + q->elevator = old_elevator; + elv_register_queue(q); + blk_finish_queue_drain(q); + printk("elevator: switch to %s failed\n", new_e->elevator_name); +} + +ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) +{ + char elevator_name[ELV_NAME_MAX]; + struct elevator_type *e; + + memset(elevator_name, 0, sizeof(elevator_name)); + strncpy(elevator_name, name, sizeof(elevator_name)); + + if (elevator_name[strlen(elevator_name) - 1] == '\n') + elevator_name[strlen(elevator_name) - 1] = '\0'; + + e = elevator_find(elevator_name); + if (!e) { + printk("elevator: type %s not found\n", elevator_name); + return count; + } + + elevator_switch(q, e); + return count; +} + +ssize_t elv_iosched_show(request_queue_t *q, char *name) +{ + elevator_t *e = q->elevator; + struct elevator_type *elv = e->elevator_type; + struct list_head *entry; + int len = 0; + + spin_lock_irq(q->queue_lock); + list_for_each(entry, &elv_list) { + struct elevator_type *__e; + + __e = list_entry(entry, struct elevator_type, list); + if (!strcmp(elv->elevator_name, __e->elevator_name)) + len += sprintf(name+len, "[%s] ", elv->elevator_name); + else + len += sprintf(name+len, "%s ", __e->elevator_name); + } + spin_unlock_irq(q->queue_lock); + + len += sprintf(len+name, "\n"); + return len; +} + module_init(elevator_global_init); EXPORT_SYMBOL(elv_add_request); diff -puN drivers/block/Kconfig.iosched~switchable-and-modular-io-schedulers drivers/block/Kconfig.iosched --- 25/drivers/block/Kconfig.iosched~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/Kconfig.iosched Thu Sep 30 15:39:51 2004 @@ -1,5 +1,5 @@ config IOSCHED_NOOP - bool "No-op I/O scheduler" if EMBEDDED + tristate "No-op I/O scheduler" default y ---help--- The no-op I/O scheduler is a minimal scheduler that does basic merging @@ -9,7 +9,7 @@ config IOSCHED_NOOP the kernel. config IOSCHED_AS - bool "Anticipatory I/O scheduler" if EMBEDDED + tristate "Anticipatory I/O scheduler" default y ---help--- The anticipatory I/O scheduler is the default disk scheduler. It is @@ -18,7 +18,7 @@ config IOSCHED_AS slower in some cases especially some database loads. config IOSCHED_DEADLINE - bool "Deadline I/O scheduler" if EMBEDDED + tristate "Deadline I/O scheduler" default y ---help--- The deadline I/O scheduler is simple and compact, and is often as @@ -28,7 +28,7 @@ config IOSCHED_DEADLINE anticipatory I/O scheduler and so is a good choice. 
config IOSCHED_CFQ - bool "CFQ I/O scheduler" if EMBEDDED + tristate "CFQ I/O scheduler" default y ---help--- The CFQ I/O scheduler tries to distribute bandwidth equally diff -puN drivers/block/ll_rw_blk.c~switchable-and-modular-io-schedulers drivers/block/ll_rw_blk.c --- 25/drivers/block/ll_rw_blk.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/ll_rw_blk.c Thu Sep 30 15:39:51 2004 @@ -1396,7 +1396,8 @@ void blk_cleanup_queue(request_queue_t * if (!atomic_dec_and_test(&q->refcnt)) return; - elevator_exit(q); + if (q->elevator) + elevator_exit(q->elevator); del_timer_sync(&q->unplug_timer); kblockd_flush(); @@ -1419,6 +1420,7 @@ static int blk_init_free_list(request_qu rl->count[READ] = rl->count[WRITE] = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); + init_waitqueue_head(&rl->drain); rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); @@ -1430,45 +1432,6 @@ static int blk_init_free_list(request_qu static int __make_request(request_queue_t *, struct bio *); -static elevator_t *chosen_elevator = -#if defined(CONFIG_IOSCHED_AS) - &iosched_as; -#elif defined(CONFIG_IOSCHED_DEADLINE) - &iosched_deadline; -#elif defined(CONFIG_IOSCHED_CFQ) - &iosched_cfq; -#elif defined(CONFIG_IOSCHED_NOOP) - &elevator_noop; -#else - NULL; -#error "You must have at least 1 I/O scheduler selected" -#endif - -#if defined(CONFIG_IOSCHED_AS) || defined(CONFIG_IOSCHED_DEADLINE) || defined (CONFIG_IOSCHED_NOOP) -static int __init elevator_setup(char *str) -{ -#ifdef CONFIG_IOSCHED_DEADLINE - if (!strcmp(str, "deadline")) - chosen_elevator = &iosched_deadline; -#endif -#ifdef CONFIG_IOSCHED_AS - if (!strcmp(str, "as")) - chosen_elevator = &iosched_as; -#endif -#ifdef CONFIG_IOSCHED_CFQ - if (!strcmp(str, "cfq")) - chosen_elevator = &iosched_cfq; -#endif -#ifdef CONFIG_IOSCHED_NOOP - if (!strcmp(str, "noop")) - chosen_elevator = &elevator_noop; -#endif - return 1; -} - -__setup("elevator=", elevator_setup); -#endif /* CONFIG_IOSCHED_AS || CONFIG_IOSCHED_DEADLINE || CONFIG_IOSCHED_NOOP */ - request_queue_t *blk_alloc_queue(int gfp_mask) { request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask); @@ -1521,21 +1484,14 @@ EXPORT_SYMBOL(blk_alloc_queue); **/ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { - request_queue_t *q; - static int printed; + request_queue_t *q = blk_alloc_queue(GFP_KERNEL); - q = blk_alloc_queue(GFP_KERNEL); if (!q) return NULL; if (blk_init_free_list(q)) goto out_init; - if (!printed) { - printed = 1; - printk("Using %s io scheduler\n", chosen_elevator->elevator_name); - } - q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; @@ -1556,7 +1512,7 @@ request_queue_t *blk_init_queue(request_ /* * all done */ - if (!elevator_init(q, chosen_elevator)) + if (!elevator_init(q, NULL)) return q; blk_cleanup_queue(q); @@ -1650,6 +1606,9 @@ static void freed_request(request_queue_ if (!waitqueue_active(&rl->wait[rw])) blk_clear_queue_full(q, rw); } + if (unlikely(waitqueue_active(&rl->drain)) && + !rl->count[READ] && !rl->count[WRITE]) + wake_up(&rl->drain); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) @@ -1662,6 +1621,9 @@ static struct request *get_request(reque struct request_list *rl = &q->rq; struct io_context *ioc = get_io_context(gfp_mask); + if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) + return NULL; + spin_lock_irq(q->queue_lock); if (rl->count[rw]+1 
>= q->nr_requests) { /* @@ -2507,6 +2469,70 @@ static inline void blk_partition_remap(s } } +void blk_finish_queue_drain(request_queue_t *q) +{ + struct request_list *rl = &q->rq; + + clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); + wake_up(&rl->wait[0]); + wake_up(&rl->wait[1]); + wake_up(&rl->drain); +} + +/* + * We rely on the fact that only requests allocated through blk_alloc_request() + * have io scheduler private data structures associated with them. Any other + * type of request (allocated on stack or through kmalloc()) should not go + * to the io scheduler core, but be attached to the queue head instead. + */ +void blk_wait_queue_drained(request_queue_t *q) +{ + struct request_list *rl = &q->rq; + DEFINE_WAIT(wait); + + spin_lock_irq(q->queue_lock); + set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); + + while (rl->count[READ] || rl->count[WRITE]) { + prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); + + if (rl->count[READ] || rl->count[WRITE]) { + __generic_unplug_device(q); + spin_unlock_irq(q->queue_lock); + io_schedule(); + spin_lock_irq(q->queue_lock); + } + + finish_wait(&rl->drain, &wait); + } while (1); + + spin_unlock_irq(q->queue_lock); +} + +/* + * block waiting for the io scheduler being started again. + */ +static inline void block_wait_queue_running(request_queue_t *q) +{ + DEFINE_WAIT(wait); + + while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { + struct request_list *rl = &q->rq; + + prepare_to_wait_exclusive(&rl->drain, &wait, + TASK_UNINTERRUPTIBLE); + + /* + * re-check the condition. avoids using prepare_to_wait() + * in the fast path (queue is running) + */ + if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) + io_schedule(); + + finish_wait(&rl->drain, &wait); + } +} + /** * generic_make_request: hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. @@ -2596,6 +2622,8 @@ end_io: if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) goto end_io; + block_wait_queue_running(q); + /* * If this device has partitions, remap block n * of partition p to block n+start(p) of the disk. 
@@ -3019,6 +3047,7 @@ void kblockd_flush(void) { flush_workqueue(kblockd_workqueue); } +EXPORT_SYMBOL(kblockd_flush); int __init blk_dev_init(void) { @@ -3037,6 +3066,7 @@ int __init blk_dev_init(void) blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; + return 0; } @@ -3056,6 +3086,7 @@ void put_io_context(struct io_context *i kmem_cache_free(iocontext_cachep, ioc); } } +EXPORT_SYMBOL(put_io_context); /* Called by the exitting task */ void exit_io_context(void) @@ -3107,6 +3138,7 @@ struct io_context *get_io_context(int gf local_irq_restore(flags); return ret; } +EXPORT_SYMBOL(get_io_context); void copy_io_context(struct io_context **pdst, struct io_context **psrc) { @@ -3120,6 +3152,7 @@ void copy_io_context(struct io_context * *pdst = src; } } +EXPORT_SYMBOL(copy_io_context); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) { @@ -3128,7 +3161,7 @@ void swap_io_context(struct io_context * *ioc1 = *ioc2; *ioc2 = temp; } - +EXPORT_SYMBOL(swap_io_context); /* * sysfs parts below @@ -3286,11 +3319,18 @@ static struct queue_sysfs_entry queue_ma .show = queue_max_hw_sectors_show, }; +static struct queue_sysfs_entry queue_iosched_entry = { + .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, + .show = elv_iosched_show, + .store = elv_iosched_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, + &queue_iosched_entry.attr, NULL, }; diff -puN drivers/block/noop-iosched.c~switchable-and-modular-io-schedulers drivers/block/noop-iosched.c --- 25/drivers/block/noop-iosched.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/block/noop-iosched.c Thu Sep 30 15:39:51 2004 @@ -83,12 +83,31 @@ struct request *elevator_noop_next_reque return NULL; } -elevator_t elevator_noop = { - .elevator_merge_fn = elevator_noop_merge, - .elevator_merge_req_fn = elevator_noop_merge_requests, - .elevator_next_req_fn = elevator_noop_next_request, - .elevator_add_req_fn = elevator_noop_add_request, - .elevator_name = "noop", +static struct elevator_type elevator_noop = { + .ops = { + .elevator_merge_fn = elevator_noop_merge, + .elevator_merge_req_fn = elevator_noop_merge_requests, + .elevator_next_req_fn = elevator_noop_next_request, + .elevator_add_req_fn = elevator_noop_add_request, + }, + .elevator_name = "noop", + .elevator_owner = THIS_MODULE, }; -EXPORT_SYMBOL(elevator_noop); +int noop_init(void) +{ + return elv_register(&elevator_noop); +} + +void noop_exit(void) +{ + elv_unregister(&elevator_noop); +} + +module_init(noop_init); +module_exit(noop_exit); + + +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("No-op IO scheduler"); diff -puN drivers/s390/block/dasd.c~switchable-and-modular-io-schedulers drivers/s390/block/dasd.c --- 25/drivers/s390/block/dasd.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/s390/block/dasd.c Thu Sep 30 15:39:51 2004 @@ -1595,8 +1595,8 @@ dasd_alloc_queue(struct dasd_device * de device->request_queue->queuedata = device; #if 0 - elevator_exit(device->request_queue); - rc = elevator_init(device->request_queue, &elevator_noop); + elevator_exit(device->request_queue->elevator); + rc = elevator_init(device->request_queue, "noop"); if (rc) { blk_cleanup_queue(device->request_queue); return rc; diff -puN drivers/s390/char/tape_block.c~switchable-and-modular-io-schedulers drivers/s390/char/tape_block.c --- 
25/drivers/s390/char/tape_block.c~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/drivers/s390/char/tape_block.c Thu Sep 30 15:39:51 2004 @@ -225,8 +225,8 @@ tapeblock_setup_device(struct tape_devic if (!blkdat->request_queue) return -ENOMEM; - elevator_exit(blkdat->request_queue); - rc = elevator_init(blkdat->request_queue, &elevator_noop); + elevator_exit(blkdat->request_queue->elevator); + rc = elevator_init(blkdat->request_queue, "noop"); if (rc) goto cleanup_queue; diff -puN include/linux/blkdev.h~switchable-and-modular-io-schedulers include/linux/blkdev.h --- 25/include/linux/blkdev.h~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/include/linux/blkdev.h Thu Sep 30 15:39:51 2004 @@ -19,8 +19,8 @@ struct request_queue; typedef struct request_queue request_queue_t; -struct elevator_s; -typedef struct elevator_s elevator_t; +struct elevator_queue; +typedef struct elevator_queue elevator_t; struct request_pm_state; #define BLKDEV_MIN_RQ 4 @@ -80,6 +80,7 @@ struct request_list { int count[2]; mempool_t *rq_pool; wait_queue_head_t wait[2]; + wait_queue_head_t drain; }; #define BLK_MAX_CDB 16 @@ -279,7 +280,7 @@ struct request_queue */ struct list_head queue_head; struct request *last_merge; - elevator_t elevator; + elevator_t *elevator; /* * the queue request freelist, one for reads and one for writes @@ -381,6 +382,7 @@ struct request_queue #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ORDERED 8 /* supports ordered writes */ +#define QUEUE_FLAG_DRAIN 9 /* draining queue for sched switch */ #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) @@ -617,6 +619,8 @@ extern void blk_dump_rq_flags(struct req extern void generic_unplug_device(request_queue_t *); extern void __generic_unplug_device(request_queue_t *); extern long nr_blockdev_pages(void); +extern void blk_wait_queue_drained(request_queue_t *); +extern void blk_finish_queue_drain(request_queue_t *); int blk_get_queue(request_queue_t *); request_queue_t *blk_alloc_queue(int); diff -puN include/linux/elevator.h~switchable-and-modular-io-schedulers include/linux/elevator.h --- 25/include/linux/elevator.h~switchable-and-modular-io-schedulers Thu Sep 30 15:39:51 2004 +++ 25-akpm/include/linux/elevator.h Thu Sep 30 15:39:51 2004 @@ -22,9 +22,9 @@ typedef int (elevator_set_req_fn) (reque typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); -typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_exit_fn) (elevator_t *); -struct elevator_s +struct elevator_ops { elevator_merge_fn *elevator_merge_fn; elevator_merged_fn *elevator_merged_fn; @@ -48,12 +48,32 @@ struct elevator_s elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; +}; - void *elevator_data; +#define ELV_NAME_MAX (16) - struct kobject kobj; +/* + * identifies an elevator type, such as AS or deadline + */ +struct elevator_type +{ + struct list_head list; + struct elevator_ops ops; + struct elevator_type *elevator_type; struct kobj_type *elevator_ktype; - const char *elevator_name; + char elevator_name[ELV_NAME_MAX]; + struct module *elevator_owner; +}; + +/* + * each queue has an elevator_queue assoicated with it + */ +struct elevator_queue +{ + struct elevator_ops *ops; + void *elevator_data; + struct kobject 
kobj; + struct elevator_type *elevator_type; }; /* @@ -79,28 +99,19 @@ extern int elv_set_request(request_queue extern void elv_put_request(request_queue_t *, struct request *); /* - * noop I/O scheduler. always merges, always inserts new request at tail - */ -extern elevator_t elevator_noop; - -/* - * deadline i/o scheduler. uses request time outs to prevent indefinite - * starvation - */ -extern elevator_t iosched_deadline; - -/* - * anticipatory I/O scheduler + * io scheduler registration */ -extern elevator_t iosched_as; +extern int elv_register(struct elevator_type *); +extern void elv_unregister(struct elevator_type *); /* - * completely fair queueing I/O scheduler + * io scheduler sysfs switching */ -extern elevator_t iosched_cfq; +extern ssize_t elv_iosched_show(request_queue_t *, char *); +extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); -extern int elevator_init(request_queue_t *, elevator_t *); -extern void elevator_exit(request_queue_t *); +extern int elevator_init(request_queue_t *, char *); +extern void elevator_exit(elevator_t *); extern int elv_rq_merge_ok(struct request *, struct bio *); extern int elv_try_merge(struct request *, struct bio *); extern int elv_try_last_merge(request_queue_t *, struct bio *); _
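
To summarize the new interface: an io scheduler now fills in a struct elevator_type — ops table, sysfs ktype, name and owning module — and registers it with elv_register() at load time (elv_unregister() on unload). The default scheduler for new queues is chosen in elevator_setup_default(): the "elevator=" boot parameter wins if it names a registered scheduler, otherwise the first scheduler built into the kernel is used, tried in the order anticipatory, deadline, cfq, noop. Note that the boot parameter must now match the registered elevator_name, so the anticipatory scheduler is requested with elevator=anticipatory rather than the old elevator=as.

The active scheduler is exposed per queue through the new "scheduler" sysfs attribute. Reading it (e.g. cat /sys/block/hda/queue/scheduler, with hda just an example device) lists the registered schedulers with the active one in square brackets; writing a name to it (e.g. echo deadline > /sys/block/hda/queue/scheduler) invokes elevator_switch(). That marks the queue with QUEUE_FLAG_DRAIN, waits until all allocated requests have completed, attaches and registers the new elevator, and only then frees the old one — so if the new scheduler fails to initialize, we can still fall back to the previous one.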
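
A minimal scheduler module against this interface then looks like the sketch below. It is modeled on the noop scheduler above; the example_* names and the "example" elevator name are made up for illustration, and a real scheduler would of course also implement the merge and accounting hooks. Everything else (struct elevator_type, the ops signatures, elv_register()/elv_unregister()) is as introduced by this patch.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/list.h>

/* dispatch: hand the driver the request at the head of the queue */
static struct request *example_next_request(request_queue_t *q)
{
	if (!list_empty(&q->queue_head))
		return list_entry(q->queue_head.next, struct request, queuelist);

	return NULL;
}

/* insertion: plain FIFO, honouring only front vs back insertion */
static void example_add_request(request_queue_t *q, struct request *rq,
				int where)
{
	if (where == ELEVATOR_INSERT_FRONT)
		list_add(&rq->queuelist, &q->queue_head);
	else
		list_add_tail(&rq->queuelist, &q->queue_head);
}

static struct elevator_type elevator_example = {
	.ops = {
		.elevator_next_req_fn	= example_next_request,
		.elevator_add_req_fn	= example_add_request,
	},
	.elevator_name	= "example",
	.elevator_owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	return elv_register(&elevator_example);
}

static void __exit example_exit(void)
{
	elv_unregister(&elevator_example);
}

module_init(example_init);
module_exit(example_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("example FIFO io scheduler");

Once loaded, "example" shows up in /sys/block/<dev>/queue/scheduler next to the built-in schedulers and can be switched to (and away from) online like any of them.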