From: Nick Piggin

The following patch gets batching working the way it should.

After a process is woken up, it is allowed to allocate up to 32 requests
for 20ms.  It does not stop other processes from submitting requests while
it isn't submitting, though.  This should allow fewer context switches, and
allow batches of requests from each process to be sent to the io scheduler
instead of 1 request from each process.

tiobench sequential writes are more than tripled and random writes are
nearly doubled over mm1.  In earlier tests I generally saw better CPU
efficiency, but it doesn't show here.  There is still debug code to be
taken out.  It's also only on UP.

Sequential Reads
                                            Avg    Maximum   Lat%   Lat%  CPU
Identifier            Rate    (CPU%)    Latency    Latency    >2s   >10s  Eff
------------------- ------ --------- ---------- ---------- ------ ------ ----
-2.5.71-mm1          11.13    3.783%      46.10   24668.01   0.84   0.02   294
+2.5.71-mm1          13.21    4.489%      37.37    5691.66   0.76   0.00   294

Random Reads
------------------- ------ --------- ---------- ---------- ------ ------ ----
-2.5.71-mm1           0.97    0.582%     519.86    6444.66  11.93   0.00   167
+2.5.71-mm1           1.01    0.604%     484.59    6604.93  10.73   0.00   167

Sequential Writes
------------------- ------ --------- ---------- ---------- ------ ------ ----
-2.5.71-mm1           4.85    4.456%      77.80   99359.39   0.18   0.13   109
+2.5.71-mm1          14.11    14.19%      10.07   22805.47   0.09   0.04    99

Random Writes
------------------- ------ --------- ---------- ---------- ------ ------ ----
-2.5.71-mm1           0.46    0.371%      14.48    6173.90   0.23   0.00   125
+2.5.71-mm1           0.86    0.744%      24.08    8753.66   0.31   0.00   115


 drivers/block/ll_rw_blk.c |   94 ++++++++++++++++++++++++++++++++++------------
 include/linux/blkdev.h    |    6 ++
 2 files changed, 76 insertions(+), 24 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~blk-request-batching drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~blk-request-batching	2003-06-18 00:17:23.000000000 -0700
+++ 25-akpm/drivers/block/ll_rw_blk.c	2003-06-18 00:17:23.000000000 -0700
@@ -51,10 +51,11 @@ static struct workqueue_struct *kblockd_
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
-static inline int batch_requests(struct request_queue *q)
-{
-	return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
-}
+/* Amount of time in which a process may batch requests */
+#define BLK_BATCH_TIME	(HZ/50UL)
+
+/* Number of requests a "batching" process may submit */
+#define BLK_BATCH_REQ	32
 
 /*
  * Return the threshold (number of used requests) at which the queue is
@@ -1309,24 +1310,56 @@ static inline struct request *blk_alloc_
 	return NULL;
 }
 
+/*
+ * ioc_batching returns true if the ioc is a valid batching request and
+ * should be given priority access to a request.
+ */
+static inline int ioc_batching(struct io_context *ioc)
+{
+	if (!ioc)
+		return 0;
+
+	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
+		(ioc->nr_batch_requests > 0
+		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
+}
+
+/*
+ * ioc_set_batching sets ioc to be a new "batcher" if it is not one
+ */
+void ioc_set_batching(struct io_context *ioc)
+{
+	if (!ioc || ioc_batching(ioc))
+		return;
+
+	ioc->nr_batch_requests = BLK_BATCH_REQ;
+	ioc->last_waited = jiffies;
+}
+
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *
-get_request(request_queue_t *q, int rw, int gfp_mask, int force)
+static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct io_context *ioc = get_io_context();
 
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] == q->nr_requests)
-		blk_set_queue_full(q, rw);
-
-	if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) {
+	if (rl->count[rw]+1 >= q->nr_requests) {
+		if (!blk_queue_full(q, rw)) {
+			ioc_set_batching(ioc);
+			blk_set_queue_full(q, rw);
+		}
+	}
+
+	if (blk_queue_full(q, rw)
+			&& !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
+
 	rl->count[rw]++;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
@@ -1339,10 +1372,11 @@ get_request(request_queue_t *q, int rw,
 	if (rl->count[rw] < queue_congestion_off_threshold(q))
 		clear_queue_congested(q, rw);
 
-	if (rl->count[rw] <= batch_requests(q)) {
+	if (rl->count[rw]+1 <= q->nr_requests) {
+		smp_mb();
 		if (waitqueue_active(&rl->wait[rw]))
 			wake_up(&rl->wait[rw]);
-		else
+		if (!waitqueue_active(&rl->wait[rw]))
 			blk_clear_queue_full(q, rw);
 	}
 
@@ -1371,6 +1405,7 @@ get_request(request_queue_t *q, int rw,
 	rq->sense = NULL;
 
 out:
+	put_io_context(ioc);
 	return rq;
 }
 
@@ -1382,7 +1417,6 @@ static struct request *get_request_wait(
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
-	int waited = 0;
 
 	generic_unplug_device(q);
 	do {
@@ -1391,11 +1425,15 @@ static struct request *get_request_wait(
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, GFP_NOIO, waited);
+		rq = get_request(q, rw, GFP_NOIO);
 
 		if (!rq) {
+			struct io_context *ioc;
+
 			io_schedule();
-			waited = 1;
+			ioc = get_io_context();
+			ioc_set_batching(ioc);
+			put_io_context(ioc);
 		}
 		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
@@ -1412,7 +1450,7 @@ struct request *blk_get_request(request_
 	if (gfp_mask & __GFP_WAIT)
 		rq = get_request_wait(q, rw);
 	else
-		rq = get_request(q, rw, gfp_mask, 0);
+		rq = get_request(q, rw, gfp_mask);
 
 	return rq;
 }
@@ -1564,10 +1602,11 @@ void __blk_put_request(request_queue_t *
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
 
-		if (rl->count[rw] <= batch_requests(q)) {
+		if (rl->count[rw]+1 <= q->nr_requests) {
+			smp_mb();
 			if (waitqueue_active(&rl->wait[rw]))
 				wake_up(&rl->wait[rw]);
-			else
+			if (!waitqueue_active(&rl->wait[rw]))
 				blk_clear_queue_full(q, rw);
 		}
 	}
@@ -1812,7 +1851,7 @@ get_rq:
 		freereq = NULL;
 	} else {
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) {
+		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
 			/*
 			 * READA bit set
 			 */
@@ -2383,6 +2422,8 @@ int __init blk_dev_init(void)
 	return 0;
 }
 
+static atomic_t nr_io_contexts = ATOMIC_INIT(0);
+
 /*
  * IO Context helper functions
  */
@@ -2397,6 +2438,7 @@ void put_io_context(struct io_context *i
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
 		kfree(ioc);
+		atomic_dec(&nr_io_contexts);
 	}
 }
 
@@ -2413,7 +2455,8 @@ void exit_io_context(void)
 			ioc->aic->exit(ioc->aic);
 		put_io_context(ioc);
 		current->io_context = NULL;
-	}
+	} else
+		WARN_ON(1);
 	local_irq_restore(flags);
 }
 
@@ -2436,8 +2479,11 @@ struct io_context *get_io_context(void)
 	if (ret == NULL) {
 		ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 		if (ret) {
+			atomic_inc(&nr_io_contexts);
 			atomic_set(&ret->refcount, 1);
 			ret->pid = tsk->pid;
+			ret->last_waited = jiffies; /* doesn't matter... */
+			ret->nr_batch_requests = 0; /* because this is 0 */
 			ret->aic = NULL;
 			tsk->io_context = ret;
 		}
@@ -2519,16 +2565,16 @@ queue_requests_store(struct request_queu
 
 	if (rl->count[READ] >= q->nr_requests)
 		blk_set_queue_full(q, READ);
-	else if (rl->count[READ] <= batch_requests(q)) {
+	else if (rl->count[READ]+1 <= q->nr_requests) {
 		blk_clear_queue_full(q, READ);
-		wake_up_all(&rl->wait[READ]);
+		wake_up(&rl->wait[READ]);
 	}
 
 	if (rl->count[WRITE] >= q->nr_requests)
 		blk_set_queue_full(q, WRITE);
-	else if (rl->count[WRITE] <= batch_requests(q)) {
+	else if (rl->count[WRITE]+1 <= q->nr_requests) {
 		blk_clear_queue_full(q, WRITE);
-		wake_up_all(&rl->wait[WRITE]);
+		wake_up(&rl->wait[WRITE]);
 	}
 
 	return ret;

diff -puN include/linux/blkdev.h~blk-request-batching include/linux/blkdev.h
--- 25/include/linux/blkdev.h~blk-request-batching	2003-06-18 00:17:23.000000000 -0700
+++ 25-akpm/include/linux/blkdev.h	2003-06-18 00:17:23.000000000 -0700
@@ -59,6 +59,12 @@ struct io_context {
 	atomic_t refcount;
 	pid_t pid;
 
+	/*
+	 * For request batching
+	 */
+	unsigned long last_waited;	/* Time last woken after wait for request */
+	int nr_batch_requests;		/* Number of requests left in the batch */
+
 	struct as_io_context *aic;
 };

_
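
For clarity, here is a small userspace sketch (not part of the patch) of the
batching window described above.  It borrows the io_context fields and the
ioc_batching()/ioc_set_batching() logic from the patch, but the jiffies
counter, the assumed HZ of 1000 and time_before() are simulated locally, so
treat it only as an illustration of the rule: a freshly woken "batcher" gets
priority access until either its BLK_BATCH_REQ quota or the BLK_BATCH_TIME
window runs out.

#include <stdio.h>

#define HZ              1000UL          /* assumed tick rate for this demo */
#define BLK_BATCH_TIME  (HZ / 50UL)     /* 20ms worth of ticks */
#define BLK_BATCH_REQ   32

struct io_context {
	unsigned long last_waited;      /* tick when this task last became a batcher */
	int nr_batch_requests;          /* requests left in the current batch */
};

static unsigned long jiffies;           /* simulated tick counter */

/* simplified stand-in for the kernel's time_before() */
static int time_before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

/* mirrors ioc_batching() in the patch */
static int ioc_batching(struct io_context *ioc)
{
	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
		(ioc->nr_batch_requests > 0 &&
		 time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/* mirrors ioc_set_batching() in the patch */
static void ioc_set_batching(struct io_context *ioc)
{
	if (ioc_batching(ioc))
		return;
	ioc->nr_batch_requests = BLK_BATCH_REQ;
	ioc->last_waited = jiffies;
}

int main(void)
{
	struct io_context ioc = { 0, 0 };
	int i;

	ioc_set_batching(&ioc);         /* the process has just been woken up */

	for (i = 0; i < 30; i++) {
		jiffies++;              /* pretend each allocation costs one tick */
		if (ioc_batching(&ioc)) {
			ioc.nr_batch_requests--;
			printf("tick %2lu: priority allocation, %d left in batch\n",
			       jiffies, ioc.nr_batch_requests);
		} else {
			printf("tick %2lu: batch expired, queue-full rules apply\n",
			       jiffies);
		}
	}
	return 0;
}

Note that the patch itself makes a task a batcher in two places: when its
allocation first pushes the queue to the full mark in get_request(), and
after it has slept and been woken in get_request_wait(); the sketch above
only models the second case.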