From: Jens Axboe

There's a small discrepancy in when we decide to unplug a queue based on
q->unplug_thresh.  Basically it doesn't work for tagged queues, since
q->rq.count[READ] + q->rq.count[WRITE] is just the number of allocated
requests, not the number of requests stuck in the io scheduler.  We could
just change the nr_queued == to a nr_queued >=, but that is still
suboptimal.

This patch adds accounting for requests that have been dequeued from the
io scheduler, but not freed yet.  These are q->in_flight.

	allocated_requests - q->in_flight == requests_in_scheduler

So the condition correctly becomes

	if (requests_in_scheduler == q->unplug_thresh)

instead.

I did a quick round of testing, and for dbench on a SCSI disk the number
of timer-induced unplugs was reduced from 13 to 5 :-).  Not a huge number,
but there might be cases where it's more significant.  Either way, it
always gets ->unplug_thresh right, which the old logic didn't.

---

 25-akpm/drivers/block/elevator.c  |   29 +++++++++++++++++++++++++++++
 25-akpm/drivers/block/ll_rw_blk.c |    4 ++--
 25-akpm/include/linux/blkdev.h    |    5 +++++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff -puN drivers/block/elevator.c~correct-unplugs-on-nr_queued drivers/block/elevator.c
--- 25/drivers/block/elevator.c~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/drivers/block/elevator.c	Thu Apr  8 15:18:57 2004
@@ -150,6 +150,15 @@ void elv_merge_requests(request_queue_t
 void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	/*
+	 * it already went through dequeue, we need to decrement the
+	 * in_flight count again
+	 */
+	if (blk_account_rq(rq)) {
+		WARN_ON(q->in_flight == 0);
+		q->in_flight--;
+	}
+
+	/*
 	 * if iosched has an explicit requeue hook, then use that. otherwise
 	 * just put the request at the front of the queue
 	 */
@@ -233,6 +242,18 @@ void elv_remove_request(request_queue_t
 	elevator_t *e = &q->elevator;
 
 	/*
+	 * the time frame between a request being removed from the lists
+	 * and it being freed is accounted as io that is in progress at
+	 * the driver side. note that we only account requests that the
+	 * driver has seen (REQ_STARTED set), to avoid false accounting
+	 * for request-request merges
+	 */
+	if (blk_account_rq(rq)) {
+		q->in_flight++;
+		WARN_ON(q->in_flight > 2 * q->nr_requests);
+	}
+
+	/*
 	 * the main clearing point for q->last_merge is on retrieval of
 	 * request by driver (it calls elv_next_request()), but it _can_
 	 * also happen here if a request is added to the queue but later
@@ -321,6 +342,14 @@ void elv_completed_request(request_queue
 {
 	elevator_t *e = &q->elevator;
 
+	/*
+	 * request is released from the driver, io must be done
+	 */
+	if (blk_account_rq(rq)) {
+		WARN_ON(q->in_flight == 0);
+		q->in_flight--;
+	}
+
 	if (e->elevator_completed_req_fn)
 		e->elevator_completed_req_fn(q, rq);
 }
diff -puN drivers/block/ll_rw_blk.c~correct-unplugs-on-nr_queued drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/drivers/block/ll_rw_blk.c	Thu Apr  8 15:18:57 2004
@@ -2278,9 +2278,9 @@ out:
 		__blk_put_request(q, freereq);
 
 	if (blk_queue_plugged(q)) {
-		int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight;
 
-		if (nr_queued == q->unplug_thresh || bio_sync(bio))
+		if (nrq == q->unplug_thresh || bio_sync(bio))
 			__generic_unplug_device(q);
 	}
 	spin_unlock_irq(q->queue_lock);
diff -puN include/linux/blkdev.h~correct-unplugs-on-nr_queued include/linux/blkdev.h
--- 25/include/linux/blkdev.h~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/include/linux/blkdev.h	Thu Apr  8 15:18:57 2004
@@ -348,6 +348,8 @@ struct request_queue
 
 	atomic_t refcnt;
 
+	unsigned int in_flight;
+
 	/*
 	 * sg stuff
 	 */
@@ -377,6 +379,9 @@ struct request_queue
 #define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
 #define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
+#define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+
+#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
 #define blk_pm_suspend_request(rq)	((rq)->flags & REQ_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->flags & REQ_PM_RESUME)
_
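As a quick illustration of the accounting rule in the description above, here is a
standalone toy program (not kernel code; the struct and function names are made up
for the example): requests still sitting in the io scheduler are the allocated
requests minus the in-flight ones, and the unplug check compares that count against
the threshold.

#include <assert.h>
#include <stdio.h>

/* toy model of the fields the patch touches */
struct toy_queue {
	unsigned int rq_count;		/* allocated requests, READ + WRITE */
	unsigned int in_flight;		/* dequeued by the driver, not yet freed */
	unsigned int unplug_thresh;	/* unplug once this many sit in the scheduler */
};

/* allocated_requests - q->in_flight == requests_in_scheduler */
static unsigned int requests_in_scheduler(const struct toy_queue *q)
{
	assert(q->rq_count >= q->in_flight);
	return q->rq_count - q->in_flight;
}

/* mirrors the corrected unplug condition in ll_rw_blk.c */
static int should_unplug(const struct toy_queue *q)
{
	return requests_in_scheduler(q) == q->unplug_thresh;
}

int main(void)
{
	/* tagged queue: 6 requests allocated, 2 already out at the driver */
	struct toy_queue q = { .rq_count = 6, .in_flight = 2, .unplug_thresh = 4 };

	printf("in scheduler: %u, unplug: %s\n",
	       requests_in_scheduler(&q), should_unplug(&q) ? "yes" : "no");
	return 0;
}

With the numbers above the old check would have compared 6 against the threshold of
4 and never fired, leaving the unplug to the timer; subtracting the driver-held
requests makes the strict == trigger exactly when the scheduler crosses the
threshold, which is presumably why it did not have to be relaxed to >=.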