From: Jens Axboe

There's a small discrepancy in when we decide to unplug a queue based on
q->unplug_thresh.  Basically it doesn't work for tagged queues, since
q->rq.count[READ] + q->rq.count[WRITE] is just the number of allocated
requests, not the number of requests stuck in the io scheduler.  We could
just change the nr_queued == to a nr_queued >=, but that is still
suboptimal.

This patch adds accounting for requests that have been dequeued from the
io scheduler, but not freed yet.  These are q->in_flight.

	allocated_requests - q->in_flight == requests_in_scheduler

So the condition correctly becomes

	if (requests_in_scheduler == q->unplug_thresh)

instead.

I did a quick round of testing, and for dbench on a SCSI disk the number
of timer-induced unplugs was reduced from 13 to 5 :-).  Not a huge number,
but there might be cases where it's more significant.  Either way, it
always gets ->unplug_thresh right, which the old logic didn't.

---

 25-akpm/drivers/block/elevator.c  |   29 +++++++++++++++++++++++++++++
 25-akpm/drivers/block/ll_rw_blk.c |    4 ++--
 25-akpm/include/linux/blkdev.h    |    5 +++++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff -puN drivers/block/elevator.c~correct-unplugs-on-nr_queued drivers/block/elevator.c
--- 25/drivers/block/elevator.c~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/drivers/block/elevator.c	Thu Apr  8 15:18:57 2004
@@ -150,6 +150,15 @@ void elv_merge_requests(request_queue_t
 void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	/*
+	 * it already went through dequeue, we need to decrement the
+	 * in_flight count again
+	 */
+	if (blk_account_rq(rq)) {
+		WARN_ON(q->in_flight == 0);
+		q->in_flight--;
+	}
+
+	/*
 	 * if iosched has an explicit requeue hook, then use that. otherwise
 	 * just put the request at the front of the queue
 	 */
@@ -233,6 +242,18 @@ void elv_remove_request(request_queue_t
 	elevator_t *e = &q->elevator;
 
 	/*
+	 * the time frame between a request being removed from the lists
+	 * and it being freed is accounted as io that is in progress at
+	 * the driver side. note that we only account requests that the
+	 * driver has seen (REQ_STARTED set), to avoid false accounting
+	 * for request-request merges
+	 */
+	if (blk_account_rq(rq)) {
+		q->in_flight++;
+		WARN_ON(q->in_flight > 2 * q->nr_requests);
+	}
+
+	/*
 	 * the main clearing point for q->last_merge is on retrieval of
 	 * request by driver (it calls elv_next_request()), but it _can_
 	 * also happen here if a request is added to the queue but later
@@ -321,6 +342,14 @@ void elv_completed_request(request_queue
 {
 	elevator_t *e = &q->elevator;
 
+	/*
+	 * request is released from the driver, io must be done
+	 */
+	if (blk_account_rq(rq)) {
+		WARN_ON(q->in_flight == 0);
+		q->in_flight--;
+	}
+
 	if (e->elevator_completed_req_fn)
 		e->elevator_completed_req_fn(q, rq);
 }
diff -puN drivers/block/ll_rw_blk.c~correct-unplugs-on-nr_queued drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/drivers/block/ll_rw_blk.c	Thu Apr  8 15:18:57 2004
@@ -2278,9 +2278,9 @@ out:
 		__blk_put_request(q, freereq);
 
 	if (blk_queue_plugged(q)) {
-		int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight;
 
-		if (nr_queued == q->unplug_thresh || bio_sync(bio))
+		if (nrq == q->unplug_thresh || bio_sync(bio))
 			__generic_unplug_device(q);
 	}
 	spin_unlock_irq(q->queue_lock);
diff -puN include/linux/blkdev.h~correct-unplugs-on-nr_queued include/linux/blkdev.h
--- 25/include/linux/blkdev.h~correct-unplugs-on-nr_queued	Thu Apr  8 15:18:57 2004
+++ 25-akpm/include/linux/blkdev.h	Thu Apr  8 15:18:57 2004
@@ -348,6 +348,8 @@ struct request_queue
 
 	atomic_t refcnt;
 
+	unsigned int in_flight;
+
 	/*
 	 * sg stuff
 	 */
@@ -377,6 +379,9 @@ struct request_queue
 #define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
 #define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
+#define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+
+#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
 #define blk_pm_suspend_request(rq)	((rq)->flags & REQ_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->flags & REQ_PM_RESUME)
_
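As a quick illustration of the accounting rule in the description above, here is a
standalone toy program (not kernel code; the struct and function names are made up
for the example): requests still sitting in the io scheduler are the allocated
requests minus the in-flight ones, and the unplug check compares that count against
the threshold.

#include <assert.h>
#include <stdio.h>

/* toy model of the fields the patch touches */
struct toy_queue {
	unsigned int rq_count;		/* allocated requests, READ + WRITE */
	unsigned int in_flight;		/* dequeued by the driver, not yet freed */
	unsigned int unplug_thresh;	/* unplug once this many sit in the scheduler */
};

/* allocated_requests - q->in_flight == requests_in_scheduler */
static unsigned int requests_in_scheduler(const struct toy_queue *q)
{
	assert(q->rq_count >= q->in_flight);
	return q->rq_count - q->in_flight;
}

/* mirrors the corrected unplug condition in ll_rw_blk.c */
static int should_unplug(const struct toy_queue *q)
{
	return requests_in_scheduler(q) == q->unplug_thresh;
}

int main(void)
{
	/* tagged queue: 6 requests allocated, 2 already out at the driver */
	struct toy_queue q = { .rq_count = 6, .in_flight = 2, .unplug_thresh = 4 };

	printf("in scheduler: %u, unplug: %s\n",
	       requests_in_scheduler(&q), should_unplug(&q) ? "yes" : "no");
	return 0;
}

With the numbers above the old check would have compared 6 against the threshold of
4 and never fired, leaving the unplug to the timer; subtracting the driver-held
requests makes the strict == trigger exactly when the scheduler crosses the
threshold, which is presumably why it did not have to be relaxed to >=.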