From: Nick Piggin

This patch fixes the request batching fairness/starvation issue.  It's not
clear what is going on with 2.4, but it seems that it is a problem around
this area.

Anyway, previously:

 * request queue fills up
 * process 1 calls get_request, sleeps
 * a couple of requests are freed
 * process 2 calls get_request, proceeds
 * a couple of requests are freed
 * process 2 calls get_request...

Now as unlikely as it seems, it could be a problem.  It's a fairness problem
that process 2 can skip ahead of process 1 anyway.

With the patch:

 * request queue fills up
 * any process calling get_request will sleep
 * once the queue gets below the batch watermark, processes start being
   woken, and may allocate.

(A small stand-alone sketch of this batching behaviour follows, after the
diff.)


 drivers/block/ll_rw_blk.c |   41 ++++++++++++++++++++++++++++++++++++-----
 include/linux/blkdev.h    |   26 ++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 5 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~blk-fair-batches drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~blk-fair-batches	2003-06-14 14:52:37.000000000 -0700
+++ 25-akpm/drivers/block/ll_rw_blk.c	2003-06-14 14:52:37.000000000 -0700
@@ -54,7 +54,7 @@ static wait_queue_head_t congestion_wqh[
 
 static inline int batch_requests(struct request_queue *q)
 {
-	return q->nr_requests - min(q->nr_requests / 8, 8UL);
+	return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
 }
 
 /*
@@ -1325,7 +1325,10 @@ static struct request *get_request(reque
 	struct request_list *rl = &q->rq;
 
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] >= q->nr_requests || !elv_may_queue(q, rw)) {
+	if (rl->count[rw] == q->nr_requests)
+		blk_set_queue_full(q, rw);
+
+	if (blk_queue_full(q, rw) || !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
@@ -1340,6 +1343,15 @@ static struct request *get_request(reque
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
+
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (rl->count[rw] == batch_requests(q))
+				blk_clear_queue_full(q, rw);
+
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+		}
+
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
@@ -1561,9 +1573,14 @@ void __blk_put_request(request_queue_t *
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
-		if (rl->count[rw] < batch_requests(q) &&
-				waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
+
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (rl->count[rw] == batch_requests(q))
+				blk_clear_queue_full(q, rw);
+
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+		}
 	}
 }
 
@@ -2424,6 +2441,20 @@ queue_requests_store(struct request_queu
 		set_queue_congested(q, WRITE);
 	else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
 		clear_queue_congested(q, WRITE);
+
+	if (rl->count[READ] >= q->nr_requests)
+		blk_set_queue_full(q, READ);
+	else if (rl->count[READ] <= batch_requests(q)) {
+		blk_clear_queue_full(q, READ);
+		wake_up(&rl->wait[READ]);
+	}
+
+	if (rl->count[WRITE] >= q->nr_requests)
+		blk_set_queue_full(q, WRITE);
+	else if (rl->count[WRITE] <= batch_requests(q)) {
+		blk_clear_queue_full(q, WRITE);
+		wake_up(&rl->wait[WRITE]);
+	}
 	return ret;
 }
 
diff -puN include/linux/blkdev.h~blk-fair-batches include/linux/blkdev.h
--- 25/include/linux/blkdev.h~blk-fair-batches	2003-06-14 14:52:37.000000000 -0700
+++ 25-akpm/include/linux/blkdev.h	2003-06-14 14:52:37.000000000 -0700
@@ -307,6 +307,8 @@ struct request_queue
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
+#define QUEUE_FLAG_READFULL	3	/* read queue has been filled */
+#define QUEUE_FLAG_WRITEFULL	4	/* write queue has been filled */
 
 #define blk_queue_plugged(q)	!list_empty(&(q)->plug_list)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
@@ -322,6 +324,30 @@ struct request_queue
 
 #define rq_data_dir(rq)		((rq)->flags & 1)
 
+static inline int blk_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_set_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_clear_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+
 /*
  * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
  * it already be started by driver. _
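
For anyone who wants to see the batching behaviour in isolation, here is a
minimal user-space sketch of the idea; it is an illustration only, not kernel
code.  NR_REQUESTS, may_allocate() and put_request() are made-up stand-ins
for q->nr_requests, get_request() and __blk_put_request(); locking, the
per-direction READ/WRITE counts, elv_may_queue() and the wait queues are all
omitted.  Only batch_requests() mirrors the formula from the patch.

/*
 * Sketch of the fair-batching scheme: once the "queue" fills, a full flag
 * stays set until the count drops back to the batch watermark, so blocked
 * allocators are released as a batch instead of one slipping in per free.
 * Names and the nr_requests value are illustrative, not the kernel's.
 */
#include <stdio.h>

#define NR_REQUESTS 128

static int count;      /* requests currently allocated */
static int queue_full; /* analogue of QUEUE_FLAG_READFULL/WRITEFULL */

static int batch_requests(void)
{
	int batch = NR_REQUESTS / 8;

	if (batch > 8)
		batch = 8;
	return NR_REQUESTS - batch - 1;	/* 119 with the numbers above */
}

/* Returns 1 if an allocation may proceed, 0 if the caller must wait. */
static int may_allocate(void)
{
	if (count == NR_REQUESTS)
		queue_full = 1;
	if (queue_full)
		return 0;
	count++;
	return 1;
}

/* Frees one request; clears the full flag only at the batch watermark. */
static void put_request(void)
{
	count--;
	if (count == batch_requests())
		queue_full = 0;	/* a whole batch of waiters may now proceed */
}

int main(void)
{
	int i, ok;

	/* Fill the queue; the first attempt past the limit trips the flag. */
	for (i = 0; i <= NR_REQUESTS; i++)
		may_allocate();
	printf("count=%d full=%d\n", count, queue_full);	/* 128, 1 */

	/* Freeing only a couple of requests does not let anyone back in. */
	put_request();
	put_request();
	printf("may_allocate=%d\n", may_allocate());		/* 0 */

	/* Only when the count reaches the watermark does allocation resume. */
	while (count > batch_requests())
		put_request();
	ok = may_allocate();
	printf("count=%d may_allocate=%d\n", count, ok);	/* 120, 1 */
	return 0;
}

The reason the flag is cleared only when the count falls back to
batch_requests() is that the queue then admits a whole batch of sleeping
processes at once, rather than letting a late arrival steal each freed
request from processes that were already waiting.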