[PATCH] possible rq starvation on oom

I stumbled across this the other day. The block layer only uses a single memory pool for request allocation, so it's very possible for eg writes to have allocated them all at any point in time. If that is the case and the machine is low on memory, a reader attempting to allocate a request and failing in blk_alloc_request() can get stuck for a long time since no one is there to wake it up. The solution is either to add the extra mempool so both reads and writes have one, or attempt to handle the situation. I chose the latter, to save the extra memory required for the additional mempool with BLKDEV_MIN_RQ statically allocated requests per-queue. If a read allocation fails and we have no readers in flight for this queue, mark us rq-starved so that the next write being freed will wake up the sleeping reader(s). Same situation would happen for writes as well of course, it's just a lot more unlikely. Signed-off-by: Jens Axboe <axboe@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Jens Axboe <axboe@suse.de> 2005-01-12 17:00:45 -0800
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-01-12 17:00:45 -0800
commit: 80ce63d3a9a3ca3bcd0232caeb3bfb127af5ad35 (patch)
tree: cb08dce7e0495e07c979f8481c8d05edd5eadc9f /drivers
parent: 6ddb58de00dfff82ccd08d9d14ab3c615ce52456 (diff)
download: history-80ce63d3a9a3ca3bcd0232caeb3bfb127af5ad35.tar.gz
1 files changed, 40 insertions, 11 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 98db1776ba31b0..3994af7e555b14 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1438,6 +1438,7 @@ static int blk_init_free_list(request_queue_t *q)
 	struct request_list *rl = &q->rq;
 
 	rl->count[READ] = rl->count[WRITE] = 0;
+	rl->starved[READ] = rl->starved[WRITE] = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
 	init_waitqueue_head(&rl->drain);
@@ -1618,6 +1619,22 @@ void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
 	ioc->last_waited = jiffies;
 }
 
+static void __freed_request(request_queue_t *q, int rw)
+{
+	struct request_list *rl = &q->rq;
+
+	if (rl->count[rw] < queue_congestion_off_threshold(q))
+		clear_queue_congested(q, rw);
+
+	if (rl->count[rw] + 1 <= q->nr_requests) {
+		smp_mb();
+		if (waitqueue_active(&rl->wait[rw]))
+			wake_up(&rl->wait[rw]);
+
+		blk_clear_queue_full(q, rw);
+	}
+}
+
 /*
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
@@ -1627,17 +1644,17 @@ static void freed_request(request_queue_t *q, int rw)
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
-	if (rl->count[rw] < queue_congestion_off_threshold(q))
-		clear_queue_congested(q, rw);
-	if (rl->count[rw]+1 <= q->nr_requests) {
+
+	__freed_request(q, rw);
+
+	if (unlikely(rl->starved[rw ^ 1]))
+		__freed_request(q, rw ^ 1);
+
+	if (!rl->count[READ] && !rl->count[WRITE]) {
 		smp_mb();
-		if (waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
-		blk_clear_queue_full(q, rw);
+		if (unlikely(waitqueue_active(&rl->drain)))
+			wake_up(&rl->drain);
 	}
-	if (unlikely(waitqueue_active(&rl->drain)) &&
-	    !rl->count[READ] && !rl->count[WRITE])
-		wake_up(&rl->drain);
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1669,8 +1686,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 
 	switch (elv_may_queue(q, rw)) {
 		case ELV_MQUEUE_NO:
-			spin_unlock_irq(q->queue_lock);
-			goto out;
+			goto rq_starved;
 		case ELV_MQUEUE_MAY:
 			break;
 		case ELV_MQUEUE_MUST:
@@ -1688,6 +1704,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 
 get_rq:
 	rl->count[rw]++;
+	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
@@ -1703,6 +1720,18 @@ get_rq:
 		 */
 		spin_lock_irq(q->queue_lock);
 		freed_request(q, rw);
+
+		/*
+		 * in the very unlikely event that allocation failed and no
+		 * requests for this direction was pending, mark us starved
+		 * so that freeing of a request in the other direction will
+		 * notice us. another possible fix would be to split the
+		 * rq mempool into READ and WRITE
+		 */
+rq_starved:
+		if (unlikely(rl->count[rw] == 0))
+			rl->starved[rw] = 1;
+
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
author	Jens Axboe <axboe@suse.de>	2005-01-12 17:00:45 -0800
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-01-12 17:00:45 -0800
commit	80ce63d3a9a3ca3bcd0232caeb3bfb127af5ad35 (patch)
tree	cb08dce7e0495e07c979f8481c8d05edd5eadc9f /drivers
parent	6ddb58de00dfff82ccd08d9d14ab3c615ce52456 (diff)
download	history-80ce63d3a9a3ca3bcd0232caeb3bfb127af5ad35.tar.gz