diff -urN get_request-ref/drivers/block/ll_rw_blk.c get_request-/drivers/block/ll_rw_blk.c
--- get_request-ref/drivers/block/ll_rw_blk.c	Wed Feb 20 02:18:47 2002
+++ get_request-/drivers/block/ll_rw_blk.c	Wed Feb 20 02:29:39 2002
@@ -126,10 +126,14 @@
 char * blkdev_varyio[MAX_BLKDEV];
 
 /*
- * How many reqeusts do we allocate per queue,
- * and how many do we "batch" on freeing them?
+ * The total number of requests in each queue.
  */
-static int queue_nr_requests, batch_requests;
+static int queue_nr_requests;
+
+/*
+ * The threshold around which we make wakeup decisions
+ */
+static int batch_requests;
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 int blk_nohighio = 0;
@@ -414,7 +418,8 @@
 		q->rq[i&1].count++;
 	}
 
-	init_waitqueue_head(&q->wait_for_request);
+	init_waitqueue_head(&q->wait_for_requests[0]);
+	init_waitqueue_head(&q->wait_for_requests[1]);
 	spin_lock_init(&q->queue_lock);
 }
 
@@ -482,9 +487,9 @@
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
 /*
  * Get a free request. io_request_lock must be held and interrupts
- * disabled on the way in.
+ * disabled on the way in. Returns NULL if there are no free requests.
  */
-static inline struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = q->rq + rw;
@@ -503,40 +508,84 @@
 }
 
 /*
- * No available requests for this queue, unplug the device.
+ * Here's the request allocation design:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ *
+ * 2: We want to be `fair' to all requesters. We must avoid starvation, and
+ *    attempt to ensure that all requesters sleep for a similar duration. Hence
+ *    no stealing requests when there are other processes waiting.
+ *
+ * 3: We also wish to support `batching' of requests. So when a process is
+ *    woken, we want to allow it to allocate a decent number of requests
+ *    before it blocks again, so they can be nicely merged (this only really
+ *    matters if the process happens to be adding requests near the head of
+ *    the queue).
+ *
+ * 4: We want to avoid scheduling storms. This isn't really important, because
+ *    the system will be I/O bound anyway. But it's easy.
+ *
+ *    There is tension between requirements 2 and 3. Once a task has woken,
+ *    we don't want to allow it to sleep as soon as it takes its second request.
+ *    But we don't want currently-running tasks to steal all the requests
+ *    from the sleepers. We handle this with wakeup hysteresis around
+ *    0 .. batch_requests and with the assumption that request taking is much,
+ *    much faster than request freeing.
+ *
+ * So here's what we do:
+ *
+ *  a) A READA requester fails if free_requests < batch_requests
+ *
+ *     We don't want READA requests to prevent sleepers from ever
+ *     waking. Note that READA is used extremely rarely - a few
+ *     filesystems use it for directory readahead.
+ *
+ *  When a process wants a new request:
+ *
+ *  b) If free_requests == 0, the requester sleeps in FIFO manner.
+ *
+ *  c) If 0 < free_requests < batch_requests and there are waiters,
+ *     we still take a request non-blockingly. This provides batching.
+ *
+ *  d) If free_requests >= batch_requests, the caller is immediately
+ *     granted a new request.
+ *
+ *  When a request is released:
+ *
+ *  e) If free_requests < batch_requests, do nothing.
+ *
+ *  f) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ * The net effect is that when a process is woken at the batch_requests level,
+ * it will be able to take approximately (batch_requests) requests before
+ * blocking again (at the tail of the queue).
+ *
+ * This all assumes that the rate of taking requests is much, much higher
+ * than the rate of releasing them. Which is very true.
+ *
+ * -akpm, Feb 2002.
  */
+
 static struct request *__get_request_wait(request_queue_t *q, int rw)
 {
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
 	generic_unplug_device(q);
-	add_wait_queue(&q->wait_for_request, &wait);
+	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (q->rq[rw].count < batch_requests)
+		if (q->rq[rw].count == 0)
 			schedule();
 		spin_lock_irq(&io_request_lock);
 		rq = get_request(q, rw);
 		spin_unlock_irq(&io_request_lock);
 	} while (rq == NULL);
-	remove_wait_queue(&q->wait_for_request, &wait);
+	remove_wait_queue(&q->wait_for_requests[rw], &wait);
 	current->state = TASK_RUNNING;
 	return rq;
 }
 
-static inline struct request *get_request_wait(request_queue_t *q, int rw)
-{
-	register struct request *rq;
-
-	spin_lock_irq(&io_request_lock);
-	rq = get_request(q, rw);
-	spin_unlock_irq(&io_request_lock);
-	if (rq)
-		return rq;
-	return __get_request_wait(q, rw);
-}
-
 /* RO fail safe mechanism */
 
 static long ro_bits[MAX_BLKDEV][8];
@@ -611,7 +660,7 @@
 /*
  * Must be called with io_request_lock held and interrupts disabled
  */
-inline void blkdev_release_request(struct request *req)
+void blkdev_release_request(struct request *req)
 {
 	request_queue_t *q = req->q;
 	int rw = req->cmd;
@@ -625,8 +674,9 @@
 	 */
 	if (q) {
 		list_add(&req->queue, &q->rq[rw].free);
-		if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request))
-			wake_up(&q->wait_for_request);
+		if (++q->rq[rw].count >= batch_requests &&
+				waitqueue_active(&q->wait_for_requests[rw]))
+			wake_up(&q->wait_for_requests[rw]);
 	}
 }
 
@@ -810,22 +860,30 @@
 			BUG();
 	}
 
-	/*
-	 * Grab a free request from the freelist - if that is empty, check
-	 * if we are doing read ahead and abort instead of blocking for
-	 * a free slot.
-	 */
 get_rq:
 	if (freereq) {
 		req = freereq;
 		freereq = NULL;
-	} else if ((req = get_request(q, rw)) == NULL) {
-		spin_unlock_irq(&io_request_lock);
-		if (rw_ahead)
-			goto end_io;
-
-		freereq = __get_request_wait(q, rw);
-		goto again;
+	} else {
+		/*
+		 * See description above __get_request_wait()
+		 */
+		if (rw_ahead) {
+			if (q->rq[rw].count < batch_requests) {
+				spin_unlock_irq(&io_request_lock);
+				goto end_io;
+			}
+			req = get_request(q, rw);
+			if (req == NULL)
+				BUG();
+		} else {
+			req = get_request(q, rw);
+			if (req == NULL) {
+				spin_unlock_irq(&io_request_lock);
+				freereq = __get_request_wait(q, rw);
+				goto again;
+			}
+		}
 	}
 
 	/* fill up the request-info, and add it to the queue */
diff -urN get_request-ref/include/linux/blkdev.h get_request-/include/linux/blkdev.h
--- get_request-ref/include/linux/blkdev.h	Wed Feb 20 02:18:47 2002
+++ get_request-/include/linux/blkdev.h	Wed Feb 20 02:24:12 2002
@@ -123,9 +123,9 @@
 	spinlock_t queue_lock;
 
 	/*
-	 * Tasks wait here for free request
+	 * Tasks wait here for free read and write requests
 	 */
-	wait_queue_head_t wait_for_request;
+	wait_queue_head_t wait_for_requests[2];
 };
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
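
For illustration only, and not part of the patch above: a minimal user-space sketch of the allocate/release rules (a)-(f) from the __get_request_wait() comment. The names want_request(), put_request(), free_requests and nr_waiters are hypothetical stand-ins; the kernel itself tracks q->rq[rw].count under io_request_lock and sleeps on an exclusive wait queue rather than counting waiters.

/*
 * Sketch of the request-allocation wakeup hysteresis described above.
 * Not kernel code; compiles as a standalone C program.
 */
#include <stdio.h>
#include <stdbool.h>

#define QUEUE_NR_REQUESTS	128	/* total requests per queue, per direction */
#define BATCH_REQUESTS		32	/* wakeup threshold */

static int free_requests = QUEUE_NR_REQUESTS;
static int nr_waiters;			/* stand-in for the exclusive wait queue */

/*
 * Try to take a request; returns false if the caller would block
 * (or, for READA, fail outright).
 */
static bool want_request(bool read_ahead)
{
	if (read_ahead) {
		/* rule (a): READA fails rather than eating the batch reserve */
		if (free_requests < BATCH_REQUESTS)
			return false;
		free_requests--;
		return true;
	}
	if (free_requests == 0) {
		/* rule (b): pool exhausted, caller sleeps FIFO */
		nr_waiters++;
		return false;
	}
	/* rules (c)/(d): grant immediately, even below the threshold,
	 * so a freshly woken task can batch up several requests */
	free_requests--;
	return true;
}

static void put_request(void)
{
	free_requests++;
	/* rule (e): below the threshold, do nothing;
	 * rule (f): at or above it, wake a single waiter */
	if (free_requests >= BATCH_REQUESTS && nr_waiters > 0) {
		nr_waiters--;
		printf("wake one waiter (free_requests=%d)\n", free_requests);
	}
}

int main(void)
{
	int i;

	for (i = 0; i < QUEUE_NR_REQUESTS + 5; i++)	/* drain the pool; 5 callers block */
		want_request(false);
	for (i = 0; i < BATCH_REQUESTS; i++)		/* refill towards the threshold */
		put_request();
	printf("free_requests=%d nr_waiters=%d\n", free_requests, nr_waiters);
	return 0;
}

The hysteresis shows up directly in the output: releases 1..31 wake nobody, and the release that brings the pool back up to batch_requests wakes exactly one sleeper, which can then take roughly a batch of requests before the pool empties again.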