diff -urNp x-ref/drivers/block/cciss.c x/drivers/block/cciss.c
--- x-ref/drivers/block/cciss.c	2002-12-19 01:29:06.000000000 +0100
+++ x/drivers/block/cciss.c	2002-12-19 01:29:35.000000000 +0100
@@ -1999,14 +1999,14 @@ static void start_io( ctlr_info_t *h)
 	}
 }
 
-static inline void complete_buffers( struct buffer_head *bh, int status)
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int status)
 {
 	struct buffer_head *xbh;
 	while(bh)
 	{
 		xbh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
-		blk_finished_io(bh->b_size >> 9);
+		blk_finished_io(req, bh->b_size >> 9);
 		bh->b_end_io(bh, status);
 		bh = xbh;
 	}
@@ -2149,7 +2149,7 @@ static inline void complete_command( ctl
 			pci_unmap_page(hba[cmd->ctlr]->pdev,
 				temp64.val, cmd->SG[i].Len, ddir);
 	}
-	complete_buffers(cmd->rq->bh, status);
+	complete_buffers(cmd->rq, cmd->rq->bh, status);
 #ifdef CCISS_DEBUG
 	printk("Done with %p\n", cmd->rq);
 #endif /* CCISS_DEBUG */
@@ -2233,7 +2233,7 @@ next:
 			printk(KERN_WARNING "doreq cmd for %d, %x at %p\n",
 				h->ctlr, creq->rq_dev, creq);
 			blkdev_dequeue_request(creq);
-			complete_buffers(creq->bh, 0);
+			complete_buffers(creq, creq->bh, 0);
 			end_that_request_last(creq);
 			goto startio;
 		}
diff -urNp x-ref/drivers/block/cpqarray.c x/drivers/block/cpqarray.c
--- x-ref/drivers/block/cpqarray.c	2002-12-19 01:29:04.000000000 +0100
+++ x/drivers/block/cpqarray.c	2002-12-19 01:29:19.000000000 +0100
@@ -169,7 +169,7 @@ static void start_io(ctlr_info_t *h);
 
 static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c);
 static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c);
-static inline void complete_buffers(struct buffer_head *bh, int ok);
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int ok);
 static inline void complete_command(cmdlist_t *cmd, int timeout);
 
 static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs);
@@ -981,7 +981,7 @@ next:
 			printk(KERN_WARNING "doreq cmd for %d, %x at %p\n",
 				h->ctlr, creq->rq_dev, creq);
 			blkdev_dequeue_request(creq);
-			complete_buffers(creq->bh, 0);
+			complete_buffers(creq, creq->bh, 0);
 			end_that_request_last(creq);
 			goto startio;
 		}
@@ -1082,14 +1082,14 @@ static void start_io(ctlr_info_t *h)
 	}
 }
 
-static inline void complete_buffers(struct buffer_head *bh, int ok)
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int ok)
 {
 	struct buffer_head *xbh;
 	while(bh) {
 		xbh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 
-		blk_finished_io(bh->b_size >> 9);
+		blk_finished_io(req, bh->b_size >> 9);
 		bh->b_end_io(bh, ok);
 
 		bh = xbh;
@@ -1131,7 +1131,7 @@ static inline void complete_command(cmdl
 				(cmd->req.hdr.cmd == IDA_READ) ?
 					PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
 	}
-	complete_buffers(cmd->rq->bh, ok);
+	complete_buffers(cmd->rq, cmd->rq->bh, ok);
 	DBGPX(printk("Done with %p\n", cmd->rq););
 	req_finished_io(cmd->rq);
 	end_that_request_last(cmd->rq);
diff -urNp x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c
--- x-ref/drivers/block/ll_rw_blk.c	2002-12-19 01:29:17.000000000 +0100
+++ x/drivers/block/ll_rw_blk.c	2002-12-19 01:29:19.000000000 +0100
@@ -183,11 +183,12 @@ void blk_cleanup_queue(request_queue_t *
 {
 	int count = q->nr_requests;
 
-	count -= __blk_cleanup_queue(&q->rq[READ]);
-	count -= __blk_cleanup_queue(&q->rq[WRITE]);
+	count -= __blk_cleanup_queue(&q->rq);
 
 	if (count)
 		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+	if (atomic_read(&q->nr_sectors))
+		printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
 
 	memset(q, 0, sizeof(*q));
 }
@@ -396,7 +397,7 @@ void generic_unplug_device(void *data)
  *
  * Returns the (new) number of requests which the queue has available.
  */
-int blk_grow_request_list(request_queue_t *q, int nr_requests)
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
 {
 	unsigned long flags;
 	/* Several broken drivers assume that this function doesn't sleep,
@@ -414,13 +415,15 @@ int blk_grow_request_list(request_queue_
 		memset(rq, 0, sizeof(*rq));
 		rq->rq_status = RQ_INACTIVE;
 		rw = q->nr_requests & 1;
-		list_add(&rq->queue, &q->rq[rw].free);
-		q->rq[rw].count++;
+		list_add(&rq->queue, &q->rq.free);
+		q->rq.count++;
 		q->nr_requests++;
 	}
-	q->batch_requests = q->nr_requests / 4;
-	if (q->batch_requests > 32)
-		q->batch_requests = 32;
+	q->batch_requests = q->nr_requests;
+	q->max_queue_sectors = max_queue_sectors;
+	q->batch_sectors = max_queue_sectors / 2;
+	BUG_ON(!q->batch_sectors);
+	atomic_set(&q->nr_sectors, 0);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	return q->nr_requests;
 }
@@ -429,23 +432,26 @@ static void blk_init_free_list(request_q
 {
 	struct sysinfo si;
 	int megs;		/* Total memory, in megabytes */
-	int nr_requests;
+	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
 
-	INIT_LIST_HEAD(&q->rq[READ].free);
-	INIT_LIST_HEAD(&q->rq[WRITE].free);
-	q->rq[READ].count = 0;
-	q->rq[WRITE].count = 0;
+	INIT_LIST_HEAD(&q->rq.free);
+	q->rq.count = 0;
 	q->nr_requests = 0;
 
 	si_meminfo(&si);
 	megs = si.totalram >> (20 - PAGE_SHIFT);
-	nr_requests = 128;
-	if (megs < 32)
+	nr_requests = MAX_NR_REQUESTS;
+	if (megs < 30) {
 		nr_requests /= 2;
-	blk_grow_request_list(q, nr_requests);
+		max_queue_sectors /= 2;
+	}
+	/* notice early if anybody screwed the defaults */
+	BUG_ON(!nr_requests);
+	BUG_ON(!max_queue_sectors);
+
+	blk_grow_request_list(q, nr_requests, max_queue_sectors);
 
-	init_waitqueue_head(&q->wait_for_requests[0]);
-	init_waitqueue_head(&q->wait_for_requests[1]);
+	init_waitqueue_head(&q->wait_for_requests);
 }
 
 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
@@ -514,11 +520,16 @@ void blk_init_queue(request_queue_t * q,
  * Get a free request. io_request_lock must be held and interrupts
  * disabled on the way in. Returns NULL if there are no free requests.
  */
+static struct request * FASTCALL(get_request(request_queue_t *q, int rw));
 static struct request *get_request(request_queue_t *q, int rw)
 {
 	struct request *rq = NULL;
-	struct request_list *rl = q->rq + rw;
+	struct request_list *rl;
+
+	if (blk_oversized_queue(q))
+		goto out;
 
+	rl = &q->rq;
 	if (!list_empty(&rl->free)) {
 		rq = blkdev_free_rq(&rl->free);
 		list_del(&rq->queue);
@@ -529,6 +540,7 @@ static struct request *get_request(reque
 		rq->q = q;
 	}
 
+ out:
 	return rq;
 }
 
@@ -596,10 +608,10 @@ static struct request *__get_request_wai
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
-	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
+	add_wait_queue_exclusive(&q->wait_for_requests, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (q->rq[rw].count == 0) {
+		if (q->rq.count == 0 || blk_oversized_queue(q)) {
 			/*
 			 * All we care about is not to stall if any request
 			 * is been released after we set TASK_UNINTERRUPTIBLE.
@@ -614,7 +626,7 @@ static struct request *__get_request_wai
 		rq = get_request(q, rw);
 		spin_unlock_irq(q->queue_lock);
 	} while (rq == NULL);
-	remove_wait_queue(&q->wait_for_requests[rw], &wait);
+	remove_wait_queue(&q->wait_for_requests, &wait);
 	current->state = TASK_RUNNING;
 	return rq;
 }
@@ -626,8 +638,8 @@ static void get_request_wait_wakeup(requ
 	 * generic_unplug_device while our __get_request_wait was running
 	 * w/o the queue_lock held and w/ our request out of the queue.
	 */
-	if (waitqueue_active(&q->wait_for_requests[rw]))
-		wake_up(&q->wait_for_requests[rw]);
+	if (waitqueue_active(&q->wait_for_requests))
+		wake_up(&q->wait_for_requests);
 }
 
 /* RO fail safe mechanism */
@@ -843,7 +855,6 @@ static inline void add_request(request_q
 void blkdev_release_request(struct request *req)
 {
 	request_queue_t *q = req->q;
-	int rw = req->cmd;
 
 	req->rq_status = RQ_INACTIVE;
 	req->q = NULL;
@@ -853,11 +864,11 @@ void blkdev_release_request(struct reque
 	 * assume it has free buffers and check waiters
 	 */
 	if (q) {
-		list_add(&req->queue, &q->rq[rw].free);
-		if (++q->rq[rw].count >= q->batch_requests) {
+		list_add(&req->queue, &q->rq.free);
+		if (++q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
 			smp_mb();
-			if (waitqueue_active(&q->wait_for_requests[rw]))
-				wake_up(&q->wait_for_requests[rw]);
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
 		}
 	}
 }
@@ -1003,7 +1014,7 @@ again:
 			req->bhtail->b_reqnext = bh;
 			req->bhtail = bh;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			blk_started_io(count);
+			blk_started_io(req, count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			req_new_io(req, 1, count);
 			attempt_back_merge(q, req, max_sectors, max_segments);
@@ -1025,7 +1036,7 @@ again:
 			req->current_nr_sectors = req->hard_cur_sectors = count;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			blk_started_io(count);
+			blk_started_io(req, count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			req_new_io(req, 1, count);
 			attempt_front_merge(q, head, req, max_sectors, max_segments);
@@ -1058,7 +1069,7 @@ get_rq:
 		 * See description above __get_request_wait()
 		 */
 		if (rw_ahead) {
-			if (q->rq[rw].count < q->batch_requests) {
+			if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
 				spin_unlock_irq(q->queue_lock);
 				goto end_io;
 			}
@@ -1094,7 +1105,7 @@ get_rq:
 	req->rq_dev = bh->b_rdev;
 	req->start_time = jiffies;
 	req_new_io(req, 0, count);
-	blk_started_io(count);
+	blk_started_io(req, count);
 	add_request(q, req, insert_here);
 out:
 	if (freereq)
@@ -1387,7 +1398,7 @@ int end_that_request_first (struct reque
 
 	if ((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
-		blk_finished_io(nsect);
+		blk_finished_io(req, nsect);
 		req->bh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);
diff -urNp x-ref/drivers/scsi/scsi_lib.c x/drivers/scsi/scsi_lib.c
--- x-ref/drivers/scsi/scsi_lib.c	2002-12-19 01:29:14.000000000 +0100
+++ x/drivers/scsi/scsi_lib.c	2002-12-19 01:29:19.000000000 +0100
@@ -384,7 +384,7 @@ static Scsi_Cmnd *__scsi_end_request(Scs
 	do {
 		if ((bh = req->bh) != NULL) {
 			nsect = bh->b_size >> 9;
-			blk_finished_io(nsect);
+			blk_finished_io(req, nsect);
 			req->bh = bh->b_reqnext;
 			bh->b_reqnext = NULL;
 			sectors -= nsect;
diff -urNp x-ref/include/linux/blkdev.h x/include/linux/blkdev.h
--- x-ref/include/linux/blkdev.h	2002-12-19 01:29:14.000000000 +0100
+++ x/include/linux/blkdev.h	2002-12-19 01:29:19.000000000 +0100
@@ -80,7 +80,7 @@ struct request_queue
 	/*
 	 * the queue request freelist, one for reads and one for writes
 	 */
-	struct request_list	rq[2];
+	struct request_list	rq;
 
 	/*
 	 * The total number of requests on each queue
@@ -93,6 +93,21 @@ struct request_queue
 	int batch_requests;
 
 	/*
+	 * The total number of 512byte blocks on each queue
+	 */
+	atomic_t nr_sectors;
+
+	/*
+	 * Batching threshold for sleep/wakeup decisions
+	 */
+	int batch_sectors;
+
+	/*
+	 * The max number of 512byte blocks on each queue
+	 */
+	int max_queue_sectors;
+
+	/*
 	 * Together with queue_head for cacheline sharing
 	 */
 	struct list_head	queue_head;
@@ -137,7 +152,7 @@ struct request_queue
 	/*
 	 * Tasks wait here for free read and write requests
 	 */
-	wait_queue_head_t	wait_for_requests[2];
+	wait_queue_head_t	wait_for_requests;
 };
 
 #define blk_queue_plugged(q)	(q)->plugged
@@ -221,7 +236,7 @@ extern void blkdev_release_request(struc
 /*
  * Access functions for manipulating queue properties
  */
-extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors);
 extern void blk_init_queue(request_queue_t *, request_fn_proc *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_headactive(request_queue_t *, int);
@@ -245,6 +260,8 @@ extern char * blkdev_varyio[MAX_BLKDEV];
 
 #define MAX_SEGMENTS 128
 #define MAX_SECTORS 255
+#define MAX_QUEUE_SECTORS (2 << (20 - 9)) /* 2 mbytes when full sized */
+#define MAX_NR_REQUESTS (MAX_QUEUE_SECTORS >> (10 - 9)) /* 1mbyte queue when all requests are 1k */
 
 #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
 
@@ -271,8 +288,40 @@ static inline int get_hardsect_size(kdev
 	return retval;
 }
 
-#define blk_finished_io(nsects)	do { } while (0)
-#define blk_started_io(nsects)	do { } while (0)
+static inline int blk_oversized_queue(request_queue_t * q)
+{
+	return atomic_read(&q->nr_sectors) > q->max_queue_sectors;
+}
+
+static inline int blk_oversized_queue_batch(request_queue_t * q)
+{
+	return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
+}
+
+static inline void blk_started_io(struct request * req, int nsects)
+{
+	request_queue_t * q = req->q;
+
+	if (q)
+		atomic_add(nsects, &q->nr_sectors);
+	BUG_ON(atomic_read(&q->nr_sectors) < 0);
+}
+
+static inline void blk_finished_io(struct request * req, int nsects)
+{
+	request_queue_t * q = req->q;
+
+	/* special requests belongs to a null queue */
+	if (q) {
+		atomic_sub(nsects, &q->nr_sectors);
+		if (q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
+		}
+	}
+	BUG_ON(atomic_read(&q->nr_sectors) < 0);
+}
 
 static inline unsigned int blksize_bits(unsigned int size)
 {
diff -urNp x-ref/include/linux/elevator.h x/include/linux/elevator.h
--- x-ref/include/linux/elevator.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/linux/elevator.h	2002-12-19 01:29:19.000000000 +0100
@@ -80,7 +80,7 @@ static inline int elevator_request_laten
 	return latency;
 }
 
-#define ELV_LINUS_SEEK_COST	16
+#define ELV_LINUS_SEEK_COST	1
 
 #define ELEVATOR_NOOP							\
 ((elevator_t) {								\
@@ -93,8 +93,8 @@ static inline int elevator_request_laten
 
 #define ELEVATOR_LINUS							\
 ((elevator_t) {								\
-	2048,				/* read passovers */		\
-	8192,				/* write passovers */		\
+	128,				/* read passovers */		\
+	512,				/* write passovers */		\
 									\
 	elevator_linus_merge,		/* elevator_merge_fn */		\
 	elevator_linus_merge_req,	/* elevator_merge_req_fn */	\
diff -urNp x-ref/include/linux/nbd.h x/include/linux/nbd.h
--- x-ref/include/linux/nbd.h	2002-12-19 00:45:47.000000000 +0100
+++ x/include/linux/nbd.h	2002-12-19 01:29:19.000000000 +0100
@@ -48,7 +48,7 @@ nbd_end_request(struct request *req)
 	spin_lock_irqsave(&io_request_lock, flags);
 	while((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
-		blk_finished_io(nsect);
+		blk_finished_io(req, nsect);
 		req->bh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);