diff -urNp x-ref/drivers/block/DAC960.c x/drivers/block/DAC960.c
--- x-ref/drivers/block/DAC960.c	2002-11-29 02:22:58.000000000 +0100
+++ x/drivers/block/DAC960.c	2003-02-14 05:30:08.000000000 +0100
@@ -19,8 +19,8 @@
 */
 
 
-#define DAC960_DriverVersion			"2.4.11"
-#define DAC960_DriverDate			"11 October 2001"
+#define DAC960_DriverVersion			"2.4.20aa1"
+#define DAC960_DriverDate			"4 December 2002"
 
 
 #include
@@ -2975,8 +2975,9 @@ static boolean DAC960_ProcessRequest(DAC
   Command->SegmentCount = Request->nr_segments;
   Command->BufferHeader = Request->bh;
   Command->RequestBuffer = Request->buffer;
+  Command->Request = Request;
   blkdev_dequeue_request(Request);
-  blkdev_release_request(Request);
+  /* blkdev_release_request(Request); */
   DAC960_QueueReadWriteCommand(Command);
   return true;
 }
@@ -3023,11 +3024,12 @@ static void DAC960_RequestFunction(Reque
   individual Buffer.
 */
 
-static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader,
+static inline void DAC960_ProcessCompletedBuffer(IO_Request_T *Req, BufferHeader_T *BufferHeader,
 						 boolean SuccessfulIO)
 {
-  blk_finished_io(BufferHeader->b_size >> 9);
+  blk_finished_io(Req, BufferHeader->b_size >> 9);
   BufferHeader->b_end_io(BufferHeader, SuccessfulIO);
+
 }
 
 
@@ -3116,9 +3118,10 @@ static void DAC960_V1_ProcessCompletedCo
 	{
 	  BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext;
 	  BufferHeader->b_reqnext = NULL;
-	  DAC960_ProcessCompletedBuffer(BufferHeader, true);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, true);
 	  BufferHeader = NextBufferHeader;
 	}
+      blkdev_release_request(Command->Request);
       if (Command->Completion != NULL)
 	{
 	  complete(Command->Completion);
@@ -3161,7 +3164,7 @@ static void DAC960_V1_ProcessCompletedCo
 	{
 	  BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext;
 	  BufferHeader->b_reqnext = NULL;
-	  DAC960_ProcessCompletedBuffer(BufferHeader, false);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, false);
 	  BufferHeader = NextBufferHeader;
 	}
       if (Command->Completion != NULL)
@@ -3169,6 +3172,7 @@ static void DAC960_V1_ProcessCompletedCo
 	      complete(Command->Completion);
 	      Command->Completion = NULL;
 	    }
+	  blkdev_release_request(Command->Request);
 	}
     }
   else if (CommandType == DAC960_ReadRetryCommand ||
@@ -3180,12 +3184,12 @@ static void DAC960_V1_ProcessCompletedCo
 	 Perform completion processing for this single buffer.
       */
       if (CommandStatus == DAC960_V1_NormalCompletion)
-	DAC960_ProcessCompletedBuffer(BufferHeader, true);
+	DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, true);
       else
 	{
 	  if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline)
 	    DAC960_V1_ReadWriteError(Command);
-	  DAC960_ProcessCompletedBuffer(BufferHeader, false);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, false);
 	}
       if (NextBufferHeader != NULL)
 	{
@@ -3203,6 +3207,7 @@ static void DAC960_V1_ProcessCompletedCo
 	  DAC960_QueueCommand(Command);
 	  return;
 	}
+      blkdev_release_request(Command->Request);
     }
   else if (CommandType == DAC960_MonitoringCommand ||
 	   CommandOpcode == DAC960_V1_Enquiry ||
@@ -4222,9 +4227,10 @@ static void DAC960_V2_ProcessCompletedCo
 	{
 	  BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext;
 	  BufferHeader->b_reqnext = NULL;
-	  DAC960_ProcessCompletedBuffer(BufferHeader, true);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, true);
 	  BufferHeader = NextBufferHeader;
 	}
+      blkdev_release_request(Command->Request);
       if (Command->Completion != NULL)
 	{
 	  complete(Command->Completion);
@@ -4267,9 +4273,10 @@ static void DAC960_V2_ProcessCompletedCo
 	{
 	  BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext;
 	  BufferHeader->b_reqnext = NULL;
-	  DAC960_ProcessCompletedBuffer(BufferHeader, false);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, false);
 	  BufferHeader = NextBufferHeader;
 	}
+      blkdev_release_request(Command->Request);
       if (Command->Completion != NULL)
 	{
 	  complete(Command->Completion);
@@ -4286,12 +4293,12 @@ static void DAC960_V2_ProcessCompletedCo
 	 Perform completion processing for this single buffer.
       */
       if (CommandStatus == DAC960_V2_NormalCompletion)
-	DAC960_ProcessCompletedBuffer(BufferHeader, true);
+	DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, true);
       else
 	{
 	  if (Command->V2.RequestSense.SenseKey != DAC960_SenseKey_NotReady)
 	    DAC960_V2_ReadWriteError(Command);
-	  DAC960_ProcessCompletedBuffer(BufferHeader, false);
+	  DAC960_ProcessCompletedBuffer(Command->Request, BufferHeader, false);
 	}
       if (NextBufferHeader != NULL)
 	{
@@ -4319,6 +4326,7 @@ static void DAC960_V2_ProcessCompletedCo
 	  DAC960_QueueCommand(Command);
 	  return;
 	}
+      blkdev_release_request(Command->Request);
     }
   else if (CommandType == DAC960_MonitoringCommand)
     {
diff -urNp x-ref/drivers/block/DAC960.h x/drivers/block/DAC960.h
--- x-ref/drivers/block/DAC960.h	2002-01-22 18:54:52.000000000 +0100
+++ x/drivers/block/DAC960.h	2003-02-14 05:30:08.000000000 +0100
@@ -2282,6 +2282,7 @@ typedef struct DAC960_Command
   unsigned int SegmentCount;
   BufferHeader_T *BufferHeader;
   void *RequestBuffer;
+  IO_Request_T *Request;
   union {
     struct {
       DAC960_V1_CommandMailbox_T CommandMailbox;
@@ -4265,12 +4266,4 @@ static void DAC960_Message(DAC960_Messag
 static void DAC960_CreateProcEntries(void);
 static void DAC960_DestroyProcEntries(void);
 
-
-/*
-  Export the Kernel Mode IOCTL interface.
-*/
-
-EXPORT_SYMBOL(DAC960_KernelIOCTL);
-
-
 #endif /* DAC960_DriverVersion */
diff -urNp x-ref/drivers/block/cciss.c x/drivers/block/cciss.c
--- x-ref/drivers/block/cciss.c	2003-02-14 05:25:45.000000000 +0100
+++ x/drivers/block/cciss.c	2003-02-14 05:26:06.000000000 +0100
@@ -1995,14 +1995,14 @@ static void start_io( ctlr_info_t *h)
 	}
 }
 
-static inline void complete_buffers( struct buffer_head *bh, int status)
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int status)
 {
 	struct buffer_head *xbh;
 	while(bh)
 	{
 		xbh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
-		blk_finished_io(bh->b_size >> 9);
+		blk_finished_io(req, bh->b_size >> 9);
 		bh->b_end_io(bh, status);
 		bh = xbh;
 	}
@@ -2145,7 +2145,7 @@ static inline void complete_command( ctl
 			pci_unmap_page(hba[cmd->ctlr]->pdev,
 				temp64.val, cmd->SG[i].Len, ddir);
 	}
-	complete_buffers(cmd->rq->bh, status);
+	complete_buffers(cmd->rq, cmd->rq->bh, status);
 #ifdef CCISS_DEBUG
 	printk("Done with %p\n", cmd->rq);
 #endif /* CCISS_DEBUG */
@@ -2229,7 +2229,7 @@ next:
 		printk(KERN_WARNING "doreq cmd for %d, %x at %p\n",
 			h->ctlr, creq->rq_dev, creq);
 		blkdev_dequeue_request(creq);
-		complete_buffers(creq->bh, 0);
+		complete_buffers(creq, creq->bh, 0);
 		end_that_request_last(creq);
 		goto startio;
 	}
diff -urNp x-ref/drivers/block/cpqarray.c x/drivers/block/cpqarray.c
--- x-ref/drivers/block/cpqarray.c	2003-02-14 05:25:43.000000000 +0100
+++ x/drivers/block/cpqarray.c	2003-02-14 05:26:06.000000000 +0100
@@ -169,7 +169,7 @@ static void start_io(ctlr_info_t *h);
 
 static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c);
 static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c);
-static inline void complete_buffers(struct buffer_head *bh, int ok);
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int ok);
 static inline void complete_command(cmdlist_t *cmd, int timeout);
 
 static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs);
@@ -981,7 +981,7 @@ next:
 		printk(KERN_WARNING "doreq cmd for %d, %x at %p\n",
 			h->ctlr, creq->rq_dev, creq);
 		blkdev_dequeue_request(creq);
-		complete_buffers(creq->bh, 0);
+		complete_buffers(creq, creq->bh, 0);
 		end_that_request_last(creq);
 		goto startio;
 	}
@@ -1082,14 +1082,14 @@ static void start_io(ctlr_info_t *h)
 	}
 }
 
-static inline void complete_buffers(struct buffer_head *bh, int ok)
+static inline void complete_buffers(struct request * req, struct buffer_head *bh, int ok)
 {
 	struct buffer_head *xbh;
 	while(bh) {
 		xbh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 
-		blk_finished_io(bh->b_size >> 9);
+		blk_finished_io(req, bh->b_size >> 9);
 		bh->b_end_io(bh, ok);
 
 		bh = xbh;
@@ -1131,7 +1131,7 @@ static inline void complete_command(cmdl
 				(cmd->req.hdr.cmd == IDA_READ) ?
 					PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
 	}
-	complete_buffers(cmd->rq->bh, ok);
+	complete_buffers(cmd->rq, cmd->rq->bh, ok);
 	DBGPX(printk("Done with %p\n", cmd->rq););
 	req_finished_io(cmd->rq);
 	end_that_request_last(cmd->rq);
diff -urNp x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c
--- x-ref/drivers/block/ll_rw_blk.c	2003-02-14 05:26:05.000000000 +0100
+++ x/drivers/block/ll_rw_blk.c	2003-02-14 06:52:06.000000000 +0100
@@ -183,11 +183,12 @@ void blk_cleanup_queue(request_queue_t *
 {
 	int count = q->nr_requests;
 
-	count -= __blk_cleanup_queue(&q->rq[READ]);
-	count -= __blk_cleanup_queue(&q->rq[WRITE]);
+	count -= __blk_cleanup_queue(&q->rq);
 
 	if (count)
 		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+	if (atomic_read(&q->nr_sectors))
+		printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
 
 	memset(q, 0, sizeof(*q));
 }
@@ -396,7 +397,7 @@ void generic_unplug_device(void *data)
  *
  * Returns the (new) number of requests which the queue has available.
  */
-int blk_grow_request_list(request_queue_t *q, int nr_requests)
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
 {
 	unsigned long flags;
 	/* Several broken drivers assume that this function doesn't sleep,
@@ -406,21 +407,31 @@ int blk_grow_request_list(request_queue_
 	spin_lock_irqsave(q->queue_lock, flags);
 	while (q->nr_requests < nr_requests) {
 		struct request *rq;
-		int rw;
 
 		rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
 		if (rq == NULL)
 			break;
 		memset(rq, 0, sizeof(*rq));
 		rq->rq_status = RQ_INACTIVE;
-		rw = q->nr_requests & 1;
-		list_add(&rq->queue, &q->rq[rw].free);
-		q->rq[rw].count++;
+		list_add(&rq->queue, &q->rq.free);
+		q->rq.count++;
 		q->nr_requests++;
 	}
+
+	/*
+	 * Wake up waiters only after both a quarter of the
+	 * max-in-flight sectors and a quarter of the requests
+	 * have become available again.
+	 */
 	q->batch_requests = q->nr_requests / 4;
 	if (q->batch_requests > 32)
 		q->batch_requests = 32;
+	q->batch_sectors = max_queue_sectors / 4;
+
+	q->max_queue_sectors = max_queue_sectors;
+
+	BUG_ON(!q->batch_sectors);
+	atomic_set(&q->nr_sectors, 0);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	return q->nr_requests;
 }
@@ -429,23 +440,26 @@ static void blk_init_free_list(request_q
 {
 	struct sysinfo si;
 	int megs;				/* Total memory, in megabytes */
-	int nr_requests;
+	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
 
-	INIT_LIST_HEAD(&q->rq[READ].free);
-	INIT_LIST_HEAD(&q->rq[WRITE].free);
-	q->rq[READ].count = 0;
-	q->rq[WRITE].count = 0;
+	INIT_LIST_HEAD(&q->rq.free);
+	q->rq.count = 0;
 	q->nr_requests = 0;
 
 	si_meminfo(&si);
 	megs = si.totalram >> (20 - PAGE_SHIFT);
-	nr_requests = 128;
-	if (megs < 32)
+	nr_requests = MAX_NR_REQUESTS;
+	if (megs < 30) {
 		nr_requests /= 2;
-	blk_grow_request_list(q, nr_requests);
+		max_queue_sectors /= 2;
+	}
+	/* notice early if anybody screwed up the defaults */
+	BUG_ON(!nr_requests);
+	BUG_ON(!max_queue_sectors);
+
+	blk_grow_request_list(q, nr_requests, max_queue_sectors);
 
-	init_waitqueue_head(&q->wait_for_requests[0]);
-	init_waitqueue_head(&q->wait_for_requests[1]);
+	init_waitqueue_head(&q->wait_for_requests);
 }
 
 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
@@ -514,11 +528,16 @@ void blk_init_queue(request_queue_t * q,
  * Get a free request. io_request_lock must be held and interrupts
 * disabled on the way in. Returns NULL if there are no free requests.
 */
+static struct request * FASTCALL(get_request(request_queue_t *q, int rw));
 static struct request *get_request(request_queue_t *q, int rw)
 {
 	struct request *rq = NULL;
-	struct request_list *rl = q->rq + rw;
+	struct request_list *rl;
 
+	if (blk_oversized_queue(q))
+		goto out;
+
+	rl = &q->rq;
 	if (!list_empty(&rl->free)) {
 		rq = blkdev_free_rq(&rl->free);
 		list_del(&rq->queue);
@@ -529,6 +548,7 @@ static struct request *get_request(reque
 		rq->q = q;
 	}
 
+ out:
 	return rq;
 }
@@ -596,10 +616,25 @@ static struct request *__get_request_wai
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
-	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
+	add_wait_queue_exclusive(&q->wait_for_requests, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (q->rq[rw].count == 0) {
+
+		/*
+		 * We must read rq.count and blk_oversized_queue()
+		 * and unplug the queue atomically (with the
+		 * spinlock being held for the whole duration of the
+		 * operation).  Otherwise we risk unplugging the
+		 * queue before the request is visible in the I/O queue.
+		 *
+		 * On the __make_request side we depend on get_request,
+		 * get_request_wait_wakeup and blk_started_io to run
+		 * under the q->queue_lock and to never release it
+		 * until the request is visible in the I/O queue
+		 * (i.e. after add_request).
+		 */
+		spin_lock_irq(q->queue_lock);
+		if (q->rq.count == 0 || blk_oversized_queue(q)) {
 			/*
 			 * All we care about is not to stall if any request
 			 * is been released after we set TASK_UNINTERRUPTIBLE.
@@ -607,14 +642,16 @@ static struct request *__get_request_wai
 			 * in case we hit the race and we can get the request
 			 * without waiting.
 			 */
-			generic_unplug_device(q);
+			__generic_unplug_device(q);
+
+			spin_unlock_irq(q->queue_lock);
 			schedule();
+			spin_lock_irq(q->queue_lock);
 		}
-		spin_lock_irq(q->queue_lock);
 		rq = get_request(q, rw);
 		spin_unlock_irq(q->queue_lock);
 	} while (rq == NULL);
-	remove_wait_queue(&q->wait_for_requests[rw], &wait);
+	remove_wait_queue(&q->wait_for_requests, &wait);
 	current->state = TASK_RUNNING;
 	return rq;
 }
@@ -626,8 +663,8 @@ static void get_request_wait_wakeup(requ
 	 * generic_unplug_device while our __get_request_wait was running
	 * w/o the queue_lock held and w/ our request out of the queue.
	 */
-	if (waitqueue_active(&q->wait_for_requests[rw]))
-		wake_up(&q->wait_for_requests[rw]);
+	if (waitqueue_active(&q->wait_for_requests))
+		wake_up(&q->wait_for_requests);
 }
 
 /* RO fail safe mechanism */
@@ -843,7 +880,6 @@ static inline void add_request(request_q
 void blkdev_release_request(struct request *req)
 {
 	request_queue_t *q = req->q;
-	int rw = req->cmd;
 
 	req->rq_status = RQ_INACTIVE;
 	req->q = NULL;
@@ -853,11 +889,11 @@ void blkdev_release_request(struct reque
 	 * assume it has free buffers and check waiters
 	 */
 	if (q) {
-		list_add(&req->queue, &q->rq[rw].free);
-		if (++q->rq[rw].count >= q->batch_requests) {
+		list_add(&req->queue, &q->rq.free);
+		if (++q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
 			smp_mb();
-			if (waitqueue_active(&q->wait_for_requests[rw]))
-				wake_up(&q->wait_for_requests[rw]);
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
 		}
 	}
 }
@@ -1003,7 +1039,7 @@ again:
 			req->bhtail->b_reqnext = bh;
 			req->bhtail = bh;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			blk_started_io(count);
+			blk_started_io(req, count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			req_new_io(req, 1, count);
 			attempt_back_merge(q, req, max_sectors, max_segments);
@@ -1025,7 +1061,7 @@ again:
 			req->current_nr_sectors = req->hard_cur_sectors = count;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			blk_started_io(count);
+			blk_started_io(req, count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			req_new_io(req, 1, count);
 			attempt_front_merge(q, head, req, max_sectors, max_segments);
@@ -1058,7 +1094,7 @@ get_rq:
 	 * See description above __get_request_wait()
 	 */
 	if (rw_ahead) {
-		if (q->rq[rw].count < q->batch_requests) {
+		if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
 			spin_unlock_irq(q->queue_lock);
 			goto end_io;
 		}
@@ -1094,7 +1130,7 @@ get_rq:
 	req->rq_dev = bh->b_rdev;
 	req->start_time = jiffies;
 	req_new_io(req, 0, count);
-	blk_started_io(count);
+	blk_started_io(req, count);
 	add_request(q, req, insert_here);
 out:
 	if (freereq)
@@ -1384,7 +1420,7 @@ int end_that_request_first (struct reque
 
 	if ((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
-		blk_finished_io(nsect);
+		blk_finished_io(req, nsect);
 		req->bh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);
diff -urNp x-ref/drivers/scsi/scsi_lib.c x/drivers/scsi/scsi_lib.c
--- x-ref/drivers/scsi/scsi_lib.c	2003-02-14 05:26:04.000000000 +0100
+++ x/drivers/scsi/scsi_lib.c	2003-02-14 05:59:01.000000000 +0100
@@ -384,7 +384,7 @@ static Scsi_Cmnd *__scsi_end_request(Scs
 	do {
 		if ((bh = req->bh) != NULL) {
 			nsect = bh->b_size >> 9;
-			blk_finished_io(nsect);
+			blk_finished_io(req, nsect);
 			req->bh = bh->b_reqnext;
 			bh->b_reqnext = NULL;
 			sectors -= nsect;
diff -urNp x-ref/include/linux/blkdev.h x/include/linux/blkdev.h
--- x-ref/include/linux/blkdev.h	2003-02-14 05:26:04.000000000 +0100
+++ x/include/linux/blkdev.h	2003-02-14 06:48:27.000000000 +0100
@@ -80,7 +80,7 @@ struct request_queue
 	/*
 	 * the queue request freelist, one for reads and one for writes
 	 */
-	struct request_list rq[2];
+	struct request_list rq;
 
 	/*
 	 * The total number of requests on each queue
@@ -93,6 +93,21 @@ struct request_queue
 	int batch_requests;
 
 	/*
+	 * The total number of 512-byte blocks on each queue
+	 */
+	atomic_t nr_sectors;
+
+	/*
+	 * Batching threshold for sleep/wakeup decisions
+	 */
+	int batch_sectors;
+
+	/*
+	 * The max number of 512-byte blocks on each queue
+	 */
+	int max_queue_sectors;
+
+	/*
	 * Together with queue_head for cacheline sharing
	 */
 	struct list_head queue_head;
@@ -137,7 +152,7 @@ struct request_queue
 	/*
 	 * Tasks wait here for free read and write requests
 	 */
-	wait_queue_head_t wait_for_requests[2];
+	wait_queue_head_t wait_for_requests;
 };
 
 #define blk_queue_plugged(q)	(q)->plugged
@@ -221,7 +236,7 @@ extern void blkdev_release_request(struc
 /*
  * Access functions for manipulating queue properties
 */
-extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors);
 extern void blk_init_queue(request_queue_t *, request_fn_proc *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_headactive(request_queue_t *, int);
@@ -245,6 +260,8 @@ extern char * blkdev_varyio[MAX_BLKDEV];
 
 #define MAX_SEGMENTS 128
 #define MAX_SECTORS 255
+#define MAX_QUEUE_SECTORS (2 << (20 - 9)) /* 2 mbytes when full sized */
+#define MAX_NR_REQUESTS 512 /* 256k when in 512 units, normally min is 512k in 1k units */
 
 #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
 
@@ -271,8 +288,40 @@ static inline int get_hardsect_size(kdev
 	return retval;
 }
 
-#define blk_finished_io(nsects)	do { } while (0)
-#define blk_started_io(nsects)	do { } while (0)
+static inline int blk_oversized_queue(request_queue_t * q)
+{
+	return atomic_read(&q->nr_sectors) > q->max_queue_sectors;
+}
+
+static inline int blk_oversized_queue_batch(request_queue_t * q)
+{
+	return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
+}
+
+static inline void blk_started_io(struct request * req, int nsects)
+{
+	request_queue_t * q = req->q;
+
+	if (q)
+		atomic_add(nsects, &q->nr_sectors);
+	BUG_ON(q && atomic_read(&q->nr_sectors) < 0);
+}
+
+static inline void blk_finished_io(struct request * req, int nsects)
+{
+	request_queue_t * q = req->q;
+
+	/* special requests belong to a null queue */
+	if (q) {
+		atomic_sub(nsects, &q->nr_sectors);
+		if (q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
+		}
+	}
+	BUG_ON(q && atomic_read(&q->nr_sectors) < 0);
+}
 
 static inline unsigned int blksize_bits(unsigned int size)
 {
diff -urNp x-ref/include/linux/elevator.h x/include/linux/elevator.h
--- x-ref/include/linux/elevator.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/linux/elevator.h	2003-02-14 06:50:40.000000000 +0100
@@ -80,7 +80,7 @@ static inline int elevator_request_laten
 	return latency;
 }
 
-#define ELV_LINUS_SEEK_COST	16
+#define ELV_LINUS_SEEK_COST	1
 
 #define ELEVATOR_NOOP							\
 ((elevator_t) {								\
@@ -93,8 +93,8 @@ static inline int elevator_request_laten
 
 #define ELEVATOR_LINUS							\
 ((elevator_t) {								\
-	2048,				/* read passovers */		\
-	8192,				/* write passovers */		\
+	128,				/* read passovers */		\
+	512,				/* write passovers */		\
 									\
 	elevator_linus_merge,		/* elevator_merge_fn */		\
 	elevator_linus_merge_req,	/* elevator_merge_req_fn */	\
diff -urNp x-ref/include/linux/nbd.h x/include/linux/nbd.h
--- x-ref/include/linux/nbd.h	2003-01-15 00:18:55.000000000 +0100
+++ x/include/linux/nbd.h	2003-02-14 05:26:06.000000000 +0100
@@ -48,7 +48,7 @@ nbd_end_request(struct request *req)
 	spin_lock_irqsave(&io_request_lock, flags);
 	while((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
-		blk_finished_io(nsect);
+		blk_finished_io(req, nsect);
 		req->bh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);
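
The core of the patch is the per-queue accounting that replaces the old no-op blk_started_io()/blk_finished_io() macros in blkdev.h: each queue now counts its in-flight 512-byte sectors, get_request() returns NULL while the queue holds more than max_queue_sectors (2 << (20 - 9) = 4096 sectors, i.e. 2 MiB by default), and sleepers are woken again only after a quarter of that budget (batch_sectors, 512 KiB) has drained. For illustration only, here is a minimal stand-alone sketch of that logic with hypothetical user-space types; the q->rq.count >= q->batch_requests half of the wakeup condition is deliberately left out:

/*
 * Stand-alone model of the patch's per-queue sector accounting.
 * All names are hypothetical stand-ins for the kernel structures.
 */
#include <assert.h>
#include <stdio.h>

struct queue {
	int nr_sectors;         /* in-flight 512-byte sectors (atomic_t in the patch) */
	int max_queue_sectors;  /* hard cap, MAX_QUEUE_SECTORS */
	int batch_sectors;      /* wakeup hysteresis, max_queue_sectors / 4 */
};

/* cf. blk_oversized_queue(): get_request() refuses to hand out a
 * request while the queue is above its sector budget */
static int oversized(const struct queue *q)
{
	return q->nr_sectors > q->max_queue_sectors;
}

/* cf. blk_oversized_queue_batch(): waiters are only woken once a full
 * batch has drained below the cap, avoiding a wakeup per completion */
static int oversized_batch(const struct queue *q)
{
	return q->nr_sectors > q->max_queue_sectors - q->batch_sectors;
}

static void started_io(struct queue *q, int nsects)
{
	q->nr_sectors += nsects;
}

static void finished_io(struct queue *q, int nsects)
{
	q->nr_sectors -= nsects;
	assert(q->nr_sectors >= 0);
	if (!oversized_batch(q))
		printf("wake request waiters (%d sectors in flight)\n",
		       q->nr_sectors);
}

int main(void)
{
	struct queue q = { 0, 4096, 1024 };  /* 2 MiB cap, 512 KiB batch */

	started_io(&q, 4096);
	printf("at cap, oversized=%d\n", oversized(&q));    /* 0 */
	started_io(&q, 8);
	printf("over cap, oversized=%d\n", oversized(&q));  /* 1 */
	finished_io(&q, 8);     /* 4096 > 3072: no wakeup yet */
	finished_io(&q, 2048);  /* 2048 <= 3072: wakeup fires */
	return 0;
}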
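
The __get_request_wait() rework follows from the same scheme: the free-count/oversized check and the unplug must happen atomically under q->queue_lock, otherwise the queue can be unplugged before the waiter's state is visible and the wakeup from a completion is lost, leaving the task asleep on a queue that never drains. A rough user-space analogue of the pattern, using a pthreads condition variable in place of the waitqueue plus TASK_UNINTERRUPTIBLE/schedule() pair (all names hypothetical, a sketch of the idea rather than the kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_for_requests = PTHREAD_COND_INITIALIZER;
static int free_requests;            /* plays the role of q->rq.count */
static int in_flight_sectors;        /* plays the role of q->nr_sectors */
static const int max_queue_sectors = 4096;

/* stand-in for __generic_unplug_device(): kick the driver so that
 * completions (and therefore wakeups) will eventually arrive; in the
 * patch this must run with q->queue_lock already held */
static void unplug_locked(void)
{
}

/* waiter side, cf. __get_request_wait(): the condition is re-checked
 * and the unplug issued under the lock; pthread_cond_wait() supplies
 * the atomic unlock-and-sleep that the kernel builds by hand */
static void get_request_wait(void)
{
	pthread_mutex_lock(&queue_lock);
	while (free_requests == 0 || in_flight_sectors > max_queue_sectors) {
		unplug_locked();
		pthread_cond_wait(&wait_for_requests, &queue_lock);
	}
	free_requests--;
	pthread_mutex_unlock(&queue_lock);
}

/* completion side, cf. blkdev_release_request()/blk_finished_io() */
static void release_request(int nsects)
{
	pthread_mutex_lock(&queue_lock);
	free_requests++;
	in_flight_sectors -= nsects;
	pthread_cond_signal(&wait_for_requests);
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	release_request(0);   /* make one request available */
	get_request_wait();
	printf("got a request\n");
	return 0;
}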
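
The driver hunks (DAC960, cciss, cpqarray, scsi_lib, nbd) all implement one new contract: blk_finished_io() now needs the request in order to find the queue whose sector count it must credit, so every per-buffer completion path takes the request, and the request may only be recycled after its last buffer has completed. That is why DAC960_ProcessRequest() no longer calls blkdev_release_request() up front and the completion handlers do it instead. A simplified sketch of the contract, with hypothetical stripped-down types:

#include <stdio.h>
#include <stdlib.h>

struct queue { int nr_sectors; };

struct buffer_head {
	struct buffer_head *b_reqnext;
	int b_size;                      /* bytes */
};

struct request {
	struct queue *q;
	struct buffer_head *bh;
};

/* credit the owning queue; "special" requests carry no queue */
static void finished_io(struct request *req, int nsects)
{
	if (req->q)
		req->q->nr_sectors -= nsects;
}

static void end_buffer(struct buffer_head *bh, int ok)
{
	printf("buffer %p done, ok=%d\n", (void *)bh, ok);
}

/* cf. the reworked complete_buffers() in cciss.c/cpqarray.c: the
 * request must stay alive across the whole walk because every buffer
 * completion dereferences req->q */
static void complete_buffers(struct request *req, int ok)
{
	struct buffer_head *bh = req->bh;

	while (bh) {
		struct buffer_head *next = bh->b_reqnext;
		bh->b_reqnext = NULL;
		finished_io(req, bh->b_size >> 9);  /* credit before b_end_io */
		end_buffer(bh, ok);
		bh = next;
	}
	free(req);  /* only now is it safe to recycle the request */
}

int main(void)
{
	struct queue q = { 8 };          /* two 4-sector buffers in flight */
	struct buffer_head b2 = { NULL, 2048 }, b1 = { &b2, 2048 };
	struct request *req = malloc(sizeof(*req));

	req->q = &q;
	req->bh = &b1;
	complete_buffers(req, 1);
	printf("in-flight sectors left: %d\n", q.nr_sectors);  /* 0 */
	return 0;
}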