diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/cciss.c Mon May 21 23:55:42 2001 @@ -1061,17 +1061,18 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) + while(bio) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio->bi_flags |= !!status; + bio->bi_end_io(bio); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1156,49 +1157,7 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* @@ -1213,15 +1172,20 @@ CommandList_struct *c; int log_unit, start_blk, seg, sect; char *lastdataend; - struct buffer_head *bh; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - // Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + + // Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } @@ -1235,7 +1199,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -1246,7 +1210,7 @@ return; } c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1263,34 +1227,34 @@ c->Request.CDB[0] = (creq->cmd == READ) ? 
CCISS_READ : CCISS_WRITE; start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ seg = 0; lastdataend = NULL; sect = 0; - while(bh) + while(bio) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + sect += bio_sectors(bio); + if (bio_data(bio) == lastdataend) { // tack it on to the last segment - c->SG[seg-1].Len +=bh->b_size; - lastdataend += bh->b_size; + c->SG[seg-1].Len += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == MAXSGENTRIES) BUG(); - c->SG[seg].Len = bh->b_size; - temp64.val = (__u64) virt_to_bus(bh->b_data); + c->SG[seg].Len = bio_size(bio); + temp64.val = (__u64) virt_to_bus(bio_data(bio)); c->SG[seg].Addr.lower = temp64.val32.lower; c->SG[seg].Addr.upper = temp64.val32.upper; c->SG[0].Ext = 0; // we are not chaining - lastdataend = bh->b_data + bh->b_size; + lastdataend = bio_data(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* track how many SG entries we are using */ if( seg > h->maxSG) @@ -1380,10 +1344,11 @@ } } } + /* * See if we can queue up some more IO */ - do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); spin_unlock_irqrestore(&io_request_lock, flags); } /* @@ -1876,17 +1841,12 @@ q->queuedata = hba[i]; blk_init_queue(q, do_cciss_request); blk_queue_headactive(q, 0); + q->max_segments = MAXSGENTRIES; /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss_cmd.h Sun May 20 14:05:11 2001 +++ linux/drivers/block/cciss_cmd.h Wed May 16 19:02:03 2001 @@ -227,7 +227,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cpqarray.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/cpqarray.c Mon May 21 23:54:54 2001 @@ -145,7 +145,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -346,47 +346,6 @@ } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int 
max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -521,14 +480,11 @@ q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); blk_queue_headactive(q, 0); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -905,13 +861,18 @@ int seg, sect; char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; -// Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } @@ -925,7 +886,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -936,7 +897,7 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -944,29 +905,29 @@ c->size += sizeof(rblk_t); c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->bio = bio; DBGPX( - if (bh == NULL) + if (bio == NULL) panic("bh == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); seg = 0; lastdataend = NULL; sect = 0; - while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - c->req.sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + while(bio) { + sect += bio_sectors(bio); + if (bio_data(bio) == lastdataend) { + c->req.sg[seg-1].size += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - c->req.sg[seg].size = bh->b_size; - c->req.sg[seg].addr = (__u32)virt_to_bus(bh->b_data); - lastdataend = bh->b_data + bh->b_size; + c->req.sg[seg].size = bio_size(bio); + c->req.sg[seg].addr = (__u32)virt_to_bus(bio_data(bio)); + lastdataend = bio_data(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1028,17 +989,18 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; 
+ bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio->bi_flags |= !!ok; + bio->bi_end_io(bio); - bh = xbh; + bio = xbh; } } /* @@ -1067,7 +1029,7 @@ ok = 0; } if (timeout) ok = 0; - complete_buffers(cmd->bh, ok); + complete_buffers(cmd->bio, ok); } /* @@ -1126,7 +1088,7 @@ /* * See if we can queue up some more IO */ - do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); spin_unlock_irqrestore(&io_request_lock, flags); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/elevator.c Fri Feb 16 01:58:34 2001 +++ linux/drivers/block/elevator.c Mon May 21 22:58:16 2001 @@ -28,13 +28,13 @@ #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; @@ -45,21 +45,21 @@ /* * if the device is different (usually on a different partition), - * just check if bh is after rq + * just check if bio is after rq */ next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,23 +68,43 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } - int elevator_linus_merge(request_queue_t *q, struct request **req, struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct bio *bio, int rw) { struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio), ret = ELEVATOR_NO_MERGE; + elevator_t *e = &q->elevator; + struct request *__rq; + + /* + * first check it we can merge with the last inserted request. 
+ * this cuts down queue scans considerably, and removes the biggest + * reason to pass bigger/more bios in trough ->make_request + */ + if ((__rq = e->last_merge)) { + if (&__rq->queue == head || __rq->cmd != rw + || __rq->rq_dev != bio->bi_dev + || __rq->nr_sectors + count > q->max_sectors) + *req = NULL; + else if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } else if (__rq->sector - count == bio->bi_sector) { + __rq->elevator_sequence -= count; + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = blkdev_entry_to_request(entry); /* * simply "aging" of requests in queue @@ -94,24 +114,24 @@ if (__rq->sem) continue; - if (__rq->rq_dev != bh->b_rdev) + if (__rq->rq_dev != bio->bi_dev) continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > max_sectors) + if (__rq->nr_sectors + count > q->max_sectors) continue; if (__rq->elevator_sequence < count) break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { ret = ELEVATOR_BACK_MERGE; - *req = __rq; + e->last_merge = *req = __rq; break; - } else if (__rq->sector - count == bh->b_rsector) { + } else if (__rq->sector - count == bio->bi_sector) { ret = ELEVATOR_FRONT_MERGE; __rq->elevator_sequence -= count; - *req = __rq; + e->last_merge = *req = __rq; break; } } @@ -143,11 +163,10 @@ */ int elevator_noop_merge(request_queue_t *q, struct request **req, struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct bio *bio, int rw) { struct list_head *entry; - unsigned int count = bh->b_size >> 9; + unsigned int count = bio_sectors(bio); if (list_empty(&q->queue_head)) return ELEVATOR_NO_MERGE; @@ -158,16 +177,16 @@ if (__rq->cmd != rw) continue; - if (__rq->rq_dev != bh->b_rdev) + if (__rq->rq_dev != bio->bi_dev) continue; - if (__rq->nr_sectors + count > max_sectors) + if (__rq->nr_sectors + count > q->max_sectors) continue; if (__rq->sem) continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { *req = __rq; return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { + } else if (__rq->sector - count == bio->bi_sector) { *req = __rq; return ELEVATOR_FRONT_MERGE; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/floppy.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/floppy.c Wed May 16 14:00:53 2001 @@ -570,7 +570,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2382,7 +2382,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2429,7 +2429,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - 
INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2437,19 +2437,19 @@ /* Compute maximal contiguous buffer size. */ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2458,13 +2458,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2475,7 +2475,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2484,8 +2484,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2505,9 +2505,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2519,8 +2519,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2541,15 +2541,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2641,7 +2641,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2649,17 +2649,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = 
minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2681,7 +2681,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2689,11 +2689,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2709,12 +2709,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2722,8 +2722,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2736,7 +2736,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2751,7 +2751,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
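The cciss and cpqarray hunks above replace the old buffer_head walk with the same bio-chain idiom: follow bi_next from the request's first bio and use the bio_data()/bio_size()/bio_sectors() accessors instead of touching b_data/b_size directly. A minimal sketch of that scatter-gather build-up, using only accessors this patch introduces (the driver_sg struct and max_sg limit are hypothetical):

	struct driver_sg {
		unsigned long addr;
		unsigned int len;
	};

	static int driver_build_sg(struct request *rq, struct driver_sg *sg, int max_sg)
	{
		struct bio *bio;
		char *lastdataend = NULL;
		int seg = 0;

		for (bio = rq->bio; bio; bio = bio->bi_next) {
			if (bio_data(bio) == lastdataend) {
				/* contiguous with the previous bio: just extend it */
				sg[seg - 1].len += bio_size(bio);
			} else {
				if (seg == max_sg)
					BUG();
				sg[seg].addr = virt_to_bus(bio_data(bio));
				sg[seg].len = bio_size(bio);
				seg++;
			}
			lastdataend = bio_data(bio) + bio_size(bio);
		}
		return seg;		/* segments actually used */
	}

copy_buffer() in the floppy hunks walks the same chain, only into the track buffer instead of a hardware SG list.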
@@ -2770,8 +2770,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2789,12 +2789,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2841,7 +2841,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2859,8 +2859,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2913,8 +2913,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/block/ida_cmd.h Mon Dec 11 21:50:39 2000 +++ linux/drivers/block/ida_cmd.h Wed May 16 18:54:09 2001 @@ -96,7 +96,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/ll_rw_blk.c Thu Apr 12 21:15:52 2001 +++ linux/drivers/block/ll_rw_blk.c Mon May 21 23:26:20 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -113,11 +115,6 @@ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * queued sectors for all devices, used to make sure we don't fill all * of memory with locked buffers */ @@ -130,14 +127,18 @@ static int batch_requests, queue_nr_requests; static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} - -inline request_queue_t 
*__blk_get_queue(kdev_t dev) +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -147,22 +148,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - static int __blk_cleanup_queue(struct list_head *head) { struct request *rq; @@ -233,10 +218,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -261,15 +245,52 @@ * a kernel mapping, to by calling create_bounce() to create a * buffer in normal memory. **/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + q->max_segments = MAX_SEGMENTS; + q->max_sectors = MAX_SECTORS; q->make_request_fn = mfn; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @page: highest page we can do I/O to + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page, ie + * one the driver can still call bio_page() and get a valid address on. + **/ +void blk_queue_bounce_limit(request_queue_t *q, struct page *page) +{ + q->bounce_limit = page; +} + +/** + * blk_queue_max_setors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. 
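Together with the max_segments assignments in the cciss and cpqarray init code above, queue setup under this interface comes down to a handful of calls. A sketch of what a driver's init might look like (mydev_request_fn, MYDEV_MAX_SG and the 16MB limit are hypothetical, not part of this patch):

	#define MYDEV_MAX_SG	31				/* hypothetical adapter limit */

	static void mydev_request_fn(request_queue_t *q);	/* hypothetical */

	static void mydev_init_queue(request_queue_t *q)
	{
		blk_init_queue(q, mydev_request_fn);
		blk_queue_headactive(q, 0);

		/* largest request and scatter-gather list this adapter accepts */
		blk_queue_max_sectors(q, 128);
		q->max_segments = MYDEV_MAX_SG;

		/* bounce pages the hardware cannot reach, e.g. a 16MB ISA-style limit */
		blk_queue_bounce_limit(q, mem_map + ((16 * 1024 * 1024) >> PAGE_SHIFT) - 1);
	}

Drivers that can reach all of memory would instead raise the bounce limit so highmem pages are never copied; the default set in blk_init_queue() below keeps the old bounce-everything-highmem behaviour.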
+ **/ +void blk_queue_max_sectors(request_queue_t *q, int max_sectors) +{ + q->max_sectors = max_sectors; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) { - if (req->nr_segments < max_segments) { + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -277,36 +298,57 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (BIO_CONTIG(req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (BIO_CONTIG(bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (BIO_CONTIG(req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left on there to be serviced, it will be left in a state where + * it is still unplugged but not be recalled by the block layer. + * not be replugged, and thus request_fn will be run. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. + **/ +void inline blk_wake_queue(request_queue_t *q) +{ + if (!blk_set_plugged(q)) + queue_task(&q->plug_tq, &tq_disk); +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -315,16 +357,12 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!list_empty(&q->queue_head)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + blk_wake_queue(q); } /* @@ -332,13 +370,22 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } + if (blk_set_unplugged(q) && !list_empty(&q->queue_head)) + q->request_fn(q); } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. 
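The plugged-queue test and blk_wake_queue() give request functions the simple shape visible in the cciss/cpqarray hunks earlier: return while the queue is plugged, stop pulling requests when the controller is full, and let the completion interrupt call blk_wake_queue() to get request_fn re-run. A rough sketch of that pattern (the mydev_* helpers are hypothetical):

	static void mydev_request_fn(request_queue_t *q)
	{
		struct request *rq;

		/* still plugged: the elevator is busy merging, leave the queue alone */
		if (blk_queue_plugged(q))
			return;

		while (!list_empty(&q->queue_head)) {
			rq = blkdev_entry_to_request(q->queue_head.next);

			if (!mydev_have_resources())	/* hypothetical */
				return;

			blkdev_dequeue_request(rq);
			mydev_issue(rq);		/* hypothetical: hand rq to hardware */
		}
	}

When mydev_have_resources() fails we simply return; the interrupt handler later calls blk_wake_queue(q) once commands complete, which is exactly what the converted cciss/cpqarray interrupt paths above now do instead of calling their request functions directly.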
Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; @@ -367,14 +414,18 @@ rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->table, &q->request_freelist[i & 1]); + if (i < queue_nr_requests / 2) + list_add(&rq->table, &q->request_freelist[READ]); + else + list_add(&rq->table, &q->request_freelist[WRITE]); } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, int, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -418,19 +469,18 @@ q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + blk_set_unplugged(q); + blk_mark_headactive(q); + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. + * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, max_low_pfn + mem_map); + + blk_queue_make_request(q, __make_request); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, table); @@ -461,7 +511,7 @@ register struct request *rq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&q->wait_for_request, &wait); + add_wait_queue_exclusive(&q->wait_for_request[rw], &wait); for (;;) { __set_current_state(TASK_UNINTERRUPTIBLE); spin_lock_irq(&io_request_lock); @@ -472,23 +522,11 @@ generic_unplug_device(q); schedule(); } - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -546,9 +584,12 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) { spin_unlock_irq(&io_request_lock); BUG(); } @@ -558,6 +599,7 @@ * inserted at elevator_merge time */ list_add(&req->queue, insert_here); + e->last_merge = req; } inline void blk_refill_freelist(request_queue_t *q, int rw) @@ -600,7 +642,7 @@ if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); - wake_up_nr(&q->wait_for_request, wake_up); + 
wake_up_nr(&q->wait_for_request[rw], wake_up); } } } @@ -608,10 +650,7 @@ /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static void attempt_merge(request_queue_t *q, struct request *req) { struct request *next; @@ -620,7 +659,7 @@ return; if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors + || req->nr_sectors + next->nr_sectors > q->max_sectors || next->sem) return; /* @@ -629,90 +668,91 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. */ - if (!q->merge_requests_fn(q, req, next, max_segments)) + if (!q->merge_requests_fn(q, req, next)) return; q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); + blkdev_dequeue_request(next); blkdev_release_request(next); } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + if (&rq->queue != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - struct list_head * prev; + struct list_head *prev = rq->queue.prev; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + if (rq->queue.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request, but the driver can allow us to do + * it since it knows when it is safe to do so. 
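As the description says, this is meant for head-active drivers that take the front request off the queue almost as soon as it is inserted; before telling the hardware about it they can give the block layer one last merge opportunity. A possible call site, sketched (mydev_issue is hypothetical):

	static void mydev_start_head(request_queue_t *q)
	{
		struct request *rq = blkdev_entry_to_request(q->queue_head.next);

		/* the hardware has not seen rq yet, so merging into it is still safe */
		blk_attempt_remerge(q, rq);

		mydev_issue(rq);
	}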
+ **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +static int __make_request(request_queue_t *q, int rw, struct bio *bio) { unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int rw_ahead, el_ret, lat = 0; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; - count = bh->b_size >> 9; - sector = bh->b_rsector; + sector = bio->bi_sector; + count = bio_sectors(bio); rw_ahead = 0; /* normal case; gets changed below for READA */ switch (rw) { case READA: - rw_ahead = 1; rw = READ; /* drop into READ */ + rw_ahead = 1; case READ: case WRITE: - latency = elevator_request_latency(elevator, rw); + if (!(bio->bi_flags & BIO_BARRIER)) + lat = elevator_request_latency(elevator, rw); break; default: BUG(); goto end_io; } - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma) */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif - -/* look for a free request. */ - /* - * Try to coalesce the new request with old requests - */ - max_sectors = get_max_sectors(bh->b_rdev); + bio = blk_queue_bounce(q, bio); again: req = NULL; @@ -725,39 +765,44 @@ insert_here = head->prev; if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) + } else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio, rw); switch (el_ret) { case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid, if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
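The comment above is the practical consequence of blk_queue_bounce_limit(): once a driver tells the block layer not to bounce highmem pages, bio_data() and the cached req->buffer may point at a page with no kernel mapping. A PIO-style driver would then map the page around the copy instead, roughly like this (mydev_pio_write is hypothetical; bio_page()/bio_offset() are the accessors referenced in the bounce-limit description):

	static void mydev_copy_out(struct bio *bio)
	{
		/* bio_data() is not usable for an unmapped highmem page */
		char *buf = (char *) kmap(bio_page(bio)) + bio_offset(bio);

		mydev_pio_write(buf, bio_size(bio));	/* hypothetical copy to hardware */

		kunmap(bio_page(bio));
	}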
+ */ + req->buffer = bio_data(bio); req->current_nr_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -797,7 +842,7 @@ } /* fill up the request-info, and add it to the queue */ - req->elevator_sequence = latency; + req->elevator_sequence = lat; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; @@ -805,11 +850,10 @@ req->current_nr_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->sem = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; blk_started_io(count); add_request(q, req, insert_here); out: @@ -817,8 +861,9 @@ blkdev_release_request(freereq); spin_unlock_irq(&io_request_lock); return 0; + end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } @@ -856,34 +901,32 @@ * particular, no other flags, are changed by generic_make_request or * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request (int rw, struct bio *bio) { - int major = MAJOR(bh->b_rdev); + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; - if (!bh->b_end_io) - BUG(); - if (blk_size[major]) { - unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long maxsector = (blk_size[major][minor] << 1) + 1; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - if (blk_size[major][MINOR(bh->b_rdev)]) { + if (blk_size[major][minor]) { - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. */ printk(KERN_INFO "attempt to access beyond end of device\n"); printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, + kdevname(bio->bi_dev), rw, (sector + count)>>1, - blk_size[major][MINOR(bh->b_rdev)]); + blk_size[major][minor]); } - bh->b_end_io(bh, 0); + bio->bi_flags |= BIO_EOF; + bio->bi_end_io(bio); return; } } @@ -897,17 +940,74 @@ * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + kdevname(bio->bi_dev), bio->bi_sector); + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + } while (q->make_request_fn(q, rw, bio)); +} + +/* + * Default IO end handler, used by "ll_rw_block()". + */ +static void end_buffer_io_sync(struct buffer_head *bh, int uptodate) +{ + mark_buffer_uptodate(bh, uptodate); + unlock_buffer(bh); } +/* + * our default bio end_io callback handler for a buffer_head mapping. 
it's + * pretty simple, because no bio will ever contain more than one bio_vec + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bh->b_end_io(bh, bio->bi_flags & BIO_UPTODATE); + bio_free(bio); +} + +/** + * submit_bio: submit a bio to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. + * + */ +void submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + if (!bio->bi_end_io) + BUG(); + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); + BUG(); + } + + if (rw & WRITE) { + kstat.pgpgout += count; + bio->bi_flags |= BIO_WRITE; + } else { + kstat.pgpgin += count; + bio->bi_flags |= BIO_READ; + if (rw == READA) + bio->bi_flags |= BIO_RW_AHEAD; + } + + generic_make_request(rw, bio); +} /** * submit_bh: submit a buffer_head to the block device later for I/O @@ -924,39 +1024,33 @@ */ void submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; if (!test_bit(BH_Lock, &bh->b_state)) BUG(); + if (!buffer_mapped(bh)) + BUG(); + if (!bh->b_end_io) + BUG(); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; - - generic_make_request(rw, bh); + bio = bio_alloc(GFP_BUFFER, 1); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } -} + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); -/* - * Default IO end handler, used by "ll_rw_block()". - */ -static void end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - mark_buffer_uptodate(bh, uptodate); - unlock_buffer(bh); + submit_bio(rw, bio); } /** @@ -1012,7 +1106,7 @@ /* Verify requested block sizes. 
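submit_bh() above is also the template for code that wants to drive a block device with a bio of its own: allocate a one-vector bio, fill in device, sector and io_vec, set bi_end_io and pass it to submit_bio(). A minimal sketch (my_end_io/my_submit_page and the whole-page transfer are illustrative, not from this patch):

	static void my_end_io(struct bio *bio)
	{
		if (!(bio->bi_flags & BIO_UPTODATE))
			printk(KERN_ERR "my_end_io: I/O error\n");

		bio_free(bio);
	}

	static void my_submit_page(kdev_t dev, unsigned long sector, struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_BUFFER, 1);

		bio->bi_dev = dev;
		bio->bi_sector = sector;
		bio->bi_next = NULL;
		bio->bi_end_io = my_end_io;
		bio->bi_private = NULL;

		bio->bi_io_vec.bv_page = page;
		bio->bi_io_vec.bv_len = PAGE_SIZE;
		bio->bi_io_vec.bv_offset = 0;

		submit_bio(READ, bio);
	}

submit_bio() itself sets BIO_READ/BIO_WRITE and the read-ahead flag from the rw argument, so callers only fill in the fields shown here.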
*/ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1104,7 +1198,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name) { - struct buffer_head * bh; + struct bio *bio; int nsect; req->errors = 0; @@ -1112,24 +1206,25 @@ printk("end_request: I/O error, dev %s (%s), sector %lu\n", kdevname(req->rq_dev), name, req->sector); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio->bi_flags |= !!uptodate; + bio->bi_end_io(bio); + if ((bio = req->bio) != NULL) { req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; + req->current_nr_sectors = bio_sectors(bio); if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); } - req->buffer = bh->b_data; + req->buffer = bio_data(bio); return 1; } } @@ -1163,7 +1258,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); atomic_set(&queued_sectors, 0); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1202,110 +1296,16 @@ low_queued_sectors / 2, queue_nr_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
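On the completion side the calling convention is unchanged, end_that_request_first() simply retires one bio per call now and returns non-zero while the request still has more. For a controller that completes whole requests in one go, the finish-up code still looks roughly like the classic pattern (mydev_finish is a hypothetical name; drivers that dequeue at submit time, like cciss above, would skip the dequeue here):

	static void mydev_finish(struct request *rq, int uptodate)
	{
		/* retire every remaining bio on the request */
		while (end_that_request_first(rq, uptodate, "mydev"))
			;

		blkdev_dequeue_request(rq);
		end_that_request_last(rq);
	}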
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; @@ -1314,11 +1314,13 @@ EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); EXPORT_SYMBOL(queued_sectors); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/loop.c Thu Apr 12 04:05:14 2001 +++ linux/drivers/block/loop.c Mon May 21 23:07:59 2001 @@ -75,7 +75,7 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; +static struct loop_device *loop_dev, **loop_lookup; static int *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -86,10 +86,12 @@ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); + if (raw_buf != loop_buf) { + if (cmd == READ) + memcpy(loop_buf, raw_buf, size); + else + memcpy(raw_buf, loop_buf, size); + } return 0; } @@ -117,6 +119,7 @@ static int none_status(struct loop_device *lo, struct loop_info *info) { + lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -164,8 +167,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -178,8 +180,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -251,18 +253,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; 
+ desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -298,42 +299,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio->bi_flags & BIO_WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { - if (bh) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + /* + * check bi_end_io, may just be a remapped bio + */ + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_free(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -342,65 +347,57 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (bio->bi_flags & (BIO_UPTODATE | BIO_WRITE)) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + rbh->bi_flags = bio->bi_flags; + rbh->bi_end_io(rbh); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; - - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER); - if (bh) - break; + struct bio *bio; - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); + /* + * for xfer_funcs that can operate on the same bh, do that + */ + if (lo->lo_flags & LO_FLAGS_BH_REMAP) { + bio = rbh; + goto out_bh; + } - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_BUFFER, 1); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -408,35 +405,40 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_BUFFER); - if (bh->b_page) + bio->bi_io_vec.bv_page = alloc_page(GFP_BUFFER); + if (bio->bi_io_vec.bv_page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); - return bh; + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; + +out_bh: + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_flags = rbh->bi_flags; + spin_lock_irq(&lo->lo_lock); + bio->bi_dev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); + + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, int rw, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -453,9 +455,7 @@ goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rbh); /* * file backed, queue for loop_thread to handle @@ -465,9 +465,7 @@ * rbh locked at this point, noone else should clear * the dirty flag */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -475,12 +473,10 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - bh->b_private = 
rbh; - IV = loop_get_iv(lo, bh->b_rsector); + IV = loop_get_iv(lo, rbh->bi_sector); if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } @@ -492,14 +488,15 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + rbh->bi_flags &= ~BIO_UPTODATE; + rbh->bi_end_io(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -507,19 +504,21 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + if (!ret) + bio->bi_flags |= BIO_UPTODATE; + bio->bi_end_io(bio); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + if (!ret) + bio->bi_flags |= BIO_UPTODATE; + rbh->bi_end_io(rbh); + loop_put_buffer(bio); } } @@ -532,7 +531,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -566,12 +565,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -600,7 +599,7 @@ error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -620,7 +619,6 @@ * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
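Throughout this conversion, completion status travels in bi_flags instead of being passed to the end_io callback; the loop error path above clears or sets BIO_UPTODATE and then calls bi_end_io(). A minimal sketch of that convention, as a hypothetical helper (not something this patch adds):

#include <linux/bio.h>

/* hypothetical: complete a bio, recording success/failure in bi_flags */
static inline void example_complete_bio(struct bio *bio, int uptodate)
{
        if (uptodate)
                bio->bi_flags |= BIO_UPTODATE;
        else
                bio->bi_flags &= ~BIO_UPTODATE;

        bio->bi_end_io(bio);
}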
*/ - error = -EINVAL; if (!aops->readpage) goto out_putf; @@ -649,6 +647,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_BUFFER; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(inode->i_rdev)]) @@ -658,7 +657,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -983,13 +982,17 @@ if (!loop_dev) return -ENOMEM; + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1005,6 +1008,7 @@ memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1013,9 +1017,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1028,6 +1032,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/nbd.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/nbd.c Mon May 21 14:30:16 2001 @@ -251,7 +251,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -286,7 +286,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/rd.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/rd.c Tue May 15 15:14:24 2001 @@ -201,13 +201,11 @@ struct buffer_head *rbh; char *bdata; - minor = MINOR(sbh->b_rdev); if (minor >= NUM_RAMDISKS) goto fail; - offset = sbh->b_rsector << 9; len = sbh->b_size; @@ -225,21 +223,24 @@ /* I think that it is safe to assume that rbh is not in HighMem, though * sbh might be - NeilBrown */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) + if (sbh != rbh) { + bdata = bh_kmap(sbh); + + if (rw == READ) memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) + else memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); + + bh_kunmap(sbh); + } + mark_buffer_protected(rbh); brelse(rbh); sbh->b_end_io(sbh,1); return 0; fail: - sbh->b_end_io(sbh,0); + buffer_IO_error(sbh); return 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.5-pre4/drivers/char/raw.c Fri Apr 27 23:23:25 2001 +++ linux/drivers/char/raw.c Mon May 21 17:49:43 2001 @@ -285,13 +285,11 @@ unsigned long blocknr, blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; int 
sector_size, sector_bits, sector_mask; - int max_sectors; /* * First, a few checks on device size limits @@ -316,7 +314,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -334,18 +331,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. - */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -357,10 +346,8 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); + blocknr += blocks; if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/hd.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/hd.c Mon May 21 22:58:16 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -734,11 +733,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -841,6 +838,7 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ hd_gendisk.next = gendisk_head; gendisk_head = &hd_gendisk; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-cd.c Mon May 21 22:58:16 2001 @@ -958,7 +958,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), + nskip = MIN ((int)(rq->current_nr_sectors - bio_sectors(rq->bio)), sectors_to_transfer); while (nskip > 0) { @@ -977,8 +977,7 @@ /* If we've filled the present buffer but there's another chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && - rq->nr_sectors > 0) + if (rq->current_nr_sectors == 0 && rq->nr_sectors) cdrom_end_request (1, drive); /* If the buffers are full, cache the rest of the data in our @@ -1058,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. 
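The hd.c hunk above drops the per-major max_sectors[] array in favour of a per-queue limit. Roughly, a driver init now looks like this (the driver names are made up; the call itself is the blk_queue_max_sectors() added by this patch):

#include <linux/blkdev.h>

static void example_request_fn(request_queue_t *q);    /* the driver's request function */

static void example_init_queue(int major)
{
        request_queue_t *q = BLK_DEFAULT_QUEUE(major);

        blk_init_queue(q, example_request_fn);
        blk_queue_max_sectors(q, 255);  /* replaces max_sectors[major][minor] = 255 */
}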
*/ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,7 +1096,7 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", drive->name, rq->current_nr_sectors); @@ -1196,13 +1195,15 @@ start it over entirely, or even put it back on the request queue. */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->current_nr_sectors = bio_sectors(rq->bio); + rq->hard_nr_sectors = rq->nr_sectors; + rq->hard_sector = rq->sector; } /* @@ -1216,20 +1217,22 @@ /* If the request is relative to a partition, fix it up to refer to the absolute address. */ - if ((minor & PARTN_MASK) != 0) { + if (minor & PARTN_MASK) { rq->sector = block; minor &= ~PARTN_MASK; - rq->rq_dev = MKDEV (MAJOR(rq->rq_dev), minor); + rq->rq_dev = MKDEV(MAJOR(rq->rq_dev), minor); } /* We may be retrying this request after an error. Fix up any weirdness which might be present in the request packet. */ - restore_request (rq); + restore_request(rq); /* Satisfy whatever we can of this request from our cached sector. */ if (cdrom_read_from_buffer(drive)) return ide_stopped; + blk_attempt_remerge(&drive->queue, rq); + /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1477,7 +1480,7 @@ static ide_startstop_t cdrom_write_intr(ide_drive_t *drive) { - int stat, ireason, len, sectors_to_transfer; + int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; int i, dma_error = 0, dma = info->dma; ide_startstop_t startstop; @@ -1498,6 +1501,9 @@ return startstop; } + /* + * using dma, transfer is complete now + */ if (dma) { if (dma_error) return ide_error(drive, "dma error", stat); @@ -1519,12 +1525,13 @@ /* If we're not done writing, complain. * Otherwise, complete the command normally. */ + uptodate = 1; if (rq->current_nr_sectors > 0) { printk("%s: write_intr: data underrun (%ld blocks)\n", - drive->name, rq->current_nr_sectors); - cdrom_end_request(0, drive); - } else - cdrom_end_request(1, drive); + drive->name, rq->current_nr_sectors); + uptodate = 0; + } + cdrom_end_request(uptodate, drive); return ide_stopped; } @@ -1533,26 +1540,42 @@ if (cdrom_write_check_ireason(drive, len, ireason)) return ide_stopped; - /* The number of sectors we need to read from the drive. */ sectors_to_transfer = len / SECTOR_SIZE; - /* Now loop while we still have data to read from the drive. DMA - * transfers will already have been complete + /* + * now loop and write out the data */ while (sectors_to_transfer > 0) { - /* If we've filled the present buffer but there's another - chained buffer after it, move on. 
*/ - if (rq->current_nr_sectors == 0 && rq->nr_sectors > 0) - cdrom_end_request(1, drive); + int this_transfer; - atapi_output_bytes(drive, rq->buffer, rq->current_nr_sectors); - rq->nr_sectors -= rq->current_nr_sectors; - rq->current_nr_sectors = 0; - rq->sector += rq->current_nr_sectors; - sectors_to_transfer -= rq->current_nr_sectors; + if (!rq->current_nr_sectors) { + printk("ide-cd: write_intr: oops\n"); + break; + } + + /* + * Figure out how many sectors we can transfer + */ + this_transfer = MIN(sectors_to_transfer,rq->current_nr_sectors); + + while (this_transfer > 0) { + atapi_output_bytes(drive, rq->buffer, SECTOR_SIZE); + rq->buffer += SECTOR_SIZE; + --rq->nr_sectors; + --rq->current_nr_sectors; + ++rq->sector; + --this_transfer; + --sectors_to_transfer; + } + + /* + * current buffer complete, move on + */ + if (rq->current_nr_sectors == 0 && rq->nr_sectors) + cdrom_end_request (1, drive); } - /* arm handler */ + /* re-arm handler */ ide_set_handler(drive, &cdrom_write_intr, 5 * WAIT_CMD, NULL); return ide_started; } @@ -1583,10 +1606,26 @@ return cdrom_transfer_packet_command(drive, &pc, cdrom_write_intr); } -static ide_startstop_t cdrom_start_write(ide_drive_t *drive) +static ide_startstop_t cdrom_start_write(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; + /* + * writes *must* be 2kB frame aligned + */ + if ((rq->nr_sectors & 3) || (rq->sector & 3)) { + cdrom_end_request(0, drive); + return ide_stopped; + } + + /* + * for dvd-ram and such media, it's a really big deal to get + * big writes all the time. so scour the queue and attempt to + * remerge requests, often the plugging will not have had time + * to do this properly + */ + blk_attempt_remerge(&drive->queue, rq); + info->nsectors_buffered = 0; /* use dma, if possible. 
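cdrom_start_write() above enforces 2 kB frame alignment: with 512-byte sectors that is four sectors per frame, hence the '& 3' tests on sector and nr_sectors. The same check spelled out against the ide-cd.h constants (sketch only, helper name made up):

#include <linux/blkdev.h>
#include <linux/cdrom.h>

#define EXAMPLE_SECTORS_PER_FRAME       (CD_FRAMESIZE >> 9)     /* 2048 >> 9 == 4 */

/* a packet write may only start and end on a CD frame boundary */
static int example_frame_aligned(struct request *rq)
{
        return !(rq->sector & (EXAMPLE_SECTORS_PER_FRAME - 1)) &&
               !(rq->nr_sectors & (EXAMPLE_SECTORS_PER_FRAME - 1));
}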
we don't need to check more, since we @@ -1629,7 +1668,7 @@ if (rq->cmd == READ) action = cdrom_start_read(drive, block); else - action = cdrom_start_write(drive); + action = cdrom_start_write(drive, rq); } info->last_block = block; return action; @@ -1832,6 +1871,7 @@ pc.buffer = buf; pc.buflen = buflen; + pc.quiet = 1; pc.c[0] = GPCMD_READ_TOC_PMA_ATIP; pc.c[6] = trackno; pc.c[7] = (buflen >> 8); @@ -2112,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2621,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2826,7 +2867,12 @@ drive->part[0].nr_sects = toc->capacity * SECTORS_PER_FRAME; HWIF(drive)->gd->sizes[minor] = toc->capacity * BLOCKS_PER_FRAME; + /* + * reset block size, ide_revalidate_disk incorrectly sets it to + * 1024 even for CDROM's + */ blk_size[HWIF(drive)->major] = HWIF(drive)->gd->sizes; + set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE); } static diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.h Sat Apr 28 00:48:21 2001 +++ linux/drivers/ide/ide-cd.h Mon May 21 16:16:57 2001 @@ -37,11 +37,12 @@ /************************************************************************/ -#define SECTOR_SIZE 512 #define SECTOR_BITS 9 -#define SECTORS_PER_FRAME (CD_FRAMESIZE / SECTOR_SIZE) +#define SECTOR_SIZE (1 << SECTOR_BITS) +#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS) #define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) -#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE / SECTOR_SIZE) +#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE >> SECTOR_BITS) +#define SECTORS_MAX (131072 >> SECTOR_BITS) #define BLOCKS_PER_FRAME (CD_FRAMESIZE / BLOCK_SIZE) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-disk.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-disk.c Mon May 21 22:58:16 2001 @@ -251,15 +251,15 @@ /* Do we move to the next bh after this? 
*/ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->buffer = bio_data(bio); } } @@ -690,7 +690,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-dma.c Mon Jan 15 22:08:15 2001 +++ linux/drivers/ide/ide-dma.c Wed May 16 13:53:03 2001 @@ -213,32 +213,33 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; + struct bio *bio; int nents = 0; if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; + + bio = rq->bio; do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; + unsigned char *virt_addr = bio_data(bio); + unsigned int size = bio_size(bio); if (nents >= PRD_ENTRIES) return 0; - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) + while ((bio = bio->bi_next) != NULL) { + if ((virt_addr + size) != bio_data(bio)) break; - size += bh->b_size; + size += bio_size(bio); } memset(&sg[nents], 0, sizeof(*sg)); sg[nents].address = virt_addr; sg[nents].length = size; nents++; - } while (bh != NULL); + } while (bio); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-floppy.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-floppy.c Mon May 21 22:58:16 2001 @@ -1522,7 +1522,6 @@ ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1555,10 +1554,7 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } + blk_queue_max_sectors(&drive->queue, 64); (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-pci.c linux/drivers/ide/ide-pci.c --- 
/opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-pci.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/ide-pci.c Sun May 20 14:08:54 2001 @@ -708,7 +708,7 @@ /* * Set up BM-DMA capability (PnP BIOS should have done this) */ - if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) + if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530)) hwif->autodma = 0; /* default DMA off if we had to configure it here */ (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER); if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-probe.c Sun Mar 18 18:25:02 2001 +++ linux/drivers/ide/ide-probe.c Mon May 21 23:27:49 2001 @@ -597,6 +597,13 @@ q->queuedata = HWGROUP(drive); blk_init_queue(q, do_ide_request); + + /* IDE can do up to 128K per request. */ + blk_queue_max_sectors(q, 255); +#ifdef CONFIG_BLK_DEV_PDC4030 + if (hwif->chipset == ide_pdc4040) + blk_queue_max_sectors(q, 127); +#endif } /* @@ -749,7 +756,7 @@ { struct gendisk *gd, **gdp; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +769,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/ide.c Mon May 21 22:58:16 2001 @@ -1228,7 +1228,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1344,7 +1344,7 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? 
nuking plugged queue\n", drive->name); hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); /* @@ -2057,7 +2057,6 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi.h Sat Apr 28 00:49:19 2001 +++ linux/drivers/scsi/scsi.h Mon May 21 16:12:25 2001 @@ -389,9 +389,9 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) #include #define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) + (virt_to_phys(bio_data(X)+bio_size(X)-1)+1==virt_to_phys(bio_data(Y))) #else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) +#define CONTIGUOUS_BUFFERS(X,Y) BIO_CONTIG(X, Y) #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_lib.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/scsi_lib.c Sat May 19 04:10:33 2001 @@ -360,7 +360,7 @@ int frequeue) { struct request *req; - struct buffer_head *bh; + struct bio *bio; Scsi_Device * SDpnt; int nsect; @@ -373,30 +373,31 @@ kdevname(req->rq_dev), req->sector); } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; + req->bio = bio->bi_next; req->nr_sectors -= nsect; req->sector += nsect; - bh->b_reqnext = NULL; + bio->bi_next = NULL; sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->current_nr_sectors = bh->b_size >> 9; + bio->bi_flags |= !!uptodate; + bio->bi_end_io(bio); + if ((bio = req->bio) != NULL) { + req->current_nr_sectors = bio_sectors(bio); if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("scsi_end_request: buffer-list destroyed\n"); } } } - } while (sectors && bh); + } while (sectors && bio); /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ - if (req->bh) { + if (req->bio) { request_queue_t *q; if( !requeue ) @@ -406,7 +407,7 @@ q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -834,7 +835,7 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_merge.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_merge.c Mon May 21 22:58:16 2001 @@ -81,7 +81,7 @@ int dma_host, int segments) { - struct buffer_head *bh; + struct bio *bio; /* * Dump the information that we have. 
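Another recurring change in this patch: drivers no longer test q->plugged directly but go through the blk_queue_plugged() accessor (the flag word itself is introduced in the blkdev.h hunk further down). A sketch of the test as a driver would now write it (function name made up):

#include <linux/list.h>
#include <linux/blkdev.h>

/* returns true when the queue is worth running right now */
static int example_queue_ready(request_queue_t *q)
{
        return !blk_queue_plugged(q) && !list_empty(&q->queue_head);
}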
We know we have an @@ -90,12 +90,12 @@ printk("nr_segments is %x\n", req->nr_segments); printk("counted segments is %x\n", segments); printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) + for (bio = req->bio; bio->bi_next != NULL; bio = bio->bi_next) { printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); + bio, + bio_sectors(bio), + virt_to_phys(bio_data(bio) - 1)); } panic("Ththththaats all folks. Too dangerous to continue.\n"); } @@ -191,8 +191,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +200,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -224,10 +223,10 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bionext)) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (CONTIGUOUS_BUFFERS(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -242,22 +241,22 @@ * another segment. */ if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + && virt_to_phys(bio_data(bionext)) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +303,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_data((X))+bio_size((X)))|((long)bio_data((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,7 +318,7 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. 
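__count_segments() above boils down to: walk the request's bio list and start a new scatter-gather segment whenever two neighbouring bios are not physically contiguous. Stripped of the ISA DMA special cases, the idea looks like this (sketch, using the BIO_CONTIG() macro from the new bio.h):

#include <linux/bio.h>
#include <linux/blkdev.h>

static int example_count_segments(struct request *rq)
{
        struct bio *bio = rq->bio, *next;
        int segments = 1;

        if (!bio)
                return 0;

        while ((next = bio->bi_next) != NULL) {
                if (!BIO_CONTIG(bio, next))
                        segments++;
                bio = next;
        }
        return segments;
}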
*/ - if (req->nr_segments >= max_segments || + if (req->nr_segments >= q->max_segments || req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; @@ -329,8 +327,7 @@ static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two @@ -347,11 +344,10 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. @@ -371,7 +367,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -399,8 +395,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,9 +407,11 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif if (use_clustering) { @@ -425,16 +422,16 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(req->biotail)) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + && virt_to_phys(bio_data(bio)) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -447,16 +444,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -469,8 +465,7 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif if (use_clustering) { @@ -481,14 +476,14 @@ * the DMA threshold boundary. 
*/ if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bio)) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (CONTIGUOUS_BUFFERS(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + && virt_to_phys(bio_data(bio)) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -503,10 +498,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -516,7 +511,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. @@ -529,15 +524,13 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -590,7 +583,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -601,13 +593,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || + if (req->nr_segments + next->nr_segments - 1 > q->max_segments || req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { return 0; } @@ -636,7 +627,7 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(req->biotail)) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -645,8 +636,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && CONTIGUOUS_BUFFERS(req->biotail, next->bio) + && virt_to_phys(bio_data(req->biotail)) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -658,7 +649,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -671,7 +662,7 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } @@ -679,7 +670,7 @@ * first segment in next, then the check for hw segments was * done above already, so we can always merge. 
*/ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { return 0; @@ -694,7 +685,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } else { @@ -715,7 +706,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. @@ -728,12 +719,11 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -781,8 +771,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -797,7 +787,7 @@ * needed any more. Need to play with it and see if we hit the * panic. If not, then don't bother. */ - if (!SCpnt->request.bh) { + if (!SCpnt->request.bio) { /* * Case of page request (i.e. raw device), or unlinked buffer * Typically used for swapping, but this isn't how we do @@ -870,15 +860,15 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + bioprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { + for (count = 0, bio = SCpnt->request.bio; + bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bioprev)) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (CONTIGUOUS_BUFFERS(bioprev, bio)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -887,32 +877,32 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( virt_to_phys(bio_data(bio)) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; + sgpnt[count - 1].address = bio_data(bio); + sgpnt[count - 1].length += bio_size(bio); if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; + SCpnt->request_bufflen += bio_size(bio); } - bhprev = bh; + bioprev = bio; } /* @@ -1014,14 +1004,14 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = SCpnt->request.bio; + bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { @@ -1041,7 +1031,7 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; + bio = SCpnt->request.bio; buff = SCpnt->request.buffer; if (dma_host) { @@ -1050,7 +1040,7 @@ * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) + if (virt_to_phys(bio_data(SCpnt->request.bio)) + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sd.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/sd.c Wed May 16 14:20:54 2001 @@ -588,8 +588,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/sr.c Wed May 16 14:21:07 2001 @@ -218,8 +218,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -262,7 +262,7 @@ static int sr_scatter_pad(Scsi_Cmnd *SCpnt, int s_size) { struct scatterlist *sg, *old_sg = NULL; - int i, fsize, bsize, sg_ent; + int i, fsize, bsize, sg_ent, sg_count; char *front, *back; back = front = NULL; @@ -290,17 +290,24 @@ /* * extend or allocate new scatter-gather table */ - if (SCpnt->use_sg) + sg_count = SCpnt->use_sg; + if (sg_count) old_sg = (struct scatterlist *) SCpnt->request_buffer; else { - SCpnt->use_sg = 1; + sg_count = 1; sg_ent++; } - SCpnt->sglist_len = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; - if ((sg = scsi_malloc(SCpnt->sglist_len)) == NULL) + i = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; + if ((sg = scsi_malloc(i)) == NULL) goto no_mem; + /* + * no more failing memory allocs possible, we can safely assign + * SCpnt values now + */ + SCpnt->sglist_len = i; + SCpnt->use_sg = sg_count; memset(sg, 0, SCpnt->sglist_len); i = 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr_ioctl.c linux/drivers/scsi/sr_ioctl.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr_ioctl.c Fri Dec 29 23:07:22 2000 +++ linux/drivers/scsi/sr_ioctl.c Tue May 15 15:14:24 2001 @@ -530,6 +530,8 @@ target = MINOR(cdi->dev); switch (cmd) { + case BLKGETSIZE: + return put_user(scsi_CDs[target].capacity >> 1, (long *) arg); case BLKROSET: case BLKROGET: case BLKRASET: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.5-pre4/fs/Makefile Sat Mar 3 00:16:59 2001 +++ linux/fs/Makefile Mon May 21 23:09:44 2001 @@ -7,11 +7,11 @@ O_TARGET := fs.o -export-objs := filesystems.o dcache.o +export-objs := filesystems.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ + bio.o super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/bio.c linux/fs/bio.c --- 
/opt/kernel/linux-2.4.5-pre4/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Mon May 21 23:29:04 2001 @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep, *biovec_cachep; + +struct bio *bio_alloc(int gfp_mask, int loop) +{ + struct bio *bio; + + do { + bio = kmem_cache_alloc(bio_cachep, gfp_mask); + if (bio) + break; + + wakeup_bdflush(1); + } while (loop); + + return bio; +} + +void bio_free(struct bio *bio) +{ + kmem_cache_free(bio_cachep, bio); +} + +struct bio_vec *biovec_alloc(int gfp_mask, int loop) +{ + struct bio_vec *biov; + + do { + biov = kmem_cache_alloc(biovec_cachep, gfp_mask); + if (biov) + break; + + wakeup_bdflush(1); + } while (loop); + + return biov; +} + +void biovec_free(struct bio_vec *biov) +{ + kmem_cache_free(biovec_cachep, biov); +} + +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!(bio->bi_flags & BIO_UPTODATE)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a + * page I/O has completed. 
+ */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_free(bio); + return 1; +} + +void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = bio->bi_private; + + end_kio_request(kio, bio->bi_flags & BIO_UPTODATE); + bio_free(bio); +} + +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block) +{ + struct bio *bio; + int i, bs, offset; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro dev %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + if (blksize_size[MAJOR(dev)]) + bs = blksize_size[MAJOR(dev)][MINOR(dev)]; + else + bs = BLOCK_SIZE; + + offset = kio->offset & ~PAGE_MASK; + + for (i = 0; i < kio->nr_pages; i++, block++) { + bio = bio_alloc(GFP_BUFFER, 1); + + bio->bi_dev = dev; + bio->bi_sector = block * (bs >> 9); + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = PAGE_CACHE_SIZE - offset; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init init_bio(void) +{ + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + biovec_cachep = kmem_cache_create("biovec", sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!biovec_cachep) + panic("bio: can't create biovec_cachep slab cache\n"); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_free); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.5-pre4/fs/buffer.c Sun May 20 14:05:16 2001 +++ linux/fs/buffer.c Mon May 21 17:52:28 2001 @@ -1967,57 +1967,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). 
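Taken together, the new interface amounts to: allocate a bio, describe one page worth of data in bi_io_vec, point bi_end_io at a completion routine, and hand it to submit_bio(), much as ll_rw_kio() does above. A rough single-page read using only what this patch introduces (the completion routine and function names are made up):

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mm.h>

static void example_end_io(struct bio *bio)
{
        /* success or failure is recorded in bi_flags by the block layer */
        if (!(bio->bi_flags & BIO_UPTODATE))
                printk(KERN_ERR "example: I/O error on %s\n", kdevname(bio->bi_dev));

        bio_free(bio);
}

static void example_read_page(kdev_t dev, sector_t sector, struct page *page)
{
        struct bio *bio = bio_alloc(GFP_BUFFER, 1);     /* second arg: retry until it succeeds */

        bio->bi_dev = dev;
        bio->bi_sector = sector;
        bio->bi_io_vec.bv_page = page;
        bio->bi_io_vec.bv_len = PAGE_SIZE;
        bio->bi_io_vec.bv_offset = 0;
        bio->bi_end_io = example_end_io;
        bio->bi_private = NULL;

        submit_bio(READ, bio);
}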
* @@ -2032,18 +1981,11 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, unsigned long b[], int size) { - int err; - int length; int transferred; int i; - int bufind; int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; + struct kiobuf * iobuf; struct page * map; - struct buffer_head *tmp, **bhs = NULL; if (!nr) return 0; @@ -2058,88 +2000,31 @@ return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + if (!map) + return -EFAULT; + } } /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; + transferred = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - tmp = bhs[bhind++]; - - tmp->b_dev = B_FREE; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - length -= size; - offset += size; - - atomic_inc(&iobuf->io_count); - - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? 
*/ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; + transferred += iobuf->length; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); } - finished: - if (transferred) - return transferred; - return err; + /* + * now they are all submitted, wait for completion + */ + for (i = 0; i < nr; i++) + kiobuf_wait_for_io(iovec[i]); + + return transferred; } /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.5-pre4/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Mon May 21 17:33:55 2001 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Mon May 21 15:41:53 2001 @@ -0,0 +1,92 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +/* + * transition to 64-bit sector_t, possibly making it an option... 
+ */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + kdev_t bi_dev; + sector_t bi_sector; + struct bio *bi_next; /* request queue link */ + struct bio_vec bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * end I/O flags + */ +#define BIO_UPTODATE 1 /* ok after I/O completion */ +#define BIO_READ 2 /* read request */ +#define BIO_WRITE 4 /* write request */ +#define BIO_RW_AHEAD 8 /* read/write ahead */ +#define BIO_EOF 16 /* out-out-bounds error */ +#define BIO_BARRIER 32 /* barrier I/O */ + +#define bio_barrier(bio) ((bio)->bi_flags & BIO_BARRIER) + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) + +#define BIO_CONTIG(bio, nxt) \ + bio_page((bio)) == bio_page((nxt)) && \ + bio_offset((bio)) + bio_size((bio)) == bio_offset((nxt)) + +typedef void (bi_end_io_t) (struct bio *); + +extern struct bio *bio_alloc(int, int); +extern void bio_free(struct bio *); +extern struct bio_vec *biovec_alloc(int, int); +extern void biovec_free(struct bio_vec *); + +#endif /* __LINUX_BIO_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h Sat Apr 28 00:49:25 2001 +++ linux/include/linux/blk.h Mon May 21 23:26:38 2001 @@ -87,6 +87,14 @@ static inline void blkdev_dequeue_request(struct request * req) { + request_queue_t *q = req->q; + + if (q) { + elevator_t *e = &q->elevator; + if (e->last_merge == req) + e->last_merge = NULL; + } + list_del(&req->queue); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h Sat Apr 28 00:48:49 2001 +++ linux/include/linux/blkdev.h Mon May 21 23:26:27 2001 @@ -42,25 +42,19 @@ void * special; char * buffer; struct semaphore * sem; - struct buffer_head * bh; - struct buffer_head * bhtail; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h linux/include/linux/blk.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h	Sat Apr 28 00:49:25 2001
+++ linux/include/linux/blk.h	Mon May 21 23:26:38 2001
@@ -87,6 +87,14 @@
 
 static inline void blkdev_dequeue_request(struct request * req)
 {
+	request_queue_t *q = req->q;
+
+	if (q) {
+		elevator_t *e = &q->elevator;
+		if (e->last_merge == req)
+			e->last_merge = NULL;
+	}
+
 	list_del(&req->queue);
 }
 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h	Sat Apr 28 00:48:49 2001
+++ linux/include/linux/blkdev.h	Mon May 21 23:26:27 2001
@@ -42,25 +42,19 @@
 	void * special;
 	char * buffer;
 	struct semaphore * sem;
-	struct buffer_head * bh;
-	struct buffer_head * bhtail;
+	struct bio *bio, *biotail;
 	request_queue_t *q;
 };
 
 #include <linux/elevator.h>
 
-typedef int (merge_request_fn) (request_queue_t *q,
-				struct request *req,
-				struct buffer_head *bh,
-				int);
-typedef int (merge_requests_fn) (request_queue_t *q,
-				 struct request *req,
-				 struct request *req2,
-				 int);
+typedef int (merge_request_fn) (request_queue_t *, struct request *,
+				struct bio *);
+typedef int (merge_requests_fn) (request_queue_t *, struct request *,
+				 struct request *);
 typedef void (request_fn_proc) (request_queue_t *q);
 typedef request_queue_t * (queue_proc) (kdev_t dev);
-typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
-typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct bio *bio);
 typedef void (unplug_device_fn) (void *q);
 
 /*
@@ -89,7 +83,7 @@
 	merge_request_fn	* front_merge_fn;
 	merge_requests_fn	* merge_requests_fn;
 	make_request_fn		* make_request_fn;
-	plug_device_fn		* plug_device_fn;
+
 	/*
 	 * The queue owner gets to use this for whatever they like.
 	 * ll_rw_blk doesn't touch it.
@@ -97,20 +91,19 @@
 	void			* queuedata;
 
 	/*
-	 * This is used to remove the plug when tq_disk runs.
+	 * queue needs bounce pages for pages above this limit (phys addr)
 	 */
-	struct tq_struct	plug_tq;
+	struct page		*bounce_limit;
 
 	/*
-	 * Boolean that indicates whether this queue is plugged or not.
+	 * This is used to remove the plug when tq_disk runs.
 	 */
-	char			plugged;
+	struct tq_struct	plug_tq;
 
 	/*
-	 * Boolean that indicates whether current_request is active or
-	 * not.
+	 * various queue flags, see QUEUE_* below
 	 */
-	char			head_active;
+	unsigned long		queue_flags;
 
 	/*
 	 * Is meant to protect the queue in the future instead of
@@ -121,9 +114,37 @@
 	/*
 	 * Tasks wait here for free request
 	 */
-	wait_queue_head_t	wait_for_request;
+	wait_queue_head_t	wait_for_request[2];
+
+	/*
+	 * queue settings
+	 */
+	unsigned int		max_sectors;
+	unsigned int		max_segments;
 };
 
+#define QUEUE_FLAG_PLUGGED	0	/* queue is plugged */
+#define QUEUE_FLAG_HEADACTIVE	1	/* has active head (going away) */
+
+#define blk_queue_flag(q, flag)	test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+#define blk_set_flag(q, flag)	set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+#define blk_clear_flag(q, flag)	clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+
+#define blk_queue_plugged(q)	blk_queue_flag(q, PLUGGED)
+#define blk_queue_headlive(q)	blk_queue_flag(q, HEADACTIVE)
+
+#define blk_mark_plugged(q)	blk_set_flag(q, PLUGGED)
+#define blk_mark_headactive(q)	blk_set_flag(q, HEADACTIVE)
+
+#define blk_set_unplugged(q)	test_and_clear_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
+#define blk_set_plugged(q)	test_and_set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
+
+#if CONFIG_HIGHMEM
+#define blk_queue_bounce(q, bio)	create_bounce((q), (bio))
+#else
+#define blk_queue_bounce(q, bio)	(bio)
+#endif
+
 struct blk_dev_struct {
 	/*
 	 * queue_proc has to be atomic
@@ -150,10 +171,11 @@
 extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
 extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
 extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
-extern void generic_make_request(int rw, struct buffer_head * bh);
-extern request_queue_t *blk_get_queue(kdev_t dev);
-extern inline request_queue_t *__blk_get_queue(kdev_t dev);
+extern void generic_make_request(int rw, struct bio *bio);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
 extern void blkdev_release_request(struct request *);
+extern inline void blk_wake_queue(request_queue_t *);
+extern void blk_attempt_remerge(request_queue_t *, struct request *);
 
 /*
  * Access functions for manipulating queue properties
@@ -162,6 +184,8 @@
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_headactive(request_queue_t *, int);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void blk_queue_bounce_limit(request_queue_t *, struct page *);
+extern void blk_queue_max_sectors(request_queue_t *q, int);
 extern void generic_unplug_device(void *);
 
 extern int * blk_size[MAX_BLKDEV];
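Purely as illustration of the new queue_flags helpers above (a sketch, not code from the patch; the real plug/unplug paths live in ll_rw_blk.c):

/*
 * Sketch: plugging/unplugging with the atomic queue_flags helpers
 * instead of the old 'plugged' char. Locking (io_request_lock) is
 * omitted for brevity.
 */
static void example_plug_device(request_queue_t *q)
{
	/* only schedule the unplug task the first time we plug */
	if (!blk_set_plugged(q))
		queue_task(&q->plug_tq, &tq_disk);
}

static void example_unplug_device(request_queue_t *q)
{
	if (blk_set_unplugged(q) && q->request_fn)
		q->request_fn(q);
}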
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/elevator.h linux/include/linux/elevator.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/elevator.h	Fri Feb 16 01:58:34 2001
+++ linux/include/linux/elevator.h	Mon May 21 22:58:16 2001
@@ -5,8 +5,8 @@
 			    struct list_head *,
 			    struct list_head *, int);
 
-typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
-				 struct buffer_head *, int, int);
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
+				 struct list_head *, struct bio *, int);
 
 typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
 
@@ -21,14 +21,16 @@
 	elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
 
+	struct request *last_merge;
+
 	unsigned int queue_ID;
 };
 
-int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *, int);
 void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
 void elevator_noop_merge_req(struct request *, struct request *);
 
-int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *, int);
 void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
 void elevator_linus_merge_req(struct request *, struct request *);
 
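The new last_merge field lets the elevator cache the request it last merged into, so back-to-back sequential I/O does not have to rescan the whole queue; blkdev_dequeue_request() above clears it when that request leaves the queue. A rough sketch of how a merge function could use it (illustrative only; the real logic belongs in drivers/block/elevator.c):

/*
 * Sketch: try the cached request before scanning the queue. Uses the
 * existing ELEVATOR_BACK_MERGE/ELEVATOR_NO_MERGE return codes.
 */
static int example_try_last_merge(request_queue_t *q, struct request **req,
				  struct bio *bio)
{
	struct request *__rq = q->elevator.last_merge;

	if (__rq && __rq->sector + __rq->nr_sectors == bio->bi_sector) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}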
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/fs.h linux/include/linux/fs.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/fs.h	Sun May 20 14:05:17 2001
+++ linux/include/linux/fs.h	Mon May 21 15:42:31 2001
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -234,26 +235,25 @@
 	kdev_t b_dev;			/* device (B_FREE = free) */
 
 	atomic_t b_count;		/* users using this block */
-	kdev_t b_rdev;			/* Real device */
 	unsigned long b_state;		/* buffer state bitmap (see above) */
 	unsigned long b_flushtime;	/* Time when (dirty) buffer should be written */
 
 	struct buffer_head *b_next_free;/* lru/free list linkage */
 	struct buffer_head *b_prev_free;/* doubly linked list of buffers */
 	struct buffer_head *b_this_page;/* circular list of buffers in one page */
-	struct buffer_head *b_reqnext;	/* request queue */
-
 	struct buffer_head **b_pprev;	/* doubly linked list of hash-queue */
 	char * b_data;			/* pointer to data block */
 	struct page *b_page;		/* the page this bh is mapped to */
-	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completio
+n */
 	void *b_private;		/* reserved for b_end_io */
-	unsigned long b_rsector;	/* Real buffer location on disk */
 	wait_queue_head_t b_wait;
 
 	struct inode *	     b_inode;
 	struct list_head     b_inode_buffers;	/* doubly linked list of inode dirty buffers */
+
+	struct bio *b_bio;		/* allocated on I/O to/from buffer */
 };
 
 typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
@@ -1075,10 +1075,13 @@
 static inline void buffer_IO_error(struct buffer_head * bh)
 {
 	mark_buffer_clean(bh);
+
 	/*
-	 * b_end_io has to clear the BH_Uptodate bitflag in the error case!
+	 * b_end_io has to clear the BH_Uptodate bitflag in the read error
+	 * case, however buffer contents are not necessarily bad if a
+	 * write fails
 	 */
-	bh->b_end_io(bh, 0);
+	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
 }
 
 extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *);
@@ -1243,6 +1246,7 @@
 extern struct buffer_head * getblk(kdev_t, int, int);
 extern void ll_rw_block(int, int, struct buffer_head * bh[]);
 extern void submit_bh(int, struct buffer_head *);
+extern void submit_bio(int, struct bio *);
 extern int is_read_only(kdev_t);
 extern void __brelse(struct buffer_head *);
 static inline void brelse(struct buffer_head *buf)
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/highmem.h linux/include/linux/highmem.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/highmem.h	Sat Apr 28 00:48:31 2001
+++ linux/include/linux/highmem.h	Mon May 21 23:26:38 2001
@@ -2,6 +2,7 @@
 #define _LINUX_HIGHMEM_H
 
 #include
+#include
 #include
 
 #ifdef CONFIG_HIGHMEM
@@ -13,7 +14,7 @@
 
 /* declarations for linux/mm/highmem.c */
 FASTCALL(unsigned int nr_free_highpages(void));
-extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig);
+extern struct bio * create_bounce(request_queue_t *, struct bio * bio_orig);
 
 static inline char *bh_kmap(struct buffer_head *bh)
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/iobuf.h linux/include/linux/iobuf.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/iobuf.h	Sat Apr 28 00:48:51 2001
+++ linux/include/linux/iobuf.h	Mon May 21 16:10:47 2001
@@ -26,7 +26,6 @@
 #define KIO_MAX_ATOMIC_IO	512 /* in kb */
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
-#define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
 
 /* The main kiobuf struct used for all our IO! */
 
@@ -48,8 +47,6 @@
 
 	/* Always embed enough struct pages for atomic IO */
 	struct page *	map_array[KIO_STATIC_PAGES];
-	struct buffer_head * bh[KIO_MAX_SECTORS];
-	unsigned long blocks[KIO_MAX_SECTORS];
 
 	/* Dynamic state for IO completion: */
 	atomic_t	io_count;	/* IOs still in progress */
@@ -82,5 +79,8 @@
 int	brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
 		   kdev_t dev, unsigned long b[], int size);
 
+
+/* fs/bio.c */
+void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block);
 
 #endif /* __LINUX_IOBUF_H */
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/loop.h linux/include/linux/loop.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/loop.h	Wed Mar  7 04:35:36 2001
+++ linux/include/linux/loop.h	Mon May 21 22:58:16 2001
@@ -49,8 +49,8 @@
 	int		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct buffer_head	*lo_bh;
-	struct buffer_head	*lo_bhtail;
+	struct bio		*lo_bio;
+	struct bio		*lo_biotail;
 	int			lo_state;
 	struct semaphore	lo_sem;
 	struct semaphore	lo_ctl_mutex;
@@ -77,6 +77,7 @@
  */
 #define LO_FLAGS_DO_BMAP	1
 #define LO_FLAGS_READ_ONLY	2
+#define LO_FLAGS_BH_REMAP	4
 
 /*
  * Note that this structure gets the wrong offsets when directly used
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/slab.h linux/include/linux/slab.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/slab.h	Sat Apr 28 00:48:37 2001
+++ linux/include/linux/slab.h	Mon May 21 16:03:07 2001
@@ -74,6 +74,8 @@
 extern kmem_cache_t	*bh_cachep;
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sigact_cachep;
+extern kmem_cache_t	*bio_cachep;
+extern kmem_cache_t	*biovec_cachep;
 
 #endif	/* __KERNEL__ */
 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/kernel/ksyms.c linux/kernel/ksyms.c
--- /opt/kernel/linux-2.4.5-pre4/kernel/ksyms.c	Sun May 20 14:05:17 2001
+++ linux/kernel/ksyms.c	Mon May 21 23:04:55 2001
@@ -299,7 +299,6 @@
 EXPORT_SYMBOL(tq_disk);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(refile_buffer);
-EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_readahead);
 EXPORT_SYMBOL(file_moveto);
 
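With the per-sector bh[] and blocks[] arrays gone from struct kiobuf, raw I/O is meant to go through the new ll_rw_kio() helper in fs/bio.c instead of building buffer_heads one sector at a time. A hypothetical caller, for illustration only:

/*
 * Sketch: submit a mapped kiobuf and wait for it. Assumes 'kio' was set
 * up by the usual alloc_kiovec()/map_user_kiobuf() path.
 */
static int example_submit_kio(int rw, struct kiobuf *kio, kdev_t dev,
			      unsigned long blocknr)
{
	ll_rw_kio(rw, kio, dev, blocknr);
	kiobuf_wait_for_io(kio);

	return kio->errno;
}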
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.4.5-pre4/mm/highmem.c	Fri Apr 27 23:23:25 2001
+++ linux/mm/highmem.c	Mon May 21 02:48:28 2001
@@ -164,126 +164,96 @@
  * This will be moved to the block layer in 2.5.
  */
 
-static inline void copy_from_high_bh (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_from_high_bio(struct bio *to, struct bio *from)
 {
-	struct page *p_from;
 	char *vfrom;
 	unsigned long flags;
 
-	p_from = from->b_page;
-
 	/*
 	 * Since this can be executed from IRQ context, reentrance
 	 * on the same CPU must be avoided:
 	 */
 	__save_flags(flags);
 	__cli();
-	vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
-	memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
+	vfrom = kmap_atomic(bio_page(from), KM_BOUNCE_WRITE);
+	memcpy(bio_data(to), vfrom + bio_offset(from), bio_size(to));
 	kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
 	__restore_flags(flags);
 }
 
-static inline void copy_to_high_bh_irq (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
-	struct page *p_to;
 	char *vto;
 	unsigned long flags;
 
-	p_to = to->b_page;
 	__save_flags(flags);
 	__cli();
-	vto = kmap_atomic(p_to, KM_BOUNCE_READ);
-	memcpy(vto + bh_offset(to), from->b_data, to->b_size);
+	vto = kmap_atomic(bio_page(to), KM_BOUNCE_READ);
+	memcpy(vto + bio_offset(to), bio_data(from), bio_size(to));
 	kunmap_atomic(vto, KM_BOUNCE_READ);
 	__restore_flags(flags);
 }
 
-static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+static inline void bounce_end_io (struct bio *bio)
 {
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	struct bio *bio_orig = bio->bi_private;
 
-	bh_orig->b_end_io(bh_orig, uptodate);
-	__free_page(bh->b_page);
-#ifdef HIGHMEM_DEBUG
-	/* Don't clobber the constructed slab cache */
-	init_waitqueue_head(&bh->b_wait);
-#endif
-	kmem_cache_free(bh_cachep, bh);
+	bio_orig->bi_end_io(bio_orig);
+	__free_page(bio_page(bio));
+	bio_free(bio);
 }
 
-static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_write (struct bio *bio)
 {
-	bounce_end_io(bh, uptodate);
+	bounce_end_io(bio);
 }
 
-static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_read (struct bio *bio)
 {
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	struct bio *bio_orig = bio->bi_private;
+
+	/*
+	 * was this particular page out-of-reach originally?
+	 */
+	if (bio->bi_flags & BIO_UPTODATE)
+		copy_to_high_bio_irq(bio_orig, bio);
 
-	if (uptodate)
-		copy_to_high_bh_irq(bh_orig, bh);
-	bounce_end_io(bh, uptodate);
+	bounce_end_io(bio);
 }
 
-struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
+struct bio *create_bounce(request_queue_t *q, struct bio *bio_orig)
 {
 	struct page *page;
-	struct buffer_head *bh;
+	struct bio *bio;
 
-	if (!PageHighMem(bh_orig->b_page))
-		return bh_orig;
+	if (bio_page(bio_orig) < q->bounce_limit)
+		return bio_orig;
 
-repeat_bh:
-	bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER);
-	if (!bh) {
-		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
-		goto repeat_bh;
-	}
-	/*
-	 * This is wasteful for 1k buffers, but this is a stopgap measure
-	 * and we are being ineffective anyway. This approach simplifies
-	 * things immensly. On boxes with more than 4GB RAM this should
-	 * not be an issue anyway.
-	 */
-repeat_page:
-	page = alloc_page(GFP_BUFFER);
-	if (!page) {
-		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
-		goto repeat_page;
-	}
-	set_bh_page(bh, page, 0);
-
-	bh->b_next = NULL;
-	bh->b_blocknr = bh_orig->b_blocknr;
-	bh->b_size = bh_orig->b_size;
-	bh->b_list = -1;
-	bh->b_dev = bh_orig->b_dev;
-	bh->b_count = bh_orig->b_count;
-	bh->b_rdev = bh_orig->b_rdev;
-	bh->b_state = bh_orig->b_state;
-#ifdef HIGHMEM_DEBUG
-	bh->b_flushtime = jiffies;
-	bh->b_next_free = NULL;
-	bh->b_prev_free = NULL;
-	/* bh->b_this_page */
-	bh->b_reqnext = NULL;
-	bh->b_pprev = NULL;
-#endif
-	/* bh->b_page */
-	if (rw == WRITE) {
-		bh->b_end_io = bounce_end_io_write;
-		copy_from_high_bh(bh, bh_orig);
+	bio = bio_alloc(GFP_BUFFER, 1);
+
+	do {
+		page = alloc_page(GFP_BUFFER);
+		if (page)
+			break;
+
+		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
+	} while (1);
+
+	bio->bi_sector = bio_orig->bi_sector;
+	bio->bi_dev = bio_orig->bi_dev;
+	bio->bi_private = bio_orig;
+	bio->bi_flags = bio_orig->bi_flags;
+
+	bio->bi_io_vec.bv_page = page;
+	bio->bi_io_vec.bv_len = bio_size(bio_orig);
+	bio->bi_io_vec.bv_offset = bio_offset(bio_orig);
+
+	if (bio->bi_flags & WRITE) {
+		bio->bi_end_io = bounce_end_io_write;
+		copy_from_high_bio(bio, bio_orig);
 	} else
-		bh->b_end_io = bounce_end_io_read;
-	bh->b_private = (void *)bh_orig;
-	bh->b_rsector = bh_orig->b_rsector;
-#ifdef HIGHMEM_DEBUG
-	memset(&bh->b_wait, -1, sizeof(bh->b_wait));
-#endif
+		bio->bi_end_io = bounce_end_io_read;
 
-	return bh;
+	return bio;
 }
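Since create_bounce() now keys off q->bounce_limit instead of PageHighMem(), the intent is that a driver bounces incoming bios through the blk_queue_bounce() wrapper from blkdev.h before touching the data. A sketch of where that would sit in a make_request function (illustrative only; example_driver_queue() is a made-up placeholder):

/*
 * Sketch: bounce a bio that sits above the queue's limit, then hand it
 * to the driver's own queueing. The bounced bio keeps the original in
 * bi_private and completes it from bounce_end_io() above.
 */
static int example_make_request(request_queue_t *q, int rw, struct bio *bio)
{
	struct bio *nbio = blk_queue_bounce(q, bio);

	/* from here on bio_data(nbio) is safe to touch directly */
	example_driver_queue(q, rw, nbio);

	return 0;
}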