diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/cciss.c Mon May 21 23:55:42 2001 @@ -1061,17 +1061,18 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) + while(bio) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio->bi_flags |= !!status; + bio->bi_end_io(bio); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1156,49 +1157,7 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* @@ -1213,15 +1172,20 @@ CommandList_struct *c; int log_unit, start_blk, seg, sect; char *lastdataend; - struct buffer_head *bh; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - // Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + + // Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } @@ -1235,7 +1199,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -1246,7 +1210,7 @@ return; } c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1263,34 +1227,34 @@ c->Request.CDB[0] = (creq->cmd == READ) ? 
CCISS_READ : CCISS_WRITE; start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ seg = 0; lastdataend = NULL; sect = 0; - while(bh) + while(bio) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + sect += bio_sectors(bio); + if (bio_data(bio) == lastdataend) { // tack it on to the last segment - c->SG[seg-1].Len +=bh->b_size; - lastdataend += bh->b_size; + c->SG[seg-1].Len += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == MAXSGENTRIES) BUG(); - c->SG[seg].Len = bh->b_size; - temp64.val = (__u64) virt_to_bus(bh->b_data); + c->SG[seg].Len = bio_size(bio); + temp64.val = (__u64) virt_to_bus(bio_data(bio)); c->SG[seg].Addr.lower = temp64.val32.lower; c->SG[seg].Addr.upper = temp64.val32.upper; c->SG[0].Ext = 0; // we are not chaining - lastdataend = bh->b_data + bh->b_size; + lastdataend = bio_data(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* track how many SG entries we are using */ if( seg > h->maxSG) @@ -1380,10 +1344,11 @@ } } } + /* * See if we can queue up some more IO */ - do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); spin_unlock_irqrestore(&io_request_lock, flags); } /* @@ -1876,17 +1841,12 @@ q->queuedata = hba[i]; blk_init_queue(q, do_cciss_request); blk_queue_headactive(q, 0); + q->max_segments = MAXSGENTRIES; /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cciss_cmd.h Sun May 20 14:05:11 2001 +++ linux/drivers/block/cciss_cmd.h Wed May 16 19:02:03 2001 @@ -227,7 +227,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/cpqarray.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/cpqarray.c Mon May 21 23:54:54 2001 @@ -145,7 +145,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -346,47 +346,6 @@ } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int 
max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -521,14 +480,11 @@ q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); blk_queue_headactive(q, 0); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -905,13 +861,18 @@ int seg, sect; char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; -// Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } @@ -925,7 +886,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -936,7 +897,7 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -944,29 +905,29 @@ c->size += sizeof(rblk_t); c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->bio = bio; DBGPX( - if (bh == NULL) + if (bio == NULL) panic("bh == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); seg = 0; lastdataend = NULL; sect = 0; - while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - c->req.sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + while(bio) { + sect += bio_sectors(bio); + if (bio_data(bio) == lastdataend) { + c->req.sg[seg-1].size += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - c->req.sg[seg].size = bh->b_size; - c->req.sg[seg].addr = (__u32)virt_to_bus(bh->b_data); - lastdataend = bh->b_data + bh->b_size; + c->req.sg[seg].size = bio_size(bio); + c->req.sg[seg].addr = (__u32)virt_to_bus(bio_data(bio)); + lastdataend = bio_data(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1028,17 +989,18 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; 
+ bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio->bi_flags |= !!ok; + bio->bi_end_io(bio); - bh = xbh; + bio = xbh; } } /* @@ -1067,7 +1029,7 @@ ok = 0; } if (timeout) ok = 0; - complete_buffers(cmd->bh, ok); + complete_buffers(cmd->bio, ok); } /* @@ -1126,7 +1088,7 @@ /* * See if we can queue up some more IO */ - do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); spin_unlock_irqrestore(&io_request_lock, flags); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/elevator.c Fri Feb 16 01:58:34 2001 +++ linux/drivers/block/elevator.c Mon May 21 22:58:16 2001 @@ -28,13 +28,13 @@ #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; @@ -45,21 +45,21 @@ /* * if the device is different (usually on a different partition), - * just check if bh is after rq + * just check if bio is after rq */ next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,23 +68,43 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } - int elevator_linus_merge(request_queue_t *q, struct request **req, struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct bio *bio, int rw) { struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio), ret = ELEVATOR_NO_MERGE; + elevator_t *e = &q->elevator; + struct request *__rq; + + /* + * first check it we can merge with the last inserted request. 
+ * this cuts down queue scans considerably, and removes the biggest + * reason to pass bigger/more bios in trough ->make_request + */ + if ((__rq = e->last_merge)) { + if (&__rq->queue == head || __rq->cmd != rw + || __rq->rq_dev != bio->bi_dev + || __rq->nr_sectors + count > q->max_sectors) + *req = NULL; + else if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } else if (__rq->sector - count == bio->bi_sector) { + __rq->elevator_sequence -= count; + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = blkdev_entry_to_request(entry); /* * simply "aging" of requests in queue @@ -94,24 +114,24 @@ if (__rq->sem) continue; - if (__rq->rq_dev != bh->b_rdev) + if (__rq->rq_dev != bio->bi_dev) continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > max_sectors) + if (__rq->nr_sectors + count > q->max_sectors) continue; if (__rq->elevator_sequence < count) break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { ret = ELEVATOR_BACK_MERGE; - *req = __rq; + e->last_merge = *req = __rq; break; - } else if (__rq->sector - count == bh->b_rsector) { + } else if (__rq->sector - count == bio->bi_sector) { ret = ELEVATOR_FRONT_MERGE; __rq->elevator_sequence -= count; - *req = __rq; + e->last_merge = *req = __rq; break; } } @@ -143,11 +163,10 @@ */ int elevator_noop_merge(request_queue_t *q, struct request **req, struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct bio *bio, int rw) { struct list_head *entry; - unsigned int count = bh->b_size >> 9; + unsigned int count = bio_sectors(bio); if (list_empty(&q->queue_head)) return ELEVATOR_NO_MERGE; @@ -158,16 +177,16 @@ if (__rq->cmd != rw) continue; - if (__rq->rq_dev != bh->b_rdev) + if (__rq->rq_dev != bio->bi_dev) continue; - if (__rq->nr_sectors + count > max_sectors) + if (__rq->nr_sectors + count > q->max_sectors) continue; if (__rq->sem) continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { *req = __rq; return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { + } else if (__rq->sector - count == bio->bi_sector) { *req = __rq; return ELEVATOR_FRONT_MERGE; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/floppy.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/floppy.c Wed May 16 14:00:53 2001 @@ -570,7 +570,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2382,7 +2382,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2429,7 +2429,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - 
INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2437,19 +2437,19 @@ /* Compute maximal contiguous buffer size. */ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2458,13 +2458,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2475,7 +2475,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2484,8 +2484,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2505,9 +2505,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2519,8 +2519,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2541,15 +2541,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2641,7 +2641,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2649,17 +2649,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = 
minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2681,7 +2681,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2689,11 +2689,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2709,12 +2709,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2722,8 +2722,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2736,7 +2736,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2751,7 +2751,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
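The cciss and cpqarray hunks above replace the old buffer_head walk with the same bio-chain idiom: follow bi_next from the request's first bio and use the bio_data()/bio_size()/bio_sectors() accessors instead of touching b_data/b_size directly. A minimal sketch of that scatter-gather build-up, using only accessors this patch introduces (the driver_sg struct and max_sg limit are hypothetical):

	struct driver_sg {
		unsigned long addr;
		unsigned int len;
	};

	static int driver_build_sg(struct request *rq, struct driver_sg *sg, int max_sg)
	{
		struct bio *bio;
		char *lastdataend = NULL;
		int seg = 0;

		for (bio = rq->bio; bio; bio = bio->bi_next) {
			if (bio_data(bio) == lastdataend) {
				/* contiguous with the previous bio: just extend it */
				sg[seg - 1].len += bio_size(bio);
			} else {
				if (seg == max_sg)
					BUG();
				sg[seg].addr = virt_to_bus(bio_data(bio));
				sg[seg].len = bio_size(bio);
				seg++;
			}
			lastdataend = bio_data(bio) + bio_size(bio);
		}
		return seg;		/* segments actually used */
	}

copy_buffer() in the floppy hunks walks the same chain, only into the track buffer instead of a hardware SG list.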
@@ -2770,8 +2770,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2789,12 +2789,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2841,7 +2841,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2859,8 +2859,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2913,8 +2913,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/block/ida_cmd.h Mon Dec 11 21:50:39 2000 +++ linux/drivers/block/ida_cmd.h Wed May 16 18:54:09 2001 @@ -96,7 +96,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/ll_rw_blk.c Thu Apr 12 21:15:52 2001 +++ linux/drivers/block/ll_rw_blk.c Mon May 21 23:26:20 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -113,11 +115,6 @@ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * queued sectors for all devices, used to make sure we don't fill all * of memory with locked buffers */ @@ -130,14 +127,18 @@ static int batch_requests, queue_nr_requests; static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} - -inline request_queue_t 
*__blk_get_queue(kdev_t dev) +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -147,22 +148,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - static int __blk_cleanup_queue(struct list_head *head) { struct request *rq; @@ -233,10 +218,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -261,15 +245,52 @@ * a kernel mapping, to by calling create_bounce() to create a * buffer in normal memory. **/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + q->max_segments = MAX_SEGMENTS; + q->max_sectors = MAX_SECTORS; q->make_request_fn = mfn; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @page: highest page we can do I/O to + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page, ie + * one the driver can still call bio_page() and get a valid address on. + **/ +void blk_queue_bounce_limit(request_queue_t *q, struct page *page) +{ + q->bounce_limit = page; +} + +/** + * blk_queue_max_setors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. 
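Together with the max_segments assignments in the cciss and cpqarray init code above, queue setup under this interface comes down to a handful of calls. A sketch of what a driver's init might look like (mydev_request_fn, MYDEV_MAX_SG and the 16MB limit are hypothetical, not part of this patch):

	#define MYDEV_MAX_SG	31				/* hypothetical adapter limit */

	static void mydev_request_fn(request_queue_t *q);	/* hypothetical */

	static void mydev_init_queue(request_queue_t *q)
	{
		blk_init_queue(q, mydev_request_fn);
		blk_queue_headactive(q, 0);

		/* largest request and scatter-gather list this adapter accepts */
		blk_queue_max_sectors(q, 128);
		q->max_segments = MYDEV_MAX_SG;

		/* bounce pages the hardware cannot reach, e.g. a 16MB ISA-style limit */
		blk_queue_bounce_limit(q, mem_map + ((16 * 1024 * 1024) >> PAGE_SHIFT) - 1);
	}

Drivers that can reach all of memory would instead raise the bounce limit so highmem pages are never copied; the default set in blk_init_queue() below keeps the old bounce-everything-highmem behaviour.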
+ **/ +void blk_queue_max_sectors(request_queue_t *q, int max_sectors) +{ + q->max_sectors = max_sectors; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) { - if (req->nr_segments < max_segments) { + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -277,36 +298,57 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (BIO_CONTIG(req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (BIO_CONTIG(bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (BIO_CONTIG(req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left on there to be serviced, it will be left in a state where + * it is still unplugged but not be recalled by the block layer. + * not be replugged, and thus request_fn will be run. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. + **/ +void inline blk_wake_queue(request_queue_t *q) +{ + if (!blk_set_plugged(q)) + queue_task(&q->plug_tq, &tq_disk); +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -315,16 +357,12 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!list_empty(&q->queue_head)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + blk_wake_queue(q); } /* @@ -332,13 +370,22 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } + if (blk_set_unplugged(q) && !list_empty(&q->queue_head)) + q->request_fn(q); } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. 
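The plugged-queue test and blk_wake_queue() give request functions the simple shape visible in the cciss/cpqarray hunks earlier: return while the queue is plugged, stop pulling requests when the controller is full, and let the completion interrupt call blk_wake_queue() to get request_fn re-run. A rough sketch of that pattern (the mydev_* helpers are hypothetical):

	static void mydev_request_fn(request_queue_t *q)
	{
		struct request *rq;

		/* still plugged: the elevator is busy merging, leave the queue alone */
		if (blk_queue_plugged(q))
			return;

		while (!list_empty(&q->queue_head)) {
			rq = blkdev_entry_to_request(q->queue_head.next);

			if (!mydev_have_resources())	/* hypothetical */
				return;

			blkdev_dequeue_request(rq);
			mydev_issue(rq);		/* hypothetical: hand rq to hardware */
		}
	}

When mydev_have_resources() fails we simply return; the interrupt handler later calls blk_wake_queue(q) once commands complete, which is exactly what the converted cciss/cpqarray interrupt paths above now do instead of calling their request functions directly.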
Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; @@ -367,14 +414,18 @@ rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->table, &q->request_freelist[i & 1]); + if (i < queue_nr_requests / 2) + list_add(&rq->table, &q->request_freelist[READ]); + else + list_add(&rq->table, &q->request_freelist[WRITE]); } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, int, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -418,19 +469,18 @@ q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + blk_set_unplugged(q); + blk_mark_headactive(q); + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. + * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, max_low_pfn + mem_map); + + blk_queue_make_request(q, __make_request); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, table); @@ -461,7 +511,7 @@ register struct request *rq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&q->wait_for_request, &wait); + add_wait_queue_exclusive(&q->wait_for_request[rw], &wait); for (;;) { __set_current_state(TASK_UNINTERRUPTIBLE); spin_lock_irq(&io_request_lock); @@ -472,23 +522,11 @@ generic_unplug_device(q); schedule(); } - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -546,9 +584,12 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) { spin_unlock_irq(&io_request_lock); BUG(); } @@ -558,6 +599,7 @@ * inserted at elevator_merge time */ list_add(&req->queue, insert_here); + e->last_merge = req; } inline void blk_refill_freelist(request_queue_t *q, int rw) @@ -600,7 +642,7 @@ if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); - wake_up_nr(&q->wait_for_request, wake_up); + 
wake_up_nr(&q->wait_for_request[rw], wake_up); } } } @@ -608,10 +650,7 @@ /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static void attempt_merge(request_queue_t *q, struct request *req) { struct request *next; @@ -620,7 +659,7 @@ return; if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors + || req->nr_sectors + next->nr_sectors > q->max_sectors || next->sem) return; /* @@ -629,90 +668,91 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. */ - if (!q->merge_requests_fn(q, req, next, max_segments)) + if (!q->merge_requests_fn(q, req, next)) return; q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); + blkdev_dequeue_request(next); blkdev_release_request(next); } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + if (&rq->queue != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - struct list_head * prev; + struct list_head *prev = rq->queue.prev; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + if (rq->queue.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request, but the driver can allow us to do + * it since it knows when it is safe to do so. 
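As the description says, this is meant for head-active drivers that take the front request off the queue almost as soon as it is inserted; before telling the hardware about it they can give the block layer one last merge opportunity. A possible call site, sketched (mydev_issue is hypothetical):

	static void mydev_start_head(request_queue_t *q)
	{
		struct request *rq = blkdev_entry_to_request(q->queue_head.next);

		/* the hardware has not seen rq yet, so merging into it is still safe */
		blk_attempt_remerge(q, rq);

		mydev_issue(rq);
	}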
+ **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +static int __make_request(request_queue_t *q, int rw, struct bio *bio) { unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int rw_ahead, el_ret, lat = 0; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; - count = bh->b_size >> 9; - sector = bh->b_rsector; + sector = bio->bi_sector; + count = bio_sectors(bio); rw_ahead = 0; /* normal case; gets changed below for READA */ switch (rw) { case READA: - rw_ahead = 1; rw = READ; /* drop into READ */ + rw_ahead = 1; case READ: case WRITE: - latency = elevator_request_latency(elevator, rw); + if (!(bio->bi_flags & BIO_BARRIER)) + lat = elevator_request_latency(elevator, rw); break; default: BUG(); goto end_io; } - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma) */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif - -/* look for a free request. */ - /* - * Try to coalesce the new request with old requests - */ - max_sectors = get_max_sectors(bh->b_rdev); + bio = blk_queue_bounce(q, bio); again: req = NULL; @@ -725,39 +765,44 @@ insert_here = head->prev; if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) + } else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio, rw); switch (el_ret) { case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid, if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
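The comment above is the practical consequence of blk_queue_bounce_limit(): once a driver tells the block layer not to bounce highmem pages, bio_data() and the cached req->buffer may point at a page with no kernel mapping. A PIO-style driver would then map the page around the copy instead, roughly like this (mydev_pio_write is hypothetical; bio_page()/bio_offset() are the accessors referenced in the bounce-limit description):

	static void mydev_copy_out(struct bio *bio)
	{
		/* bio_data() is not usable for an unmapped highmem page */
		char *buf = (char *) kmap(bio_page(bio)) + bio_offset(bio);

		mydev_pio_write(buf, bio_size(bio));	/* hypothetical copy to hardware */

		kunmap(bio_page(bio));
	}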
+ */ + req->buffer = bio_data(bio); req->current_nr_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -797,7 +842,7 @@ } /* fill up the request-info, and add it to the queue */ - req->elevator_sequence = latency; + req->elevator_sequence = lat; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; @@ -805,11 +850,10 @@ req->current_nr_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->sem = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; blk_started_io(count); add_request(q, req, insert_here); out: @@ -817,8 +861,9 @@ blkdev_release_request(freereq); spin_unlock_irq(&io_request_lock); return 0; + end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } @@ -856,34 +901,32 @@ * particular, no other flags, are changed by generic_make_request or * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request (int rw, struct bio *bio) { - int major = MAJOR(bh->b_rdev); + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; - if (!bh->b_end_io) - BUG(); - if (blk_size[major]) { - unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long maxsector = (blk_size[major][minor] << 1) + 1; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - if (blk_size[major][MINOR(bh->b_rdev)]) { + if (blk_size[major][minor]) { - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. */ printk(KERN_INFO "attempt to access beyond end of device\n"); printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, + kdevname(bio->bi_dev), rw, (sector + count)>>1, - blk_size[major][MINOR(bh->b_rdev)]); + blk_size[major][minor]); } - bh->b_end_io(bh, 0); + bio->bi_flags |= BIO_EOF; + bio->bi_end_io(bio); return; } } @@ -897,17 +940,74 @@ * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + kdevname(bio->bi_dev), bio->bi_sector); + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + } while (q->make_request_fn(q, rw, bio)); +} + +/* + * Default IO end handler, used by "ll_rw_block()". + */ +static void end_buffer_io_sync(struct buffer_head *bh, int uptodate) +{ + mark_buffer_uptodate(bh, uptodate); + unlock_buffer(bh); } +/* + * our default bio end_io callback handler for a buffer_head mapping. 
it's + * pretty simple, because no bio will ever contain more than one bio_vec + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bh->b_end_io(bh, bio->bi_flags & BIO_UPTODATE); + bio_free(bio); +} + +/** + * submit_bio: submit a bio to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. + * + */ +void submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + if (!bio->bi_end_io) + BUG(); + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); + BUG(); + } + + if (rw & WRITE) { + kstat.pgpgout += count; + bio->bi_flags |= BIO_WRITE; + } else { + kstat.pgpgin += count; + bio->bi_flags |= BIO_READ; + if (rw == READA) + bio->bi_flags |= BIO_RW_AHEAD; + } + + generic_make_request(rw, bio); +} /** * submit_bh: submit a buffer_head to the block device later for I/O @@ -924,39 +1024,33 @@ */ void submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; if (!test_bit(BH_Lock, &bh->b_state)) BUG(); + if (!buffer_mapped(bh)) + BUG(); + if (!bh->b_end_io) + BUG(); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; - - generic_make_request(rw, bh); + bio = bio_alloc(GFP_BUFFER, 1); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } -} + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); -/* - * Default IO end handler, used by "ll_rw_block()". - */ -static void end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - mark_buffer_uptodate(bh, uptodate); - unlock_buffer(bh); + submit_bio(rw, bio); } /** @@ -1012,7 +1106,7 @@ /* Verify requested block sizes. 
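submit_bh() above is also the template for code that wants to drive a block device with a bio of its own: allocate a one-vector bio, fill in device, sector and io_vec, set bi_end_io and pass it to submit_bio(). A minimal sketch (my_end_io/my_submit_page and the whole-page transfer are illustrative, not from this patch):

	static void my_end_io(struct bio *bio)
	{
		if (!(bio->bi_flags & BIO_UPTODATE))
			printk(KERN_ERR "my_end_io: I/O error\n");

		bio_free(bio);
	}

	static void my_submit_page(kdev_t dev, unsigned long sector, struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_BUFFER, 1);

		bio->bi_dev = dev;
		bio->bi_sector = sector;
		bio->bi_next = NULL;
		bio->bi_end_io = my_end_io;
		bio->bi_private = NULL;

		bio->bi_io_vec.bv_page = page;
		bio->bi_io_vec.bv_len = PAGE_SIZE;
		bio->bi_io_vec.bv_offset = 0;

		submit_bio(READ, bio);
	}

submit_bio() itself sets BIO_READ/BIO_WRITE and the read-ahead flag from the rw argument, so callers only fill in the fields shown here.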
*/ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1104,7 +1198,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name) { - struct buffer_head * bh; + struct bio *bio; int nsect; req->errors = 0; @@ -1112,24 +1206,25 @@ printk("end_request: I/O error, dev %s (%s), sector %lu\n", kdevname(req->rq_dev), name, req->sector); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio->bi_flags |= !!uptodate; + bio->bi_end_io(bio); + if ((bio = req->bio) != NULL) { req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; + req->current_nr_sectors = bio_sectors(bio); if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); } - req->buffer = bh->b_data; + req->buffer = bio_data(bio); return 1; } } @@ -1163,7 +1258,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); atomic_set(&queued_sectors, 0); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1202,110 +1296,16 @@ low_queued_sectors / 2, queue_nr_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
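On the completion side the calling convention is unchanged, end_that_request_first() simply retires one bio per call now and returns non-zero while the request still has more. For a controller that completes whole requests in one go, the finish-up code still looks roughly like the classic pattern (mydev_finish is a hypothetical name; drivers that dequeue at submit time, like cciss above, would skip the dequeue here):

	static void mydev_finish(struct request *rq, int uptodate)
	{
		/* retire every remaining bio on the request */
		while (end_that_request_first(rq, uptodate, "mydev"))
			;

		blkdev_dequeue_request(rq);
		end_that_request_last(rq);
	}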
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; @@ -1314,11 +1314,13 @@ EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); EXPORT_SYMBOL(queued_sectors); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/loop.c Thu Apr 12 04:05:14 2001 +++ linux/drivers/block/loop.c Mon May 21 23:07:59 2001 @@ -75,7 +75,7 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; +static struct loop_device *loop_dev, **loop_lookup; static int *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -86,10 +86,12 @@ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); + if (raw_buf != loop_buf) { + if (cmd == READ) + memcpy(loop_buf, raw_buf, size); + else + memcpy(raw_buf, loop_buf, size); + } return 0; } @@ -117,6 +119,7 @@ static int none_status(struct loop_device *lo, struct loop_info *info) { + lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -164,8 +167,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -178,8 +180,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -251,18 +253,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; 
+ desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -298,42 +299,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio->bi_flags & BIO_WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { - if (bh) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + /* + * check bi_end_io, may just be a remapped bio + */ + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_free(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -342,65 +347,57 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (bio->bi_flags & (BIO_UPTODATE | BIO_WRITE)) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + rbh->bi_flags = bio->bi_flags; + rbh->bi_end_io(rbh); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; - - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER); - if (bh) - break; + struct bio *bio; - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); + /* + * for xfer_funcs that can operate on the same bh, do that + */ + if (lo->lo_flags & LO_FLAGS_BH_REMAP) { + bio = rbh; + goto out_bh; + } - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_BUFFER, 1); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -408,35 +405,40 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_BUFFER); - if (bh->b_page) + bio->bi_io_vec.bv_page = alloc_page(GFP_BUFFER); + if (bio->bi_io_vec.bv_page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); - return bh; + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; + +out_bh: + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_flags = rbh->bi_flags; + spin_lock_irq(&lo->lo_lock); + bio->bi_dev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); + + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, int rw, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -453,9 +455,7 @@ goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rbh); /* * file backed, queue for loop_thread to handle @@ -465,9 +465,7 @@ * rbh locked at this point, noone else should clear * the dirty flag */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -475,12 +473,10 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - bh->b_private = 
rbh; - IV = loop_get_iv(lo, bh->b_rsector); + IV = loop_get_iv(lo, rbh->bi_sector); if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } @@ -492,14 +488,15 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + rbh->bi_flags &= ~BIO_UPTODATE; + rbh->bi_end_io(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -507,19 +504,21 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + if (!ret) + bio->bi_flags |= BIO_UPTODATE; + bio->bi_end_io(bio); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + if (!ret) + bio->bi_flags |= BIO_UPTODATE; + rbh->bi_end_io(rbh); + loop_put_buffer(bio); } } @@ -532,7 +531,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -566,12 +565,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -600,7 +599,7 @@ error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -620,7 +619,6 @@ * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
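Throughout this conversion, completion status travels in bi_flags instead of being passed to the end_io callback; the loop error path above clears or sets BIO_UPTODATE and then calls bi_end_io(). A minimal sketch of that convention, as a hypothetical helper (not something this patch adds):

#include <linux/bio.h>

/* hypothetical: complete a bio, recording success/failure in bi_flags */
static inline void example_complete_bio(struct bio *bio, int uptodate)
{
        if (uptodate)
                bio->bi_flags |= BIO_UPTODATE;
        else
                bio->bi_flags &= ~BIO_UPTODATE;

        bio->bi_end_io(bio);
}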
*/ - error = -EINVAL; if (!aops->readpage) goto out_putf; @@ -649,6 +647,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_BUFFER; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(inode->i_rdev)]) @@ -658,7 +657,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -983,13 +982,17 @@ if (!loop_dev) return -ENOMEM; + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1005,6 +1008,7 @@ memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1013,9 +1017,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1028,6 +1032,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/nbd.c Sun May 20 14:05:11 2001 +++ linux/drivers/block/nbd.c Mon May 21 14:30:16 2001 @@ -251,7 +251,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -286,7 +286,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/block/rd.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/rd.c Tue May 15 15:14:24 2001 @@ -201,13 +201,11 @@ struct buffer_head *rbh; char *bdata; - minor = MINOR(sbh->b_rdev); if (minor >= NUM_RAMDISKS) goto fail; - offset = sbh->b_rsector << 9; len = sbh->b_size; @@ -225,21 +223,24 @@ /* I think that it is safe to assume that rbh is not in HighMem, though * sbh might be - NeilBrown */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) + if (sbh != rbh) { + bdata = bh_kmap(sbh); + + if (rw == READ) memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) + else memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); + + bh_kunmap(sbh); + } + mark_buffer_protected(rbh); brelse(rbh); sbh->b_end_io(sbh,1); return 0; fail: - sbh->b_end_io(sbh,0); + buffer_IO_error(sbh); return 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.5-pre4/drivers/char/raw.c Fri Apr 27 23:23:25 2001 +++ linux/drivers/char/raw.c Mon May 21 17:49:43 2001 @@ -285,13 +285,11 @@ unsigned long blocknr, blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; int 
sector_size, sector_bits, sector_mask; - int max_sectors; /* * First, a few checks on device size limits @@ -316,7 +314,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -334,18 +331,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. - */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -357,10 +346,8 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); + blocknr += blocks; if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/hd.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/hd.c Mon May 21 22:58:16 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -734,11 +733,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -841,6 +838,7 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ hd_gendisk.next = gendisk_head; gendisk_head = &hd_gendisk; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-cd.c Mon May 21 22:58:16 2001 @@ -958,7 +958,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), + nskip = MIN ((int)(rq->current_nr_sectors - bio_sectors(rq->bio)), sectors_to_transfer); while (nskip > 0) { @@ -977,8 +977,7 @@ /* If we've filled the present buffer but there's another chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && - rq->nr_sectors > 0) + if (rq->current_nr_sectors == 0 && rq->nr_sectors) cdrom_end_request (1, drive); /* If the buffers are full, cache the rest of the data in our @@ -1058,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. 
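The hd.c hunk above drops the per-major max_sectors[] array in favour of a per-queue limit. Roughly, a driver init now looks like this (the driver names are made up; the call itself is the blk_queue_max_sectors() added by this patch):

#include <linux/blkdev.h>

static void example_request_fn(request_queue_t *q);    /* the driver's request function */

static void example_init_queue(int major)
{
        request_queue_t *q = BLK_DEFAULT_QUEUE(major);

        blk_init_queue(q, example_request_fn);
        blk_queue_max_sectors(q, 255);  /* replaces max_sectors[major][minor] = 255 */
}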
*/ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,7 +1096,7 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", drive->name, rq->current_nr_sectors); @@ -1196,13 +1195,15 @@ start it over entirely, or even put it back on the request queue. */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->current_nr_sectors = bio_sectors(rq->bio); + rq->hard_nr_sectors = rq->nr_sectors; + rq->hard_sector = rq->sector; } /* @@ -1216,20 +1217,22 @@ /* If the request is relative to a partition, fix it up to refer to the absolute address. */ - if ((minor & PARTN_MASK) != 0) { + if (minor & PARTN_MASK) { rq->sector = block; minor &= ~PARTN_MASK; - rq->rq_dev = MKDEV (MAJOR(rq->rq_dev), minor); + rq->rq_dev = MKDEV(MAJOR(rq->rq_dev), minor); } /* We may be retrying this request after an error. Fix up any weirdness which might be present in the request packet. */ - restore_request (rq); + restore_request(rq); /* Satisfy whatever we can of this request from our cached sector. */ if (cdrom_read_from_buffer(drive)) return ide_stopped; + blk_attempt_remerge(&drive->queue, rq); + /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1477,7 +1480,7 @@ static ide_startstop_t cdrom_write_intr(ide_drive_t *drive) { - int stat, ireason, len, sectors_to_transfer; + int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; int i, dma_error = 0, dma = info->dma; ide_startstop_t startstop; @@ -1498,6 +1501,9 @@ return startstop; } + /* + * using dma, transfer is complete now + */ if (dma) { if (dma_error) return ide_error(drive, "dma error", stat); @@ -1519,12 +1525,13 @@ /* If we're not done writing, complain. * Otherwise, complete the command normally. */ + uptodate = 1; if (rq->current_nr_sectors > 0) { printk("%s: write_intr: data underrun (%ld blocks)\n", - drive->name, rq->current_nr_sectors); - cdrom_end_request(0, drive); - } else - cdrom_end_request(1, drive); + drive->name, rq->current_nr_sectors); + uptodate = 0; + } + cdrom_end_request(uptodate, drive); return ide_stopped; } @@ -1533,26 +1540,42 @@ if (cdrom_write_check_ireason(drive, len, ireason)) return ide_stopped; - /* The number of sectors we need to read from the drive. */ sectors_to_transfer = len / SECTOR_SIZE; - /* Now loop while we still have data to read from the drive. DMA - * transfers will already have been complete + /* + * now loop and write out the data */ while (sectors_to_transfer > 0) { - /* If we've filled the present buffer but there's another - chained buffer after it, move on. 
*/ - if (rq->current_nr_sectors == 0 && rq->nr_sectors > 0) - cdrom_end_request(1, drive); + int this_transfer; - atapi_output_bytes(drive, rq->buffer, rq->current_nr_sectors); - rq->nr_sectors -= rq->current_nr_sectors; - rq->current_nr_sectors = 0; - rq->sector += rq->current_nr_sectors; - sectors_to_transfer -= rq->current_nr_sectors; + if (!rq->current_nr_sectors) { + printk("ide-cd: write_intr: oops\n"); + break; + } + + /* + * Figure out how many sectors we can transfer + */ + this_transfer = MIN(sectors_to_transfer,rq->current_nr_sectors); + + while (this_transfer > 0) { + atapi_output_bytes(drive, rq->buffer, SECTOR_SIZE); + rq->buffer += SECTOR_SIZE; + --rq->nr_sectors; + --rq->current_nr_sectors; + ++rq->sector; + --this_transfer; + --sectors_to_transfer; + } + + /* + * current buffer complete, move on + */ + if (rq->current_nr_sectors == 0 && rq->nr_sectors) + cdrom_end_request (1, drive); } - /* arm handler */ + /* re-arm handler */ ide_set_handler(drive, &cdrom_write_intr, 5 * WAIT_CMD, NULL); return ide_started; } @@ -1583,10 +1606,26 @@ return cdrom_transfer_packet_command(drive, &pc, cdrom_write_intr); } -static ide_startstop_t cdrom_start_write(ide_drive_t *drive) +static ide_startstop_t cdrom_start_write(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; + /* + * writes *must* be 2kB frame aligned + */ + if ((rq->nr_sectors & 3) || (rq->sector & 3)) { + cdrom_end_request(0, drive); + return ide_stopped; + } + + /* + * for dvd-ram and such media, it's a really big deal to get + * big writes all the time. so scour the queue and attempt to + * remerge requests, often the plugging will not have had time + * to do this properly + */ + blk_attempt_remerge(&drive->queue, rq); + info->nsectors_buffered = 0; /* use dma, if possible. 
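cdrom_start_write() above enforces 2 kB frame alignment: with 512-byte sectors that is four sectors per frame, hence the '& 3' tests on sector and nr_sectors. The same check spelled out against the ide-cd.h constants (sketch only, helper name made up):

#include <linux/blkdev.h>
#include <linux/cdrom.h>

#define EXAMPLE_SECTORS_PER_FRAME       (CD_FRAMESIZE >> 9)     /* 2048 >> 9 == 4 */

/* a packet write may only start and end on a CD frame boundary */
static int example_frame_aligned(struct request *rq)
{
        return !(rq->sector & (EXAMPLE_SECTORS_PER_FRAME - 1)) &&
               !(rq->nr_sectors & (EXAMPLE_SECTORS_PER_FRAME - 1));
}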
we don't need to check more, since we @@ -1629,7 +1668,7 @@ if (rq->cmd == READ) action = cdrom_start_read(drive, block); else - action = cdrom_start_write(drive); + action = cdrom_start_write(drive, rq); } info->last_block = block; return action; @@ -1832,6 +1871,7 @@ pc.buffer = buf; pc.buflen = buflen; + pc.quiet = 1; pc.c[0] = GPCMD_READ_TOC_PMA_ATIP; pc.c[6] = trackno; pc.c[7] = (buflen >> 8); @@ -2112,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2621,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2826,7 +2867,12 @@ drive->part[0].nr_sects = toc->capacity * SECTORS_PER_FRAME; HWIF(drive)->gd->sizes[minor] = toc->capacity * BLOCKS_PER_FRAME; + /* + * reset block size, ide_revalidate_disk incorrectly sets it to + * 1024 even for CDROM's + */ blk_size[HWIF(drive)->major] = HWIF(drive)->gd->sizes; + set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE); } static diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-cd.h Sat Apr 28 00:48:21 2001 +++ linux/drivers/ide/ide-cd.h Mon May 21 16:16:57 2001 @@ -37,11 +37,12 @@ /************************************************************************/ -#define SECTOR_SIZE 512 #define SECTOR_BITS 9 -#define SECTORS_PER_FRAME (CD_FRAMESIZE / SECTOR_SIZE) +#define SECTOR_SIZE (1 << SECTOR_BITS) +#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS) #define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) -#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE / SECTOR_SIZE) +#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE >> SECTOR_BITS) +#define SECTORS_MAX (131072 >> SECTOR_BITS) #define BLOCKS_PER_FRAME (CD_FRAMESIZE / BLOCK_SIZE) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-disk.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-disk.c Mon May 21 22:58:16 2001 @@ -251,15 +251,15 @@ /* Do we move to the next bh after this? 
*/ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->buffer = bio_data(bio); } } @@ -690,7 +690,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-dma.c Mon Jan 15 22:08:15 2001 +++ linux/drivers/ide/ide-dma.c Wed May 16 13:53:03 2001 @@ -213,32 +213,33 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; + struct bio *bio; int nents = 0; if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; + + bio = rq->bio; do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; + unsigned char *virt_addr = bio_data(bio); + unsigned int size = bio_size(bio); if (nents >= PRD_ENTRIES) return 0; - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) + while ((bio = bio->bi_next) != NULL) { + if ((virt_addr + size) != bio_data(bio)) break; - size += bh->b_size; + size += bio_size(bio); } memset(&sg[nents], 0, sizeof(*sg)); sg[nents].address = virt_addr; sg[nents].length = size; nents++; - } while (bh != NULL); + } while (bio); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-floppy.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-floppy.c Mon May 21 22:58:16 2001 @@ -1522,7 +1522,6 @@ ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1555,10 +1554,7 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } + blk_queue_max_sectors(&drive->queue, 64); (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-pci.c linux/drivers/ide/ide-pci.c --- 
/opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-pci.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/ide-pci.c Sun May 20 14:08:54 2001 @@ -708,7 +708,7 @@ /* * Set up BM-DMA capability (PnP BIOS should have done this) */ - if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) + if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530)) hwif->autodma = 0; /* default DMA off if we had to configure it here */ (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER); if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide-probe.c Sun Mar 18 18:25:02 2001 +++ linux/drivers/ide/ide-probe.c Mon May 21 23:27:49 2001 @@ -597,6 +597,13 @@ q->queuedata = HWGROUP(drive); blk_init_queue(q, do_ide_request); + + /* IDE can do up to 128K per request. */ + blk_queue_max_sectors(q, 255); +#ifdef CONFIG_BLK_DEV_PDC4030 + if (hwif->chipset == ide_pdc4040) + blk_queue_max_sectors(q, 127); +#endif } /* @@ -749,7 +756,7 @@ { struct gendisk *gd, **gdp; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +769,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.5-pre4/drivers/ide/ide.c Sun May 20 14:05:11 2001 +++ linux/drivers/ide/ide.c Mon May 21 22:58:16 2001 @@ -1228,7 +1228,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1344,7 +1344,7 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? 
nuking plugged queue\n", drive->name); hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); /* @@ -2057,7 +2057,6 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi.h Sat Apr 28 00:49:19 2001 +++ linux/drivers/scsi/scsi.h Mon May 21 16:12:25 2001 @@ -389,9 +389,9 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) #include #define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) + (virt_to_phys(bio_data(X)+bio_size(X)-1)+1==virt_to_phys(bio_data(Y))) #else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) +#define CONTIGUOUS_BUFFERS(X,Y) BIO_CONTIG(X, Y) #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_lib.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/scsi_lib.c Sat May 19 04:10:33 2001 @@ -360,7 +360,7 @@ int frequeue) { struct request *req; - struct buffer_head *bh; + struct bio *bio; Scsi_Device * SDpnt; int nsect; @@ -373,30 +373,31 @@ kdevname(req->rq_dev), req->sector); } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; + req->bio = bio->bi_next; req->nr_sectors -= nsect; req->sector += nsect; - bh->b_reqnext = NULL; + bio->bi_next = NULL; sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->current_nr_sectors = bh->b_size >> 9; + bio->bi_flags |= !!uptodate; + bio->bi_end_io(bio); + if ((bio = req->bio) != NULL) { + req->current_nr_sectors = bio_sectors(bio); if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("scsi_end_request: buffer-list destroyed\n"); } } } - } while (sectors && bh); + } while (sectors && bio); /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ - if (req->bh) { + if (req->bio) { request_queue_t *q; if( !requeue ) @@ -406,7 +407,7 @@ q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -834,7 +835,7 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/scsi_merge.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_merge.c Mon May 21 22:58:16 2001 @@ -81,7 +81,7 @@ int dma_host, int segments) { - struct buffer_head *bh; + struct bio *bio; /* * Dump the information that we have. 
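Another recurring change in this patch: drivers no longer test q->plugged directly but go through the blk_queue_plugged() accessor (the flag word itself is introduced in the blkdev.h hunk further down). A sketch of the test as a driver would now write it (function name made up):

#include <linux/list.h>
#include <linux/blkdev.h>

/* returns true when the queue is worth running right now */
static int example_queue_ready(request_queue_t *q)
{
        return !blk_queue_plugged(q) && !list_empty(&q->queue_head);
}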
We know we have an @@ -90,12 +90,12 @@ printk("nr_segments is %x\n", req->nr_segments); printk("counted segments is %x\n", segments); printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) + for (bio = req->bio; bio->bi_next != NULL; bio = bio->bi_next) { printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); + bio, + bio_sectors(bio), + virt_to_phys(bio_data(bio) - 1)); } panic("Ththththaats all folks. Too dangerous to continue.\n"); } @@ -191,8 +191,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +200,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -224,10 +223,10 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bionext)) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (CONTIGUOUS_BUFFERS(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -242,22 +241,22 @@ * another segment. */ if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + && virt_to_phys(bio_data(bionext)) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +303,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_data((X))+bio_size((X)))|((long)bio_data((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,7 +318,7 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. 
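__count_segments() above boils down to: walk the request's bio list and start a new scatter-gather segment whenever two neighbouring bios are not physically contiguous. Stripped of the ISA DMA special cases, the idea looks like this (sketch, using the BIO_CONTIG() macro from the new bio.h):

#include <linux/bio.h>
#include <linux/blkdev.h>

static int example_count_segments(struct request *rq)
{
        struct bio *bio = rq->bio, *next;
        int segments = 1;

        if (!bio)
                return 0;

        while ((next = bio->bi_next) != NULL) {
                if (!BIO_CONTIG(bio, next))
                        segments++;
                bio = next;
        }
        return segments;
}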
*/ - if (req->nr_segments >= max_segments || + if (req->nr_segments >= q->max_segments || req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; @@ -329,8 +327,7 @@ static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two @@ -347,11 +344,10 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. @@ -371,7 +367,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -399,8 +395,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,9 +407,11 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif if (use_clustering) { @@ -425,16 +422,16 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(req->biotail)) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + && virt_to_phys(bio_data(bio)) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -447,16 +444,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -469,8 +465,7 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif if (use_clustering) { @@ -481,14 +476,14 @@ * the DMA threshold boundary. 
*/ if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bio)) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (CONTIGUOUS_BUFFERS(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + && virt_to_phys(bio_data(bio)) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -503,10 +498,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -516,7 +511,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. @@ -529,15 +524,13 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -590,7 +583,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -601,13 +593,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || + if (req->nr_segments + next->nr_segments - 1 > q->max_segments || req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { return 0; } @@ -636,7 +627,7 @@ * the DMA threshold boundary. */ if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(req->biotail)) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -645,8 +636,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && CONTIGUOUS_BUFFERS(req->biotail, next->bio) + && virt_to_phys(bio_data(req->biotail)) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -658,7 +649,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -671,7 +662,7 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } @@ -679,7 +670,7 @@ * first segment in next, then the check for hw segments was * done above already, so we can always merge. 
*/ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { return 0; @@ -694,7 +685,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } else { @@ -715,7 +706,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. @@ -728,12 +719,11 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -781,8 +771,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -797,7 +787,7 @@ * needed any more. Need to play with it and see if we hit the * panic. If not, then don't bother. */ - if (!SCpnt->request.bh) { + if (!SCpnt->request.bio) { /* * Case of page request (i.e. raw device), or unlinked buffer * Typically used for swapping, but this isn't how we do @@ -870,15 +860,15 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + bioprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { + for (count = 0, bio = SCpnt->request.bio; + bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + virt_to_phys(bio_data(bioprev)) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (CONTIGUOUS_BUFFERS(bioprev, bio)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -887,32 +877,32 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( virt_to_phys(bio_data(bio)) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; + sgpnt[count - 1].address = bio_data(bio); + sgpnt[count - 1].length += bio_size(bio); if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; + SCpnt->request_bufflen += bio_size(bio); } - bhprev = bh; + bioprev = bio; } /* @@ -1014,14 +1004,14 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = SCpnt->request.bio; + bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { @@ -1041,7 +1031,7 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; + bio = SCpnt->request.bio; buff = SCpnt->request.buffer; if (dma_host) { @@ -1050,7 +1040,7 @@ * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) + if (virt_to_phys(bio_data(SCpnt->request.bio)) + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sd.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/sd.c Wed May 16 14:20:54 2001 @@ -588,8 +588,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr.c Sun May 20 14:05:16 2001 +++ linux/drivers/scsi/sr.c Wed May 16 14:21:07 2001 @@ -218,8 +218,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -262,7 +262,7 @@ static int sr_scatter_pad(Scsi_Cmnd *SCpnt, int s_size) { struct scatterlist *sg, *old_sg = NULL; - int i, fsize, bsize, sg_ent; + int i, fsize, bsize, sg_ent, sg_count; char *front, *back; back = front = NULL; @@ -290,17 +290,24 @@ /* * extend or allocate new scatter-gather table */ - if (SCpnt->use_sg) + sg_count = SCpnt->use_sg; + if (sg_count) old_sg = (struct scatterlist *) SCpnt->request_buffer; else { - SCpnt->use_sg = 1; + sg_count = 1; sg_ent++; } - SCpnt->sglist_len = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; - if ((sg = scsi_malloc(SCpnt->sglist_len)) == NULL) + i = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; + if ((sg = scsi_malloc(i)) == NULL) goto no_mem; + /* + * no more failing memory allocs possible, we can safely assign + * SCpnt values now + */ + SCpnt->sglist_len = i; + SCpnt->use_sg = sg_count; memset(sg, 0, SCpnt->sglist_len); i = 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr_ioctl.c linux/drivers/scsi/sr_ioctl.c --- /opt/kernel/linux-2.4.5-pre4/drivers/scsi/sr_ioctl.c Fri Dec 29 23:07:22 2000 +++ linux/drivers/scsi/sr_ioctl.c Tue May 15 15:14:24 2001 @@ -530,6 +530,8 @@ target = MINOR(cdi->dev); switch (cmd) { + case BLKGETSIZE: + return put_user(scsi_CDs[target].capacity >> 1, (long *) arg); case BLKROSET: case BLKROGET: case BLKRASET: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.5-pre4/fs/Makefile Sat Mar 3 00:16:59 2001 +++ linux/fs/Makefile Mon May 21 23:09:44 2001 @@ -7,11 +7,11 @@ O_TARGET := fs.o -export-objs := filesystems.o dcache.o +export-objs := filesystems.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ + bio.o super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/bio.c linux/fs/bio.c --- 
/opt/kernel/linux-2.4.5-pre4/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Mon May 21 23:29:04 2001 @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep, *biovec_cachep; + +struct bio *bio_alloc(int gfp_mask, int loop) +{ + struct bio *bio; + + do { + bio = kmem_cache_alloc(bio_cachep, gfp_mask); + if (bio) + break; + + wakeup_bdflush(1); + } while (loop); + + return bio; +} + +void bio_free(struct bio *bio) +{ + kmem_cache_free(bio_cachep, bio); +} + +struct bio_vec *biovec_alloc(int gfp_mask, int loop) +{ + struct bio_vec *biov; + + do { + biov = kmem_cache_alloc(biovec_cachep, gfp_mask); + if (biov) + break; + + wakeup_bdflush(1); + } while (loop); + + return biov; +} + +void biovec_free(struct bio_vec *biov) +{ + kmem_cache_free(biovec_cachep, biov); +} + +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!(bio->bi_flags & BIO_UPTODATE)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a + * page I/O has completed. 
+ */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_free(bio); + return 1; +} + +void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = bio->bi_private; + + end_kio_request(kio, bio->bi_flags & BIO_UPTODATE); + bio_free(bio); +} + +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block) +{ + struct bio *bio; + int i, bs, offset; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro dev %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + if (blksize_size[MAJOR(dev)]) + bs = blksize_size[MAJOR(dev)][MINOR(dev)]; + else + bs = BLOCK_SIZE; + + offset = kio->offset & ~PAGE_MASK; + + for (i = 0; i < kio->nr_pages; i++, block++) { + bio = bio_alloc(GFP_BUFFER, 1); + + bio->bi_dev = dev; + bio->bi_sector = block * (bs >> 9); + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = PAGE_CACHE_SIZE - offset; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init init_bio(void) +{ + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + biovec_cachep = kmem_cache_create("biovec", sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!biovec_cachep) + panic("bio: can't create biovec_cachep slab cache\n"); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_free); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.5-pre4/fs/buffer.c Sun May 20 14:05:16 2001 +++ linux/fs/buffer.c Mon May 21 17:52:28 2001 @@ -1967,57 +1967,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). 
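Taken together, the new interface amounts to: allocate a bio, describe one page worth of data in bi_io_vec, point bi_end_io at a completion routine, and hand it to submit_bio(), much as ll_rw_kio() does above. A rough single-page read using only what this patch introduces (the completion routine and function names are made up):

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mm.h>

static void example_end_io(struct bio *bio)
{
        /* success or failure is recorded in bi_flags by the block layer */
        if (!(bio->bi_flags & BIO_UPTODATE))
                printk(KERN_ERR "example: I/O error on %s\n", kdevname(bio->bi_dev));

        bio_free(bio);
}

static void example_read_page(kdev_t dev, sector_t sector, struct page *page)
{
        struct bio *bio = bio_alloc(GFP_BUFFER, 1);     /* second arg: retry until it succeeds */

        bio->bi_dev = dev;
        bio->bi_sector = sector;
        bio->bi_io_vec.bv_page = page;
        bio->bi_io_vec.bv_len = PAGE_SIZE;
        bio->bi_io_vec.bv_offset = 0;
        bio->bi_end_io = example_end_io;
        bio->bi_private = NULL;

        submit_bio(READ, bio);
}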
* @@ -2032,18 +1981,11 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, unsigned long b[], int size) { - int err; - int length; int transferred; int i; - int bufind; int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; + struct kiobuf * iobuf; struct page * map; - struct buffer_head *tmp, **bhs = NULL; if (!nr) return 0; @@ -2058,88 +2000,31 @@ return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + if (!map) + return -EFAULT; + } } /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; + transferred = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - tmp = bhs[bhind++]; - - tmp->b_dev = B_FREE; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - length -= size; - offset += size; - - atomic_inc(&iobuf->io_count); - - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? 
*/ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; + transferred += iobuf->length; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); } - finished: - if (transferred) - return transferred; - return err; + /* + * now they are all submitted, wait for completion + */ + for (i = 0; i < nr; i++) + kiobuf_wait_for_io(iovec[i]); + + return transferred; } /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.5-pre4/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Mon May 21 17:33:55 2001 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Mon May 21 15:41:53 2001 @@ -0,0 +1,92 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +/* + * transition to 64-bit sector_t, possibly making it an option... 
+ */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + kdev_t bi_dev; + sector_t bi_sector; + struct bio *bi_next; /* request queue link */ + struct bio_vec bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * end I/O flags + */ +#define BIO_UPTODATE 1 /* ok after I/O completion */ +#define BIO_READ 2 /* read request */ +#define BIO_WRITE 4 /* write request */ +#define BIO_RW_AHEAD 8 /* read/write ahead */ +#define BIO_EOF 16 /* out-out-bounds error */ +#define BIO_BARRIER 32 /* barrier I/O */ + +#define bio_barrier(bio) ((bio)->bi_flags & BIO_BARRIER) + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) + +#define BIO_CONTIG(bio, nxt) \ + bio_page((bio)) == bio_page((nxt)) && \ + bio_offset((bio)) + bio_size((bio)) == bio_offset((nxt)) + +typedef void (bi_end_io_t) (struct bio *); + +extern struct bio *bio_alloc(int, int); +extern void bio_free(struct bio *); +extern struct bio_vec *biovec_alloc(int, int); +extern void biovec_free(struct bio_vec *); + +#endif /* __LINUX_BIO_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h Sat Apr 28 00:49:25 2001 +++ linux/include/linux/blk.h Mon May 21 23:26:38 2001 @@ -87,6 +87,14 @@ static inline void blkdev_dequeue_request(struct request * req) { + request_queue_t *q = req->q; + + if (q) { + elevator_t *e = &q->elevator; + if (e->last_merge == req) + e->last_merge = NULL; + } + list_del(&req->queue); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h Sat Apr 28 00:48:49 2001 +++ linux/include/linux/blkdev.h Mon May 21 23:26:27 2001 @@ -42,25 +42,19 @@ void * special; char * buffer; struct semaphore * sem; - struct buffer_head * bh; - struct buffer_head * bhtail; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h linux/include/linux/blk.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/blk.h	Sat Apr 28 00:49:25 2001
+++ linux/include/linux/blk.h	Mon May 21 23:26:38 2001
@@ -87,6 +87,14 @@
 
 static inline void blkdev_dequeue_request(struct request * req)
 {
+	request_queue_t *q = req->q;
+
+	if (q) {
+		elevator_t *e = &q->elevator;
+		if (e->last_merge == req)
+			e->last_merge = NULL;
+	}
+
 	list_del(&req->queue);
 }
 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/blkdev.h	Sat Apr 28 00:48:49 2001
+++ linux/include/linux/blkdev.h	Mon May 21 23:26:27 2001
@@ -42,25 +42,19 @@
 	void * special;
 	char * buffer;
 	struct semaphore * sem;
-	struct buffer_head * bh;
-	struct buffer_head * bhtail;
+	struct bio *bio, *biotail;
 	request_queue_t *q;
 };
 
 #include <linux/elevator.h>
 
-typedef int (merge_request_fn) (request_queue_t *q,
-				struct request *req,
-				struct buffer_head *bh,
-				int);
-typedef int (merge_requests_fn) (request_queue_t *q,
-				 struct request *req,
-				 struct request *req2,
-				 int);
+typedef int (merge_request_fn) (request_queue_t *, struct request *,
+				struct bio *);
+typedef int (merge_requests_fn) (request_queue_t *, struct request *,
+				 struct request *);
 typedef void (request_fn_proc) (request_queue_t *q);
 typedef request_queue_t * (queue_proc) (kdev_t dev);
-typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
-typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct bio *bio);
 typedef void (unplug_device_fn) (void *q);
 
 /*
@@ -89,7 +83,7 @@
 	merge_request_fn	* front_merge_fn;
 	merge_requests_fn	* merge_requests_fn;
 	make_request_fn		* make_request_fn;
-	plug_device_fn		* plug_device_fn;
+
 	/*
 	 * The queue owner gets to use this for whatever they like.
 	 * ll_rw_blk doesn't touch it.
@@ -97,20 +91,19 @@
 	void			* queuedata;
 
 	/*
-	 * This is used to remove the plug when tq_disk runs.
+	 * queue needs bounce pages for pages above this limit (phys addr)
 	 */
-	struct tq_struct	plug_tq;
+	struct page		*bounce_limit;
 
 	/*
-	 * Boolean that indicates whether this queue is plugged or not.
+	 * This is used to remove the plug when tq_disk runs.
 	 */
-	char			plugged;
+	struct tq_struct	plug_tq;
 
 	/*
-	 * Boolean that indicates whether current_request is active or
-	 * not.
+	 * various queue flags, see QUEUE_* below
 	 */
-	char			head_active;
+	unsigned long		queue_flags;
 
 	/*
 	 * Is meant to protect the queue in the future instead of
@@ -121,9 +114,37 @@
 	/*
 	 * Tasks wait here for free request
 	 */
-	wait_queue_head_t	wait_for_request;
+	wait_queue_head_t	wait_for_request[2];
+
+	/*
+	 * queue settings
+	 */
+	unsigned int		max_sectors;
+	unsigned int		max_segments;
 };
 
+#define QUEUE_FLAG_PLUGGED	0	/* queue is plugged */
+#define QUEUE_FLAG_HEADACTIVE	1	/* has active head (going away) */
+
+#define blk_queue_flag(q, flag)	test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+#define blk_set_flag(q, flag)	set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+#define blk_clear_flag(q, flag)	clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags)
+
+#define blk_queue_plugged(q)	blk_queue_flag(q, PLUGGED)
+#define blk_queue_headlive(q)	blk_queue_flag(q, HEADACTIVE)
+
+#define blk_mark_plugged(q)	blk_set_flag(q, PLUGGED)
+#define blk_mark_headactive(q)	blk_set_flag(q, HEADACTIVE)
+
+#define blk_set_unplugged(q)	test_and_clear_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
+#define blk_set_plugged(q)	test_and_set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
+
+#if CONFIG_HIGHMEM
+#define blk_queue_bounce(q, bio)	create_bounce((q), (bio))
+#else
+#define blk_queue_bounce(q, bio)	(bio)
+#endif
+
 struct blk_dev_struct {
 	/*
 	 * queue_proc has to be atomic
@@ -150,10 +171,11 @@
 extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
 extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
 extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
-extern void generic_make_request(int rw, struct buffer_head * bh);
-extern request_queue_t *blk_get_queue(kdev_t dev);
-extern inline request_queue_t *__blk_get_queue(kdev_t dev);
+extern void generic_make_request(int rw, struct bio *bio);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
 extern void blkdev_release_request(struct request *);
+extern inline void blk_wake_queue(request_queue_t *);
+extern void blk_attempt_remerge(request_queue_t *, struct request *);
 
 /*
  * Access functions for manipulating queue properties
@@ -162,6 +184,8 @@
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_headactive(request_queue_t *, int);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void blk_queue_bounce_limit(request_queue_t *, struct page *);
+extern void blk_queue_max_sectors(request_queue_t *q, int);
 extern void generic_unplug_device(void *);
 
 extern int * blk_size[MAX_BLKDEV];
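Purely as illustration of the new queue_flags helpers above (a sketch, not code from the patch; the real plug/unplug paths live in ll_rw_blk.c):

/*
 * Sketch: plugging/unplugging with the atomic queue_flags helpers
 * instead of the old 'plugged' char. Locking (io_request_lock) is
 * omitted for brevity.
 */
static void example_plug_device(request_queue_t *q)
{
	/* only schedule the unplug task the first time we plug */
	if (!blk_set_plugged(q))
		queue_task(&q->plug_tq, &tq_disk);
}

static void example_unplug_device(request_queue_t *q)
{
	if (blk_set_unplugged(q) && q->request_fn)
		q->request_fn(q);
}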
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/elevator.h linux/include/linux/elevator.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/elevator.h	Fri Feb 16 01:58:34 2001
+++ linux/include/linux/elevator.h	Mon May 21 22:58:16 2001
@@ -5,8 +5,8 @@
 			    struct list_head *,
 			    struct list_head *, int);
 
-typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
-				 struct buffer_head *, int, int);
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
+				 struct list_head *, struct bio *, int);
 
 typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
 
@@ -21,14 +21,16 @@
 	elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
 
+	struct request *last_merge;
+
 	unsigned int queue_ID;
 };
 
-int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *, int);
 void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
 void elevator_noop_merge_req(struct request *, struct request *);
 
-int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *, int);
 void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
 void elevator_linus_merge_req(struct request *, struct request *);
 
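The new last_merge field lets the elevator cache the request it last merged into, so back-to-back sequential I/O does not have to rescan the whole queue; blkdev_dequeue_request() above clears it when that request leaves the queue. A rough sketch of how a merge function could use it (illustrative only; the real logic belongs in drivers/block/elevator.c):

/*
 * Sketch: try the cached request before scanning the queue. Uses the
 * existing ELEVATOR_BACK_MERGE/ELEVATOR_NO_MERGE return codes.
 */
static int example_try_last_merge(request_queue_t *q, struct request **req,
				  struct bio *bio)
{
	struct request *__rq = q->elevator.last_merge;

	if (__rq && __rq->sector + __rq->nr_sectors == bio->bi_sector) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}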
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/fs.h linux/include/linux/fs.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/fs.h	Sun May 20 14:05:17 2001
+++ linux/include/linux/fs.h	Mon May 21 15:42:31 2001
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -234,26 +235,25 @@
 	kdev_t b_dev;			/* device (B_FREE = free) */
 
 	atomic_t b_count;		/* users using this block */
-	kdev_t b_rdev;			/* Real device */
 	unsigned long b_state;		/* buffer state bitmap (see above) */
 	unsigned long b_flushtime;	/* Time when (dirty) buffer should be written */
 
 	struct buffer_head *b_next_free;/* lru/free list linkage */
 	struct buffer_head *b_prev_free;/* doubly linked list of buffers */
 	struct buffer_head *b_this_page;/* circular list of buffers in one page */
-	struct buffer_head *b_reqnext;	/* request queue */
-
 	struct buffer_head **b_pprev;	/* doubly linked list of hash-queue */
 	char * b_data;			/* pointer to data block */
 	struct page *b_page;		/* the page this bh is mapped to */
-	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completio
+n */
 	void *b_private;		/* reserved for b_end_io */
-	unsigned long b_rsector;	/* Real buffer location on disk */
 	wait_queue_head_t b_wait;
 
 	struct inode *	     b_inode;
 	struct list_head     b_inode_buffers;	/* doubly linked list of inode dirty buffers */
+
+	struct bio *b_bio;		/* allocated on I/O to/from buffer */
 };
 
 typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
@@ -1075,10 +1075,13 @@
 static inline void buffer_IO_error(struct buffer_head * bh)
 {
 	mark_buffer_clean(bh);
+
 	/*
-	 * b_end_io has to clear the BH_Uptodate bitflag in the error case!
+	 * b_end_io has to clear the BH_Uptodate bitflag in the read error
+	 * case, however buffer contents are not necessarily bad if a
+	 * write fails
 	 */
-	bh->b_end_io(bh, 0);
+	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
 }
 
 extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *);
@@ -1243,6 +1246,7 @@
 extern struct buffer_head * getblk(kdev_t, int, int);
 extern void ll_rw_block(int, int, struct buffer_head * bh[]);
 extern void submit_bh(int, struct buffer_head *);
+extern void submit_bio(int, struct bio *);
 extern int is_read_only(kdev_t);
 extern void __brelse(struct buffer_head *);
 static inline void brelse(struct buffer_head *buf)
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/highmem.h linux/include/linux/highmem.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/highmem.h	Sat Apr 28 00:48:31 2001
+++ linux/include/linux/highmem.h	Mon May 21 23:26:38 2001
@@ -2,6 +2,7 @@
 #define _LINUX_HIGHMEM_H
 
 #include
+#include
 #include
 
 #ifdef CONFIG_HIGHMEM
@@ -13,7 +14,7 @@
 
 /* declarations for linux/mm/highmem.c */
 FASTCALL(unsigned int nr_free_highpages(void));
-extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig);
+extern struct bio * create_bounce(request_queue_t *, struct bio * bio_orig);
 
 static inline char *bh_kmap(struct buffer_head *bh)
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/iobuf.h linux/include/linux/iobuf.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/iobuf.h	Sat Apr 28 00:48:51 2001
+++ linux/include/linux/iobuf.h	Mon May 21 16:10:47 2001
@@ -26,7 +26,6 @@
 #define KIO_MAX_ATOMIC_IO	512 /* in kb */
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
-#define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
 
 /* The main kiobuf struct used for all our IO! */
 
@@ -48,8 +47,6 @@
 
 	/* Always embed enough struct pages for atomic IO */
 	struct page *	map_array[KIO_STATIC_PAGES];
-	struct buffer_head * bh[KIO_MAX_SECTORS];
-	unsigned long blocks[KIO_MAX_SECTORS];
 
 	/* Dynamic state for IO completion: */
 	atomic_t	io_count;	/* IOs still in progress */
@@ -82,5 +79,8 @@
 int	brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
 		   kdev_t dev, unsigned long b[], int size);
 
+
+/* fs/bio.c */
+void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block);
 
 #endif /* __LINUX_IOBUF_H */
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/loop.h linux/include/linux/loop.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/loop.h	Wed Mar  7 04:35:36 2001
+++ linux/include/linux/loop.h	Mon May 21 22:58:16 2001
@@ -49,8 +49,8 @@
 	int		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct buffer_head	*lo_bh;
-	struct buffer_head	*lo_bhtail;
+	struct bio		*lo_bio;
+	struct bio		*lo_biotail;
 	int			lo_state;
 	struct semaphore	lo_sem;
 	struct semaphore	lo_ctl_mutex;
@@ -77,6 +77,7 @@
  */
 #define LO_FLAGS_DO_BMAP	1
 #define LO_FLAGS_READ_ONLY	2
+#define LO_FLAGS_BH_REMAP	4
 
 /*
  * Note that this structure gets the wrong offsets when directly used
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/include/linux/slab.h linux/include/linux/slab.h
--- /opt/kernel/linux-2.4.5-pre4/include/linux/slab.h	Sat Apr 28 00:48:37 2001
+++ linux/include/linux/slab.h	Mon May 21 16:03:07 2001
@@ -74,6 +74,8 @@
 extern kmem_cache_t	*bh_cachep;
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sigact_cachep;
+extern kmem_cache_t	*bio_cachep;
+extern kmem_cache_t	*biovec_cachep;
 
 #endif	/* __KERNEL__ */
 
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/kernel/ksyms.c linux/kernel/ksyms.c
--- /opt/kernel/linux-2.4.5-pre4/kernel/ksyms.c	Sun May 20 14:05:17 2001
+++ linux/kernel/ksyms.c	Mon May 21 23:04:55 2001
@@ -299,7 +299,6 @@
 EXPORT_SYMBOL(tq_disk);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(refile_buffer);
-EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_readahead);
 EXPORT_SYMBOL(file_moveto);
 
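With the per-sector bh[] and blocks[] arrays gone from struct kiobuf, raw I/O is meant to go through the new ll_rw_kio() helper in fs/bio.c instead of building buffer_heads one sector at a time. A hypothetical caller, for illustration only:

/*
 * Sketch: submit a mapped kiobuf and wait for it. Assumes 'kio' was set
 * up by the usual alloc_kiovec()/map_user_kiobuf() path.
 */
static int example_submit_kio(int rw, struct kiobuf *kio, kdev_t dev,
			      unsigned long blocknr)
{
	ll_rw_kio(rw, kio, dev, blocknr);
	kiobuf_wait_for_io(kio);

	return kio->errno;
}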
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.5-pre4/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.4.5-pre4/mm/highmem.c	Fri Apr 27 23:23:25 2001
+++ linux/mm/highmem.c	Mon May 21 02:48:28 2001
@@ -164,126 +164,96 @@
  * This will be moved to the block layer in 2.5.
  */
 
-static inline void copy_from_high_bh (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_from_high_bio(struct bio *to, struct bio *from)
 {
-	struct page *p_from;
 	char *vfrom;
 	unsigned long flags;
 
-	p_from = from->b_page;
-
 	/*
 	 * Since this can be executed from IRQ context, reentrance
 	 * on the same CPU must be avoided:
 	 */
 	__save_flags(flags);
 	__cli();
-	vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
-	memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
+	vfrom = kmap_atomic(bio_page(from), KM_BOUNCE_WRITE);
+	memcpy(bio_data(to), vfrom + bio_offset(from), bio_size(to));
 	kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
 	__restore_flags(flags);
 }
 
-static inline void copy_to_high_bh_irq (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
-	struct page *p_to;
 	char *vto;
 	unsigned long flags;
 
-	p_to = to->b_page;
 	__save_flags(flags);
 	__cli();
-	vto = kmap_atomic(p_to, KM_BOUNCE_READ);
-	memcpy(vto + bh_offset(to), from->b_data, to->b_size);
+	vto = kmap_atomic(bio_page(to), KM_BOUNCE_READ);
+	memcpy(vto + bio_offset(to), bio_data(from), bio_size(to));
 	kunmap_atomic(vto, KM_BOUNCE_READ);
 	__restore_flags(flags);
 }
 
-static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+static inline void bounce_end_io (struct bio *bio)
 {
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	struct bio *bio_orig = bio->bi_private;
 
-	bh_orig->b_end_io(bh_orig, uptodate);
-	__free_page(bh->b_page);
-#ifdef HIGHMEM_DEBUG
-	/* Don't clobber the constructed slab cache */
-	init_waitqueue_head(&bh->b_wait);
-#endif
-	kmem_cache_free(bh_cachep, bh);
+	bio_orig->bi_end_io(bio_orig);
+	__free_page(bio_page(bio));
+	bio_free(bio);
 }
 
-static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_write (struct bio *bio)
 {
-	bounce_end_io(bh, uptodate);
+	bounce_end_io(bio);
 }
 
-static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_read (struct bio *bio)
 {
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	struct bio *bio_orig = bio->bi_private;
+
+	/*
+	 * was this particular page out-of-reach originally?
+	 */
+	if (bio->bi_flags & BIO_UPTODATE)
+		copy_to_high_bio_irq(bio_orig, bio);
 
-	if (uptodate)
-		copy_to_high_bh_irq(bh_orig, bh);
-	bounce_end_io(bh, uptodate);
+	bounce_end_io(bio);
 }
 
-struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
+struct bio *create_bounce(request_queue_t *q, struct bio *bio_orig)
 {
 	struct page *page;
-	struct buffer_head *bh;
+	struct bio *bio;
 
-	if (!PageHighMem(bh_orig->b_page))
-		return bh_orig;
+	if (bio_page(bio_orig) < q->bounce_limit)
+		return bio_orig;
 
-repeat_bh:
-	bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER);
-	if (!bh) {
-		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
-		goto repeat_bh;
-	}
-	/*
-	 * This is wasteful for 1k buffers, but this is a stopgap measure
-	 * and we are being ineffective anyway. This approach simplifies
-	 * things immensly. On boxes with more than 4GB RAM this should
-	 * not be an issue anyway.
-	 */
-repeat_page:
-	page = alloc_page(GFP_BUFFER);
-	if (!page) {
-		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
-		goto repeat_page;
-	}
-	set_bh_page(bh, page, 0);
-
-	bh->b_next = NULL;
-	bh->b_blocknr = bh_orig->b_blocknr;
-	bh->b_size = bh_orig->b_size;
-	bh->b_list = -1;
-	bh->b_dev = bh_orig->b_dev;
-	bh->b_count = bh_orig->b_count;
-	bh->b_rdev = bh_orig->b_rdev;
-	bh->b_state = bh_orig->b_state;
-#ifdef HIGHMEM_DEBUG
-	bh->b_flushtime = jiffies;
-	bh->b_next_free = NULL;
-	bh->b_prev_free = NULL;
-	/* bh->b_this_page */
-	bh->b_reqnext = NULL;
-	bh->b_pprev = NULL;
-#endif
-	/* bh->b_page */
-	if (rw == WRITE) {
-		bh->b_end_io = bounce_end_io_write;
-		copy_from_high_bh(bh, bh_orig);
+	bio = bio_alloc(GFP_BUFFER, 1);
+
+	do {
+		page = alloc_page(GFP_BUFFER);
+		if (page)
+			break;
+
+		wakeup_bdflush(1);  /* Sets task->state to TASK_RUNNING */
+	} while (1);
+
+	bio->bi_sector = bio_orig->bi_sector;
+	bio->bi_dev = bio_orig->bi_dev;
+	bio->bi_private = bio_orig;
+	bio->bi_flags = bio_orig->bi_flags;
+
+	bio->bi_io_vec.bv_page = page;
+	bio->bi_io_vec.bv_len = bio_size(bio_orig);
+	bio->bi_io_vec.bv_offset = bio_offset(bio_orig);
+
+	if (bio->bi_flags & WRITE) {
+		bio->bi_end_io = bounce_end_io_write;
+		copy_from_high_bio(bio, bio_orig);
 	} else
-		bh->b_end_io = bounce_end_io_read;
-	bh->b_private = (void *)bh_orig;
-	bh->b_rsector = bh_orig->b_rsector;
-#ifdef HIGHMEM_DEBUG
-	memset(&bh->b_wait, -1, sizeof(bh->b_wait));
-#endif
+		bio->bi_end_io = bounce_end_io_read;
 
-	return bh;
+	return bio;
 }
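Since create_bounce() now keys off q->bounce_limit instead of PageHighMem(), the intent is that a driver bounces incoming bios through the blk_queue_bounce() wrapper from blkdev.h before touching the data. A sketch of where that would sit in a make_request function (illustrative only; example_driver_queue() is a made-up placeholder):

/*
 * Sketch: bounce a bio that sits above the queue's limit, then hand it
 * to the driver's own queueing. The bounced bio keeps the original in
 * bi_private and completes it from bounce_end_io() above.
 */
static int example_make_request(request_queue_t *q, int rw, struct bio *bio)
{
	struct bio *nbio = blk_queue_bounce(q, bio);

	/* from here on bio_data(nbio) is safe to touch directly */
	example_driver_queue(q, rw, nbio);

	return 0;
}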