diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/CREDITS linux/CREDITS --- /opt/kernel/linux-2.4.7/CREDITS Wed Jul 4 23:41:33 2001 +++ linux/CREDITS Wed Jan 1 00:07:23 1997 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/Documentation/Configure.help linux/Documentation/Configure.help --- /opt/kernel/linux-2.4.7/Documentation/Configure.help Fri Jul 20 02:48:15 2001 +++ linux/Documentation/Configure.help Wed Jan 1 00:07:23 1997 @@ -5520,17 +5520,6 @@ Documentation/scsi.txt. The module will be called sg.o. If unsure, say N. -Debug new queueing code for SCSI -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See http://www.andante.org/scsi_queue.html for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/acorn/block/mfmhd.c linux/drivers/acorn/block/mfmhd.c --- /opt/kernel/linux-2.4.7/drivers/acorn/block/mfmhd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/acorn/block/mfmhd.c Tue Jul 24 15:04:44 2001 @@ -932,26 +932,25 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; -#ifdef DEBUG - /*if ((dev>>6)==1) */ console_printf("mfm_request: raw vals: dev=%d (block=512 bytes) block=%d nblocks=%d\n", dev, block, nsect); -#endif - if (dev >= (mfm_drives << 6) || - block >= mfm[dev].nr_sects || ((block+nsect) > mfm[dev].nr_sects)) { - if (dev >= (mfm_drives << 6)) - printk("mfm: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); + + if (dev >= (mfm_drives << 6) || (dev & 0x3f) || + block >= mfm[dev].nr_sects || + (block+nsect > mfm[dev].nr_sects)) { + if (dev >= (mfm_drives << 6) || (dev & 0x3f)) + printk("mfm: bad minor number: device=%s\n", + kdevname(CURRENT->rq_dev)); else - printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", (dev >> 6)+'a', - block, nsect, mfm[dev].nr_sects); + printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", + (dev >> 6)+'a', block, nsect, + mfm[dev].nr_sects); printk("mfm: continue 1\n"); end_request(0); Busy = 0; continue; } - block += mfm[dev].start_sect; - - /* DAG: Linux doesn't cope with this - even though it has an array telling - it the hardware block size - silly */ + /* DAG: Linux doesn't cope with this - even though it has + an array telling it the hardware block size - silly */ block <<= 1; /* Now in 256 byte sectors */ nsect <<= 1; /* Ditto */ @@ -1180,22 +1179,21 @@ static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) { - struct hd_geometry *geo = (struct hd_geometry *) arg; kdev_t dev; - int device, major, minor, err; + int device; if (!inode || !(dev = inode->i_rdev)) return -EINVAL; - major = MAJOR(dev); 
- minor = MINOR(dev); - - device = DEVICE_NR(MINOR(inode->i_rdev)), err; + device = DEVICE_NR(MINOR(inode->i_rdev)); if (device >= mfm_drives) return -EINVAL; switch (cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *) arg; + if (!arg) return -EINVAL; if (put_user (mfm_info[device].heads, &geo->heads)) @@ -1204,31 +1202,21 @@ return -EFAULT; if (put_user (mfm_info[device].cylinders, &geo->cylinders)) return -EFAULT; - if (put_user (mfm[minor].start_sect, &geo->start)) + if (put_user (get_start_sect(inode->i_rdev), &geo->start)) return -EFAULT; return 0; - - case BLKGETSIZE: - return put_user (mfm[minor].nr_sects, (long *)arg); - - case BLKFRASET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - max_readahead[major][minor] = arg; - return 0; - - case BLKFRAGET: - return put_user(max_readahead[major][minor], (long *) arg); - - case BLKSECTGET: - return put_user(max_sectors[major][minor], (long *) arg); + } case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; return mfm_reread_partitions(dev); + case BLKGETSIZE: + case BLKSECTGET: case BLKFLSBUF: + case BLKFRASET: + case BLKFRAGET: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1294,8 +1282,10 @@ if ((heads < 1) || (mfm_info[drive].cylinders > 1024)) { printk("mfm%c: Insane disc shape! Setting to 512/4/32\n",'a' + (dev >> 6)); - /* These values are fairly arbitary, but are there so that if your - * lucky you can pick apart your disc to find out what is going on - + /* + * These values are fairly arbitary, but are there so + * that if you're lucky you can pick apart your disc + * to find out what is going on - * I reckon these figures won't hurt MOST drives */ mfm_info[drive].sectors = 32; @@ -1306,7 +1296,8 @@ mfm_specify (); mfm_geometry (drive); mfm[drive << 6].start_sect = 0; - mfm[drive << 6].nr_sects = mfm_info[drive].cylinders * mfm_info[drive].heads * mfm_info[drive].sectors / 2; + mfm[drive << 6].nr_sects = mfm_info[drive].cylinders + * mfm_info[drive].heads * mfm_info[drive].sectors / 2; } } @@ -1347,7 +1338,8 @@ mfm_drives == 1 ? "" : "s"); mfm_gendisk.nr_real = mfm_drives; - if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, "MFM harddisk", NULL)) + if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, + "MFM harddisk", NULL)) printk("mfm: unable to get IRQ%d\n", mfm_irq); if (mfm_irqenable) @@ -1450,10 +1442,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB?) 
read ahread */ -#ifndef MODULE - mfm_gendisk.next = gendisk_head; - gendisk_head = &mfm_gendisk; -#endif + add_gendisk(&mfm_gendisk); Busy = 0; lastspecifieddrive = -1; @@ -1469,8 +1458,9 @@ */ static int mfm_reread_partitions(kdev_t dev) { - unsigned int start, i, maxp, target = DEVICE_NR(MINOR(dev)); + unsigned int target = DEVICE_NR(MINOR(dev)); unsigned long flags; + int res; save_flags_cli(flags); if (mfm_info[target].busy || mfm_info[target].access_count > 1) { @@ -1480,24 +1470,19 @@ mfm_info[target].busy = 1; restore_flags (flags); - maxp = mfm_gendisk.max_p; - start = target << mfm_gendisk.minor_shift; - - for (i = maxp - 1; i >= 0; i--) { - int minor = start + i; - invalidate_device (MKDEV(MAJOR_NR, minor), 1); - mfm_gendisk.part[minor].start_sect = 0; - mfm_gendisk.part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; /* Divide by 2, since sectors are 2 times smaller than usual ;-) */ - grok_partitions(&mfm_gendisk, target, 1<<6, mfm_info[target].heads * + grok_partitions(dev, mfm_info[target].heads * mfm_info[target].cylinders * mfm_info[target].sectors / 2); +leave: mfm_info[target].busy = 0; wake_up (&mfm_wait_open); - return 0; + return res; } #ifdef MODULE @@ -1512,6 +1497,7 @@ outw (0, mfm_irqenable); /* Required to enable IRQs from MFM podule */ free_irq(mfm_irq, NULL); unregister_blkdev(MAJOR_NR, "mfm"); + del_gendisk(&mfm_gendisk); if (ecs) ecard_release(ecs); if (mfm_addr) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/kernel/linux-2.4.7/drivers/block/DAC960.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/DAC960.c Tue Jul 24 15:04:44 2001 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +302,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -484,14 +486,14 @@ static void DAC960_ExecuteCommand(DAC960_Command_T *Command) { DAC960_Controller_T *Controller = Command->Controller; - DECLARE_MUTEX_LOCKED(Semaphore); + DECLARE_COMPLETION(Wait); unsigned long ProcessorFlags; - Command->Semaphore = &Semaphore; + Command->Waiting = &Wait; DAC960_AcquireControllerLock(Controller, &ProcessorFlags); DAC960_QueueCommand(Command); DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); if (in_interrupt()) return; - down(&Semaphore); + wait_for_completion(&Wait); } @@ -1316,7 +1318,7 @@ *Controller) { DAC960_V1_DCDB_T DCDBs[DAC960_V1_MaxChannels], *DCDB; - Semaphore_T Semaphores[DAC960_V1_MaxChannels], *Semaphore; + Completion_T Wait[DAC960_V1_MaxChannels], *wait; unsigned long ProcessorFlags; int Channel, TargetID; for (TargetID = 0; TargetID < Controller->Targets; TargetID++) @@ -1327,12 +1329,12 @@ DAC960_SCSI_Inquiry_T *InquiryStandardData = &Controller->V1.InquiryStandardData[Channel][TargetID]; InquiryStandardData->PeripheralDeviceType = 0x1F; - Semaphore = &Semaphores[Channel]; - init_MUTEX_LOCKED(Semaphore); + wait = &Wait[Channel]; + init_completion(wait); DCDB = &DCDBs[Channel]; DAC960_V1_ClearCommand(Command); Command->CommandType = DAC960_ImmediateCommand; - Command->Semaphore = Semaphore; + Command->Waiting = wait; Command->V1.CommandMailbox.Type3.CommandOpcode = 
DAC960_V1_DCDB; Command->V1.CommandMailbox.Type3.BusAddress = Virtual_to_Bus32(DCDB); DCDB->Channel = Channel; @@ -1363,11 +1365,11 @@ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - Semaphore = &Semaphores[Channel]; - down(Semaphore); + wait = &Wait[Channel]; + wait_for_completion(wait); if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) continue; - Command->Semaphore = Semaphore; + Command->Waiting = wait; DCDB = &DCDBs[Channel]; DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); DCDB->BusAddress = Virtual_to_Bus32(InquiryUnitSerialNumber); @@ -1381,7 +1383,7 @@ DAC960_AcquireControllerLock(Controller, &ProcessorFlags); DAC960_QueueCommand(Command); DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); - down(Semaphore); + wait_for_completion(wait); } } return true; @@ -1804,76 +1806,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. -*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. -*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. 
*/ @@ -1881,7 +1813,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) { int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - GenericDiskInfo_T *GenericDiskInfo; RequestQueue_T *RequestQueue; int MinorNumber; /* @@ -1900,25 +1831,20 @@ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); blk_init_queue(RequestQueue, DAC960_RequestFunction); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* Initialize the Disk Partitions array, Partition Sizes array, Block Sizes array, and Max Sectors per Request array. */ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) - { Controller->BlockSizes[MinorNumber] = BLOCK_SIZE; - Controller->MaxSectorsPerRequest[MinorNumber] = - Controller->MaxBlocksPerCommand; - } + Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -1934,15 +1860,10 @@ Controller->GenericDiskInfo.next = NULL; Controller->GenericDiskInfo.fops = &DAC960_BlockDeviceOperations; /* - Install the Generic Disk Information structure at the end of the list. + Install the Generic Disk Information structure. */ - if ((GenericDiskInfo = gendisk_head) != NULL) - { - while (GenericDiskInfo->next != NULL) - GenericDiskInfo = GenericDiskInfo->next; - GenericDiskInfo->next = &Controller->GenericDiskInfo; - } - else gendisk_head = &Controller->GenericDiskInfo; + add_gendisk(&Controller->GenericDiskInfo); + /* Indicate the Block Device Registration completed successfully, */ @@ -1967,27 +1888,16 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MajorNumber)); /* + Remove the Generic Disk Information structure from the list. + */ + del_gendisk(&Controller->GenericDiskInfo); + /* Remove the Disk Partitions array, Partition Sizes array, Block Sizes array, Max Sectors per Request array, and Max Segments per Request array. */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; - /* - Remove the Generic Disk Information structure from the list. 
- */ - if (gendisk_head != &Controller->GenericDiskInfo) - { - GenericDiskInfo_T *GenericDiskInfo = gendisk_head; - while (GenericDiskInfo != NULL && - GenericDiskInfo->next != &Controller->GenericDiskInfo) - GenericDiskInfo = GenericDiskInfo->next; - if (GenericDiskInfo != NULL) - GenericDiskInfo->next = GenericDiskInfo->next->next; - } - else gendisk_head = Controller->GenericDiskInfo.next; + blk_clear(MajorNumber); } @@ -2625,23 +2535,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2715,23 +2626,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2759,7 +2671,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2768,14 +2680,12 @@ if (Request->cmd == READ) Command->CommandType = DAC960_ReadCommand; else Command->CommandType = DAC960_WriteCommand; - Command->Semaphore = Request->sem; + Command->Waiting = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; 
blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -2828,8 +2738,10 @@ static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + BufferHeader->bi_flags |= BIO_UPTODATE; + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -2883,13 +2795,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -2916,25 +2828,25 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } /* Wake up requestor for swap file paging requests. */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } add_blkdev_randomness(DAC960_MAJOR + Controller->ControllerNumber); } else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -2948,10 +2860,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -2964,26 +2876,23 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. - */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } } } else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -3000,14 +2909,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3589,8 +3498,8 @@ } if (CommandType == DAC960_ImmediateCommand) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; return; } if (CommandType == DAC960_QueuedCommand) @@ -3666,13 +3575,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3926,37 +3835,34 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. 
- */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } add_blkdev_randomness(DAC960_MAJOR + Controller->ControllerNumber); } else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -3974,26 +3880,23 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. - */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } } } else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -4008,16 +3911,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -4539,8 +4442,8 @@ } if (CommandType == DAC960_ImmediateCommand) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; return; } if (CommandType == DAC960_QueuedCommand) @@ -5045,7 +4948,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5094,16 +4998,10 @@ LogicalDeviceInfo->ConfigurableDeviceSizeIn512ByteBlocksOrMB / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (long *) Argument); case BLKRAGET: /* Get Read-Ahead. */ if ((long *) Argument == NULL) return -EINVAL; @@ -5125,46 +5023,17 @@ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + if (Controller->FirmwareType == DAC960_V1_Controller) - grok_partitions(&Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, - Controller->V1.LogicalDriveInformation - [LogicalDriveNumber] - .LogicalDriveSize); + grok_partitions(Inode->i_rdev, + Controller->V1.LogicalDriveInformation + [LogicalDriveNumber] + .LogicalDriveSize); else - grok_partitions( - &Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, + grok_partitions(Inode->i_rdev, Controller->V2.LogicalDeviceInformation[LogicalDriveNumber] ->ConfigurableDeviceSizeIn512ByteBlocksOrMB); return 0; @@ -5287,11 +5156,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- /opt/kernel/linux-2.4.7/drivers/block/DAC960.h Wed Feb 21 06:26:22 2001 +++ linux/drivers/block/DAC960.h Tue Jul 24 15:36:20 2001 @@ -2136,7 +2136,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct gendisk GenericDiskInfo_T; @@ -2153,7 +2153,7 @@ typedef struct pt_regs Registers_T; typedef struct request IO_Request_T; typedef request_queue_t RequestQueue_T; -typedef struct semaphore Semaphore_T; +typedef struct completion Completion_T; typedef struct super_block SuperBlock_T; typedef struct timer_list Timer_T; typedef wait_queue_head_t WaitQueue_T; @@ -2220,7 +2220,7 @@ DAC960_CommandType_T CommandType; struct DAC960_Controller *Controller; struct DAC960_Command *Next; - Semaphore_T *Semaphore; + Completion_T *Waiting; unsigned int LogicalDriveNumber; unsigned int BlockNumber; unsigned int BlockCount; @@ -2414,7 +2414,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2448,7 +2447,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2460,13 +2459,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
*/ static inline @@ -2478,7 +2477,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2497,7 +2496,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2510,7 +2509,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/acsi.c linux/drivers/block/acsi.c --- /opt/kernel/linux-2.4.7/drivers/block/acsi.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/acsi.c Tue Jul 24 15:04:44 2001 @@ -1014,7 +1014,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1126,7 +1125,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1137,10 +1136,7 @@ put_user( 0, &((Scsi_Idlun *) arg)->host_unique_id ); return 0; - case BLKGETSIZE: /* Return device size */ - return put_user(acsi_part[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -1795,8 +1791,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - acsi_gendisk.next = gendisk_head; - gendisk_head = &acsi_gendisk; + add_gendisk(&acsi_gendisk); #ifdef CONFIG_ATARI_SLM err = slm_init(); @@ -1820,8 +1815,6 @@ void cleanup_module(void) { - struct gendisk ** gdp; - del_timer( &acsi_timer ); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); atari_stram_free( acsi_buffer ); @@ -1829,13 +1822,7 @@ if (devfs_unregister_blkdev( MAJOR_NR, "ad" ) != 0) printk( KERN_ERR "acsi: cleanup_module failed\n"); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &acsi_gendisk) - break; - if (!*gdp) - printk( KERN_ERR "acsi: entry in disk chain missing!\n" ); - else - *gdp = (*gdp)->next; + del_gendisk(&acsi_gendisk); } #endif @@ -1866,7 +1853,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1881,16 +1868,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1905,12 +1883,13 @@ ENABLE_IRQ(); stdma_release(); - - grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- /opt/kernel/linux-2.4.7/drivers/block/amiflop.c Sat Apr 28 
20:27:53 2001 +++ linux/drivers/block/amiflop.c Wed Jan 1 00:07:23 1997 @@ -1890,10 +1890,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- /opt/kernel/linux-2.4.7/drivers/block/blkpg.c Sun May 20 20:34:05 2001 +++ linux/drivers/block/blkpg.c Tue Jul 24 15:07:27 2001 @@ -54,17 +54,6 @@ * Note that several drives may have the same major. */ -/* a linear search, superfluous when dev is a pointer */ -static struct gendisk *get_gendisk(kdev_t dev) { - struct gendisk *g; - int m = MAJOR(dev); - - for (g = gendisk_head; g; g = g->next) - if (g->major == m) - break; - return g; -} - /* * Add a partition. * @@ -208,6 +197,9 @@ { int intval; + if (!dev) + return -EINVAL; + switch (cmd) { case BLKROSET: if (!capable(CAP_SYS_ADMIN)) @@ -216,6 +208,7 @@ return -EFAULT; set_device_ro(dev, intval); return 0; + case BLKROGET: intval = (is_read_only(dev) != 0); return put_user(intval, (int *)(arg)); @@ -223,20 +216,47 @@ case BLKRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if(!dev || arg > 0xff) + if(arg > 0xff) return -EINVAL; read_ahead[MAJOR(dev)] = arg; return 0; + case BLKRAGET: if (!arg) return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + { + int *mr; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + mr[MINOR(dev)] = arg; + return 0; + } + + case BLKFRAGET: + { + int *mr; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(mr[MINOR(dev)], (long *) arg); + } + + case BLKSECTGET: + { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -ENODEV; + return put_user(q->max_sectors, (unsigned short *) arg); + } + case BLKFLSBUF: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev) - return -EINVAL; fsync_dev(dev); invalidate_buffers(dev); return 0; @@ -273,6 +293,25 @@ case BLKELVSET: return blkelvset_ioctl(&blk_get_queue(dev)->elevator, (blkelv_ioctl_arg_t *) arg); + case BLKHASHPROF: { + request_queue_t *q = blk_get_queue(dev); + + if (!q) + return -EINVAL; + if (copy_to_user((struct bio_hash_stats *) arg, &q->queue_hash.st, sizeof(struct bio_hash_stats))) + return -EFAULT; + return 0; + } + + case BLKHASHCLEAR: { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + spin_lock_irq(&q->queue_lock); + memset(&q->queue_hash.st, 0, sizeof(struct bio_hash_stats)); + spin_unlock_irq(&q->queue_lock); + return 0; + } default: return -EINVAL; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.7/drivers/block/cciss.c Mon Jul 2 22:56:40 2001 +++ linux/drivers/block/cciss.c Tue Jul 24 15:04:44 2001 @@ -83,7 +83,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -145,7 +145,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: 
%d\n\n", @@ -156,8 +155,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -235,7 +233,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -306,13 +304,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,25 +390,24 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (long*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: case BLKRASET: case BLKRAGET: case BLKPG: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -453,16 +449,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -478,7 +465,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -509,7 +496,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -525,7 +512,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); 
return(0); @@ -652,11 +639,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -704,42 +691,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; + res = wipe_partitions(dev); + if (res) + goto leave; - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -770,15 +747,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -787,7 +764,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1083,11 +1059,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk("cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1100,17 +1076,16 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while(bio) { + xbh = bio->bi_next; + 
bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, status); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1129,7 +1104,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1208,79 +1183,33 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* * Get a request and submit it to the controller. - * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. */ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg, sect; - char *lastdataend; - struct buffer_head *bh; + unsigned long lastdataend; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + int i, dir; - // Loop till the queue is empty if or it is plugged - while (1) - { - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (blk_queue_plugged(q)) + goto startio; - creq = blkdev_entry_next_request(queue_head); +queue: + if (list_empty(queue_head)) + goto startio; + + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1289,18 +1218,15 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); - start_io(h); - return; + complete_buffers(creq->bio, 0); + goto startio; } if (( c = cmd_alloc(h, 1)) == NULL) - { - start_io(h); - return; - } + goto startio; + c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1315,43 +1241,43 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? 
CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ seg = 0; - lastdataend = NULL; + lastdataend = 0; sect = 0; - while(bh) - { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + while(bio) { + sect += bio_sectors(bio); + if (bio_to_bus(bio) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { + tmp_sg[seg-1].len += bio_size(bio); + lastdataend += bio_size(bio); + } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].len = bio_size(bio); + tmp_sg[seg].offset = bio_offset(bio); + tmp_sg[seg].page = bio_page(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].len, tmp_sg[i].offset, dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1375,10 +1301,8 @@ c->Request.CDB[8]= sect & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - blkdev_dequeue_request(creq); - /* * ehh, we can't really end the request here since it's not * even started yet. for now it shouldn't hurt though @@ -1392,7 +1316,10 @@ h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto queue; +startio: + start_io(h); } static void do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs) @@ -1411,7 +1338,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1444,11 +1371,16 @@ } } } + /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1941,18 +1873,14 @@ q->queuedata = hba[i]; blk_init_queue(q, do_cciss_request); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -1963,8 +1891,7 @@ hba[i]->gendisk.nr_real = hba[i]->num_luns; /* Get on the disk list */ - hba[i]->gendisk.next = gendisk_head; - gendisk_head = &(hba[i]->gendisk); + add_gendisk(&(hba[i]->gendisk)); cciss_geninit(i); for(j=0; jdriver_data == NULL) { @@ -2003,23 +1929,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ - if (gendisk_head == &(hba[i]->gendisk)) - { - gendisk_head = hba[i]->gendisk.next; - } else - { - for(g=gendisk_head; g ; g=g->next) - { - if(g->next == &(hba[i]->gendisk)) - { - g->next = hba[i]->gendisk.next; - } - } - } - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + del_gendisk(&(hba[i]->gendisk)); + + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2027,32 +1941,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. 
+ */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.7/drivers/block/cciss.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss.h Tue Jul 24 15:36:42 2001 @@ -17,7 +17,8 @@ struct my_sg { int len; - char *start_addr; + int offset; + struct page *page; }; struct ctlr_info; @@ -85,9 +86,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +247,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.7/drivers/block/cciss_cmd.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss_cmd.h Wed Jan 1 00:07:23 1997 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value @@ -228,7 +228,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.7/drivers/block/cpqarray.c Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.c Tue Jul 24 15:36:53 2001 @@ -102,7 +102,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -147,7 +146,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -178,12 +177,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -314,7 +312,6 @@ void cleanup_module(void) { int i; - struct gendisk *g; remove_proc_entry("array", proc_root_driver); @@ -331,66 +328,15 @@ hba[i]->cmd_pool_dhandle); kfree(hba[i]->cmd_pool_bits); - if (gendisk_head == &ida_gendisk[i]) { - gendisk_head = ida_gendisk[i].next; - } else { - for(g=gendisk_head; g; g=g->next) { - if (g->next == &ida_gendisk[i]) { - g->next = ida_gendisk[i].next; - break; - } - } - } + del_gendisk(&ida_gendisk[i]); } kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 
1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -437,20 +383,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -508,7 +443,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -529,14 +463,11 @@ q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -546,8 +477,7 @@ ida_gendisk[i].nr_real = 0; /* Get on the disk list */ - ida_gendisk[i].next = gendisk_head; - gendisk_head = &ida_gendisk[i]; + add_gendisk(&ida_gendisk[i]); init_timer(&hba[i]->timer); hba[i]->timer.expires = jiffies + IDA_TIMER; @@ -919,22 +849,27 @@ ctlr_info_t *h = q->queuedata; cmdlist_t *c; int seg, sect; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; struct my_sg tmp_sg[SG_MAX]; - int i; + unsigned long lastdataend; + int i, dir; + + if (blk_queue_plugged(q)) { + start_io(h); + return; + } -// Loop till the queue is empty if or it is plugged +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); @@ -943,7 +878,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -954,47 +889,50 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; 
c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->req.hdr.blk = creq->sector; + c->bio = bio; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); + if (bio == NULL) + panic("bio == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; + seg = lastdataend = 0; sect = 0; - while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_bus(bio) == lastdataend) { + tmp_sg[seg-1].size += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].size = bio_size(bio); + tmp_sg[seg].page = bio_page(bio); + tmp_sg[seg].offset = bio_offset(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, tmp_sg[i].page, + tmp_sg[i].size, + tmp_sg[i].offset, dir); } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1056,17 +994,17 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, ok); - bh = xbh; + bio = xbh; } } /* @@ -1099,11 +1037,11 @@ /* unmap the DMA mapping for all the scatter gather elements */ for(i=0; ireq.hdr.sg_cnt; i++) { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, cmd->req.sg[i].size, (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } - complete_buffers(cmd->bh, ok); + complete_buffers(cmd->bio, ok); } /* @@ -1128,7 +1066,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
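/*
 * Illustrative sketch only (not part of the patch): the bio-based segment
 * collection that do_ida_request above performs -- walk the request's bio
 * chain, coalesce bus-contiguous pieces into a my_sg array, then hand each
 * segment to pci_map_page() using the same call convention as the hunk
 * above.  build_sg() and the nseg limit are hypothetical names; kernel
 * context (struct my_sg as defined in cpqarray.h) is assumed.
 */
static int build_sg(struct pci_dev *pdev, struct request *creq,
		    struct my_sg *sg, int nseg, __u32 *addr)
{
	struct bio *bio = creq->bio;
	unsigned long lastdataend = 0;
	int seg = 0, i, dir;

	while (bio) {
		if (bio_to_bus(bio) == lastdataend) {
			/* physically contiguous with the previous piece */
			sg[seg - 1].size += bio_size(bio);
			lastdataend += bio_size(bio);
		} else {
			if (seg == nseg)
				return -1;	/* controller sg list full */
			sg[seg].size = bio_size(bio);
			sg[seg].page = bio_page(bio);
			sg[seg].offset = bio_offset(bio);
			lastdataend = bio_to_bus(bio) + bio_size(bio);
			seg++;
		}
		bio = bio->bi_next;
	}

	/* map each collected segment for DMA, direction from the request */
	dir = (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
	for (i = 0; i < seg; i++)
		addr[i] = (__u32) pci_map_page(pdev, sg[i].page, sg[i].size,
					       sg[i].offset, dir);
	return seg;
}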
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1162,8 +1100,12 @@ /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1209,14 +1151,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(ida[(ctlr<i_rdev)].nr_sects, (long*)arg); - return 0; case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1252,6 +1190,7 @@ return(0); } + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1352,11 +1291,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1566,15 +1505,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1583,7 +1522,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1611,17 +1549,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1629,25 +1565,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize so 
we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.7/drivers/block/cpqarray.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.h Tue Jul 24 15:34:39 2001 @@ -58,7 +58,8 @@ struct my_sg { int size; - char *start_addr; + int offset; + struct page *page; }; struct ctlr_info; @@ -121,6 +122,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.7/drivers/block/elevator.c Fri Jul 20 05:59:41 2001 +++ linux/drivers/block/elevator.c Wed Jan 1 00:07:23 1997 @@ -18,8 +18,13 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - #include #include #include @@ -28,38 +33,38 @@ #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + next = rq->queuelist.next; if (next == head) return 0; /* * if the device is different (usually on a different partition), - * just check if bh is after rq + * just check if bio is after rq */ - next_rq = blkdev_entry_to_request(next); + next_rq = list_entry(next, struct request, queuelist); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_dev >= rq->rq_dev && bio->bi_dev <= next_rq->rq_dev; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. 
if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,66 +73,101 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } - int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct request *__rq; + struct bio *bio_hash; + int rw = bio_rw(bio); + /* + * first try a back merge, then front, then give up and scan. this + * will of course fail for different size bios on the same queue, + * however that isn't an issue + */ + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + q->queue_hash.st.q_nr_back_lookups++; + if (bio_hash) { + q->queue_hash.st.q_nr_back_hits++; + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector + __rq->nr_sectors == bio->bi_sector && + !__rq->waiting && !__rq->special && !bio_hash->bi_next) { + q->queue_hash.st.q_nr_back_merges++; + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_BACK_MERGE; + } +#if 0 + bio_put(bio_hash); +#endif + } + + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + q->queue_hash.st.q_nr_front_lookups++; + if (bio_hash) { + q->queue_hash.st.q_nr_front_hits++; + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector - count == bio->bi_sector && + !__rq->waiting && !__rq->special && !bio_hash->bi_next) { + q->queue_hash.st.q_nr_front_merges++; + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_FRONT_MERGE; + } +#if 0 + bio_put(bio_hash); +#endif + } + + /* + * no merge possible, scan for insertion + */ while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry(entry, struct request, queuelist); /* - * simply "aging" of requests in queue + * get next entry into L1 cache */ - if (__rq->elevator_sequence-- <= 0) - break; + prefetch(entry->prev); if (__rq->waiting) continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) - break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = 
&req->queue, *head = &q->queue_head; + struct list_head *entry, *head = &q->queue_head; /* * second pass scan of requests that got passed over, if any */ + entry = &req->queuelist; while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + struct request *tmp =list_entry(entry,struct request,queuelist); + prefetch(entry->next); tmp->elevator_sequence -= count; } } @@ -142,39 +182,41 @@ * See if we can find a request that this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; - - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; - - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + struct bio *bio_hash; + struct request *__rq = NULL; + int rw, count, ret; + + count = bio_sectors(bio); + rw = bio_rw(bio); + ret = ELEVATOR_NO_MERGE; + + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + if (bio_hash) { + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector + __rq->nr_sectors == bio->bi_sector && + !__rq->waiting && !__rq->special) { + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + if (bio_hash) { + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector - count == bio->bi_sector && + !__rq->waiting && !__rq->special) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + *req = bio->bi_req = __rq; + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} @@ -196,16 +238,14 @@ return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +int blkelvset_ioctl(elevator_t *elevator, const blkelv_ioctl_arg_t *arg) { blkelv_ioctl_arg_t input; if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) return -EFAULT; - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) + if (input.read_latency < 0 || input.write_latency < 0) return -EINVAL; elevator->read_latency = input.read_latency; @@ -213,10 +253,15 @@ return 0; } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_init(request_queue_t *q, elevator_t *elevator, elevator_t type) { static unsigned int queue_ID; *elevator = type; elevator->queue_ID = queue_ID++; + + if (elevator->elevator_init_fn) + return elevator->elevator_init_fn(q, elevator); + + return 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.7/drivers/block/floppy.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/floppy.c Tue Jul 24 15:37:39 2001 @@ -468,7 +468,7 @@ */ static struct floppy_struct user_params[N_DRIVE]; -static int floppy_sizes[256]; +static sector_t floppy_sizes[256]; static int 
floppy_blocksizes[256]; /* @@ -570,7 +570,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2282,7 +2282,6 @@ static void request_done(int uptodate) { int block; - unsigned long flags; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2301,7 +2300,6 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2309,7 +2307,6 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2334,9 +2331,7 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); } } @@ -2382,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2429,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2437,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2458,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2475,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2484,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2496,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2505,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2519,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2541,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2641,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2649,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2681,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2689,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2709,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2722,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2736,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2751,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
@@ -2770,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2789,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2841,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2859,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2913,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.7/drivers/block/ida_cmd.h Mon Dec 11 21:50:39 2000 +++ linux/drivers/block/ida_cmd.h Tue Jul 24 15:34:38 2001 @@ -96,7 +96,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.7/drivers/block/ll_rw_blk.c Fri Jul 20 05:51:23 2001 +++ linux/drivers/block/ll_rw_blk.c Tue Jul 24 14:26:33 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -51,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... 
- */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -83,7 +71,7 @@ * * if (!blk_size[MAJOR]) then no minor size checking is done. */ -int * blk_size[MAX_BLKDEV]; +sector_t *blk_size[MAX_BLKDEV]; /* * blksize_size contains the size of all block-devices: @@ -95,18 +83,9 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. + * blk_gendisk contains pointers to the gendisk structures */ -int * hardsect_size[MAX_BLKDEV]; +struct gendisk *blk_gendisk[MAX_BLKDEV]; /* * The following tunes the read-ahead algorithm in mm/filemap.c @@ -114,11 +93,6 @@ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * queued sectors for all devices, used to make sure we don't fill all * of memory with locked buffers */ @@ -130,15 +104,20 @@ static int high_queued_sectors, low_queued_sectors; static int batch_requests, queue_nr_requests; static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); +unsigned long blk_max_low_pfn; -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} - -inline request_queue_t *__blk_get_queue(kdev_t dev) +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -148,69 +127,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - -static int __blk_cleanup_queue(struct list_head *head) -{ - struct request *rq; - int i = 0; - - if (list_empty(head)) - return 0; - - do { - rq = list_entry(head->next, struct request, table); - list_del(&rq->table); - kmem_cache_free(request_cachep, rq); - i++; - } while (!list_empty(head)); - - return i; -} - -/** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released - * - * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... 
- **/ -void blk_cleanup_queue(request_queue_t * q) -{ - int count = queue_nr_requests; - - count -= __blk_cleanup_queue(&q->request_freelist[READ]); - count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); - count -= __blk_cleanup_queue(&q->pending_freelist[READ]); - count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); - - memset(q, 0, sizeof(*q)); -} - /** * blk_queue_headactive - indicate whether head of request queue may be active * @q: The queue which this applies to. @@ -234,10 +150,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -246,7 +161,7 @@ * @mfn: the alternate make_request function * * Description: - * The normal way for &struct buffer_heads to be passed to a device + * The normal way for &struct bios to be passed to a device * driver is for them to be collected into requests on a request * queue, and then to allow the device driver to select requests * off that queue when it is ready. This works well for many block @@ -258,19 +173,103 @@ * * Caveat: * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. **/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + q->max_segments = MAX_SEGMENTS; q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); + + q->queue_state = Queue_up; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @bus_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page, ie + * one the driver can still call bio_page() and get a valid address on. + **/ +void blk_queue_bounce_limit(request_queue_t *q, unsigned long long dma_addr) +{ + q->bounce_limit = mem_map + (dma_addr >> PAGE_SHIFT); + + /* + * set page alloc gfp mask for bounce pages + */ + q->bounce_gfp = GFP_NOIO; + + /* + * until the zoning design is decided on, always go low when + * getting a bounce page + */ +#if 0 + if (dma_addr > BLK_BOUNCE_HIGH) + q->bounce_gfp |= __GFP_DMA32; +#endif +} + +/** + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. 
+ **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request + **/ +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) { - if (req->nr_segments < max_segments) { + q->max_segments = max_segments; +} + +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes + * + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. + **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +{ + q->hardsect_size = size; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -278,36 +277,65 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (BIO_CONTIG(req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (BIO_CONTIG(bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (BIO_CONTIG(req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left on there to be serviced, it will be left in a state where + * it is still unplugged but not be recalled by the block layer. + * not be replugged, and thus request_fn will be run. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. 
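/*
 * Illustrative sketch only (not part of the patch): how a low level driver
 * might use the queue limit helpers introduced above when bringing up its
 * queue.  MY_MAJOR, my_request_fn and the particular limits are hypothetical;
 * the cciss and cpqarray conversions earlier in this patch follow the same
 * pattern with BLK_BOUNCE_4G and their own segment counts.
 */
static void my_request_fn(request_queue_t *q);

static int __init my_driver_init_queue(void)
{
	request_queue_t *q = BLK_DEFAULT_QUEUE(MY_MAJOR);
	int ret;

	/* blk_init_queue() can now fail, so check the return value */
	ret = blk_init_queue(q, my_request_fn);
	if (ret)
		return ret;

	blk_queue_headactive(q, 0);
	/* controller can only DMA to the low 4GB, bounce anything above */
	blk_queue_bounce_limit(q, BLK_BOUNCE_4G);
	/* at most 128 sectors and 32 scatter-gather segments per request */
	blk_queue_max_sectors(q, 128);
	blk_queue_max_segments(q, 32);
	/* plain 512 byte hardware sectors */
	blk_queue_hardsect_size(q, 512);
	return 0;
}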
+ **/ +void inline blk_wake_queue(request_queue_t *q) +{ +#if 1 + if (!blk_set_plugged(q)) + queue_task(&q->plug_tq, &tq_disk); +#else + q->request_fn(q); +#endif +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -316,16 +344,12 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!list_empty(&q->queue_head)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + blk_wake_queue(q); } /* @@ -333,24 +357,91 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } + if (blk_set_unplugged(q) && !list_empty(&q->queue_head)) + q->request_fn(q); } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static void blk_init_free_list(request_queue_t *q) +static int __blk_cleanup_rqlist(struct list_head *head) +{ + struct request *rq; + int i = 0; + + if (list_empty(head)) + return 0; + + do { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } while (!list_empty(head)); + + return i; +} + +static int __blk_cleanup_queue(request_queue_t *q) +{ + int count; + + count = __blk_cleanup_rqlist(&q->request_freelist[READ]); + count += __blk_cleanup_rqlist(&q->request_freelist[WRITE]); + count += __blk_cleanup_rqlist(&q->pending_freelist[READ]); + count += __blk_cleanup_rqlist(&q->pending_freelist[WRITE]); + + return count; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... 
+ **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(q); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + bio_hash_cleanup(&q->queue_hash); + + memset(q, 0, sizeof(*q)); +} + +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -366,21 +457,27 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->table, &q->request_freelist[i & 1]); + if (i < queue_nr_requests >> 1) + list_add(&rq->queuelist, &q->request_freelist[READ]); + else + list_add(&rq->queuelist, &q->request_freelist[WRITE]); } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); + return 0; +nomem: + __blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -403,8 +500,8 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * The request on the head of the queue is by default assumed to be * potentially active, and it is not considered for re-ordering or merging @@ -415,33 +512,49 @@ * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { + int ret = -ENOMEM; + INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + + if (blk_init_free_list(q)) + goto out_err; + + if (bio_hash_init(&q->queue_hash, queue_nr_requests >> 2)) + goto cleanup_queue; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS))) + goto cleanup_hash; + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. 
+ * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_set_unplugged(q); + blk_mark_headactive(q); + return 0; +cleanup_hash: + bio_hash_cleanup(&q->queue_hash); +cleanup_queue: + blk_cleanup_queue(q); +out_err: + return ret; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, table); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -450,7 +563,7 @@ if (!list_empty(&q->request_freelist[rw])) { rq = blkdev_free_rq(&q->request_freelist[rw]); - list_del(&rq->table); + list_del(&rq->queuelist); rq->rq_status = RQ_ACTIVE; rq->special = NULL; rq->q = q; @@ -467,34 +580,24 @@ register struct request *rq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&q->wait_for_request, &wait); + spin_lock_prefetch(&q->queue_lock); + + add_wait_queue_exclusive(&q->wait_for_request[rw], &wait); for (;;) { __set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (rq) break; generic_unplug_device(q); schedule(); } - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -543,7 +646,7 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. 
* * By this point, req->cmd is always either READ/WRITE, never READA, @@ -552,18 +655,19 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) BUG(); - } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + e->elevator_add_req_fn(q, req, insert_here); } inline void blk_refill_freelist(request_queue_t *q, int rw) @@ -576,7 +680,7 @@ } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ inline void blkdev_release_request(struct request *req) { @@ -601,12 +705,12 @@ /* * Add to pending free list and batch wakeups */ - list_add(&req->table, &q->pending_freelist[rw]); + list_add(&req->queuelist, &q->pending_freelist[rw]); if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); - wake_up_nr(&q->wait_for_request, wake_up); + wake_up_nr(&q->wait_for_request[rw], wake_up); } } } @@ -614,10 +718,7 @@ /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static void attempt_merge(request_queue_t *q, struct request *req) { struct request *next; @@ -626,8 +727,8 @@ return; if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special) return; /* * If we are not allowed to merge these requests, then @@ -635,135 +736,135 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + q->elevator.elevator_merge_req_fn(req, next); + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + blkdev_dequeue_request(next); + blkdev_release_request(next); + } +} - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) +{ + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + struct list_head *prev = rq->queuelist.prev; + + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. + **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) { - struct list_head * prev; + unsigned long flags; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static int __make_request(request_queue_t *q, struct bio *bio) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, count; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; + sector_t sector; - count = bh->b_size >> 9; - sector = bh->b_rsector; + sector = bio->bi_sector; + count = bio_sectors(bio); + rw = bio_rw(bio); - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } - - /* We'd better have a real physical mapping! 
- Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + latency = elevator_request_latency(elevator, rw); -/* look for a free request. */ /* - * Try to coalesce the new request with old requests + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ - max_sectors = get_max_sectors(bh->b_rdev); + bio = blk_queue_bounce(q, bio); again: + spin_lock_prefetch(&q->queue_lock); req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency + * and invalidate the entire existing merge hash */ - spin_lock_irq(&io_request_lock); + if ((bio->bi_flags & BIO_BARRIER) && !freereq) { + latency = 0; + __bio_hash_inval(&q->queue_hash); + } insert_here = head->prev; if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) + } else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); switch (el_ret) { - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
+ */ + req->buffer = bio_data(bio); + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -776,7 +877,7 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: @@ -794,107 +895,140 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + spin_unlock_irq(&q->queue_lock); + if (bio->bi_flags & BIO_RW_AHEAD) { + bio->bi_flags |= BIO_RW_BLOCK; goto end_io; + } freereq = __get_request_wait(q, rw); goto again; } + bio->bi_req = req; + /* fill up the request-info, and add it to the queue */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; blk_started_io(count); add_request(q, req, insert_here); out: if (freereq) blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + if (__bio_hash_add_unique(&q->queue_hash, bio)) + printk("ll_rw_blk: %lu for %s already there\n", bio->bi_sector, kdevname(bio->bi_dev)); + spin_unlock_irq(&q->queue_lock); return 0; end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + major = MAJOR(bio->bi_dev); + if ((g = blk_gendisk[major])) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. + * generic_make_request: hand a buffer to it's device driver for I/O + * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. * * generic_make_request() does not return any status. 
The * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io + * completion, is delivered asynchronously through the bio->bi_end_io * function described (one day) elsewhere. * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields + * The caller of generic_make_request must make sure that bi_io_vec + * is set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_sector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request(struct bio *bio) { - int major = MAJOR(bh->b_rdev); - int minorsize = 0; + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; + int rw = bio_rw(bio); + sector_t minorsize = 0; - if (!bh->b_end_io) - BUG(); + /* + * don't lock any more buffers if we are above the high + * water mark. instead start I/O on the queued stuff. + */ + if (atomic_read(&queued_sectors) >= high_queued_sectors) { + if (bio->bi_flags & BIO_RW_AHEAD) { + bio->bi_flags |= BIO_RW_BLOCK; + goto end_io; + } + run_task_queue(&tq_disk); + wait_event(blk_buffers_wait, + atomic_read(&queued_sectors) < low_queued_sectors); + } - /* Test device size, when known. */ + /* Test device or partition size, when known. */ if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; + minorsize = blk_size[major][minor]; if (minorsize) { unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device.
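The calling convention spelled out in the kernel-doc above is what submit_bh() (further down in this patch) implements for buffer_head users. For a caller that is not buffer_head based, a minimal single-page read might be sketched as follows; my_bio_done(), my_read_page() and the idea of waiting on a completion are illustrative assumptions, not part of the patch:

	static void my_bio_done(struct bio *bio)
	{
		/* success/failure is reported via BIO_UPTODATE in bi_flags */
		complete((struct completion *) bio->bi_private);
		bio_put(bio);
	}

	static void my_read_page(kdev_t dev, sector_t sector, struct page *page)
	{
		DECLARE_COMPLETION(done);
		struct bio *bio = bio_alloc(GFP_NOIO);

		bio->bi_dev = dev;		/* may be remapped by lower layers */
		bio->bi_sector = sector;	/* 512-byte units */
		bio->bi_end_io = my_bio_done;
		bio->bi_private = &done;
		bio->bi_io_vec.bv_page = page;
		bio->bi_io_vec.bv_len = PAGE_SIZE;
		bio->bi_io_vec.bv_offset = 0;

		submit_bio(READ, bio);
		wait_for_completion(&done);
	}
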
*/ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), rw, + (sector + count)>>1, + (u64) blk_size[major][minor]); + } + bio->bi_flags |= BIO_EOF; + goto end_io; } } @@ -902,63 +1036,124 @@ * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + + /* + * just a reminder, will be changed of course + */ + if (q->queue_state != Queue_up) + printk("ll_rw_blk: request for downed queue\n"); + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. it's + * pretty simple, because no bio will ever contain more than one bio_vec + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bh->b_end_io(bh, bio->bi_flags & BIO_UPTODATE); + bio_put(bio); +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ +void submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + /* + * do some validity checks... 
+ */ + if (!bio->bi_end_io) + BUG(); + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); + BUG(); + } + + if (rw & WRITE) { + kstat.pgpgout += count; + bio->bi_flags |= BIO_WRITE; + } else { + kstat.pgpgin += count; + bio->bi_flags |= BIO_READ; + if (rw == READA) + bio->bi_flags |= BIO_RW_AHEAD; + } + + generic_make_request(bio); +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ void submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; if (!test_bit(BH_Lock, &bh->b_state)) BUG(); + if (!buffer_mapped(bh)) + BUG(); + if (!bh->b_end_io) + BUG(); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + submit_bio(rw, bio); } /** @@ -990,8 +1185,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * a multiple of the current approved size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -1014,7 +1210,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1032,16 +1228,6 @@ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - /* - * don't lock any more buffers if we are above the high - * water mark. instead start I/O on the queued stuff. - */ - if (atomic_read(&queued_sectors) >= high_queued_sectors) { - run_task_queue(&tq_disk); - wait_event(blk_buffers_wait, - atomic_read(&queued_sectors) < low_queued_sectors); - } - /* Only one thread can actually submit the I/O. 
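One subtlety in the ll_rw_block() check above: the modulus test was replaced with a mask, which is only equivalent because valid block sizes are powers of two. A standalone illustration, not taken from the patch:

	/*
	 * for a power-of-two blocksize, (size % blocksize) and
	 * (size & (blocksize - 1)) yield the same remainder, so either
	 * expression detects a buffer that is not a multiple of blocksize
	 */
	static inline int blocksize_misaligned(unsigned int size,
					       unsigned int blocksize)
	{
		return (size & (blocksize - 1)) != 0;
	}
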
*/ if (test_and_set_bit(BH_Lock, &bh->b_state)) continue; @@ -1086,11 +1272,47 @@ #endif +inline int __end_that_request_first(struct request *req, int uptodate) +{ + struct bio *bio; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); + + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); + blk_finished_io(nsect); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio->bi_req = NULL; + bio_endio(bio, uptodate); + if ((bio = req->bio) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bio_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bio_data(bio); + return 1; + } + } + return 0; +} + /** * end_that_request_first - end I/O on one buffer. + * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1105,43 +1327,21 @@ * blk_finished_io() appropriately. **/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(request_queue_t *q, struct request *rq, int uptodate) { - struct buffer_head * bh; - int nsect; + unsigned long flags; + int ret; - req->errors = 0; - if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + spin_lock_irqsave(&q->queue_lock, flags); + ret = __end_that_request_first(rq, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("end_request: buffer-list destroyed\n"); - } - req->buffer = bh->b_data; - return 1; - } - } - return 0; + return ret; } void end_that_request_last(struct request *req) { - if (req->waiting != NULL) + if (req->waiting) complete(req->waiting); blkdev_release_request(req); @@ -1166,7 +1366,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); atomic_set(&queued_sectors, 0); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1205,123 +1404,37 @@ low_queued_sectors / 2, queue_nr_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void 
paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? */ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(__end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); EXPORT_SYMBOL(queued_sectors); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_hardsect_size); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.7/drivers/block/loop.c Sat Jun 30 01:16:56 2001 +++ linux/drivers/block/loop.c Wed Jan 1 00:07:23 1997 @@ -75,8 +75,8 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; -static int *loop_sizes; +static struct loop_device *loop_dev, **loop_lookup; +static sector_t *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -86,10 +86,12 @@ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); + if (raw_buf != loop_buf) { + if (cmd == READ) + memcpy(loop_buf, raw_buf, size); + else + memcpy(raw_buf, loop_buf, size); + } return 0; } @@ -117,6 +119,7 @@ static int none_status(struct loop_device *lo, struct loop_info *info) { + lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -164,8 +167,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -178,8 +180,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = 
bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -251,18 +253,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -298,42 +299,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio->bi_flags & BIO_WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { - if (bh) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + /* + * check bi_end_io, may just be a remapped bio + */ + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -342,65 +347,56 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (bio->bi_flags & (BIO_UPTODATE | BIO_WRITE)) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, bio->bi_flags & BIO_UPTODATE); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; + struct bio *bio; - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; + /* + * for xfer_funcs that can operate on the same bh, do that + */ + if (lo->lo_flags & LO_FLAGS_BH_REMAP) { + bio = rbh; + goto out_bh; + } - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); - - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -408,66 +404,61 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + bio->bi_io_vec.bv_page = alloc_page(GFP_NOIO); + if (bio->bi_io_vec.bv_page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; + +out_bh: + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_flags |= rbh->bi_flags & BIO_RW_MASK; + spin_lock_irq(&lo->lo_lock); + bio->bi_dev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; atomic_inc(&lo->lo_pending); spin_unlock_irq(&lo->lo_lock); - if (rw == WRITE) { + if (rbh->bi_flags & BIO_WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + } else if (!(rbh->bi_flags & BIO_READ)) { + printk(KERN_ERR "loop: unknown command (%lx)\n", rbh->bi_flags); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rbh); /* * file 
backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -475,16 +466,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - bh->b_private = rbh; - IV = loop_get_iv(lo, bh->b_rsector); - if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + IV = loop_get_iv(lo, rbh->bi_sector); + if (rbh->bi_flags & BIO_WRITE) { + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -492,14 +481,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -507,19 +496,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret); + loop_put_buffer(bio); } } @@ -532,7 +519,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -566,12 +553,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -600,7 +587,7 @@ error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -620,7 +607,6 @@ * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. */ - error = -EINVAL; if (!aops->readpage) goto out_putf; @@ -649,6 +635,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_NOIO; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(lo_device)]) @@ -658,7 +645,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -852,7 +839,7 @@ err = -EINVAL; break; } - err = put_user(loop_sizes[lo->lo_number] << 1, (long *) arg); + err = put_user(loop_sizes[lo->lo_number] << 1, (sector_t *)arg); break; default: err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; @@ -983,13 +970,17 @@ if (!loop_dev) return -ENOMEM; - loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + + loop_sizes = kmalloc(max_loop * sizeof(sector_t), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1003,8 +994,9 @@ spin_lock_init(&lo->lo_lock); } - memset(loop_sizes, 0, max_loop * sizeof(int)); + memset(loop_sizes, 0, max_loop * sizeof(sector_t)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1013,9 +1005,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1028,6 +1020,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.7/drivers/block/nbd.c Sat Jun 30 01:15:41 2001 +++ linux/drivers/block/nbd.c Wed Jan 1 00:07:23 1997 @@ -56,7 +56,7 @@ static int nbd_blksizes[MAX_NBD]; static int nbd_blksize_bits[MAX_NBD]; -static int nbd_sizes[MAX_NBD]; +static sector_t nbd_sizes[MAX_NBD]; static u64 nbd_bytesizes[MAX_NBD]; static struct nbd_device nbd_dev[MAX_NBD]; @@ -166,14 +166,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -206,14 +206,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -251,7 +251,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -286,7 +286,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -334,22 +334,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? 
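nbd_send_req() and nbd_read_stat() above transmit a request by walking its bio chain one segment at a time. The same traversal works for other bookkeeping on the new request layout; a small sketch with a made-up helper name:

	static unsigned int rq_byte_count(struct request *req)
	{
		struct bio *bio;
		unsigned int bytes = 0;

		/* a request now carries a singly linked chain of bios */
		for (bio = req->bio; bio; bio = bio->bi_next)
			bytes += bio_size(bio);

		return bytes;
	}
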
*/ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- /opt/kernel/linux-2.4.7/drivers/block/paride/pd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/paride/pd.c Tue Jul 24 15:04:44 2001 @@ -329,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -455,8 +454,7 @@ pd_gendisk.major = major; pd_gendisk.major_name = name; - pd_gendisk.next = gendisk_head; - gendisk_head = &pd_gendisk; + add_gendisk(&pd_gendisk); for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return -ENODEV; - switch (cmd) { + switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, (char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(pd_hd[dev].nr_sects,(long *) arg); - return (0); - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -546,9 +539,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -586,36 +579,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for 
(p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<next)) - if (*gdp == &pd_gendisk) break; - if (*gdp) *gdp = (*gdp)->next; + devfs_unregister_blkdev(MAJOR_NR, name); + del_gendisk(&pd_gendisk); - for (unit=0;unitcmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -963,7 +947,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- /opt/kernel/linux-2.4.7/drivers/block/paride/pf.c Sun Feb 4 19:05:29 2001 +++ linux/drivers/block/paride/pf.c Tue Jul 24 15:04:44 2001 @@ -463,7 +463,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); @@ -483,10 +483,7 @@ return 0; case BLKGETSIZE: if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(PF.capacity,(long *) arg); - return (0); + return put_user(PF.capacity,(long *) arg); case BLKROSET: case BLKROGET: case BLKRASET: diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- /opt/kernel/linux-2.4.7/drivers/block/ps2esdi.c Wed Jul 11 01:18:51 2001 +++ linux/drivers/block/ps2esdi.c Tue Jul 24 15:04:44 2001 @@ -65,6 +65,7 @@ #define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static void reset_ctrl(void); @@ -115,9 +116,8 @@ static int no_int_yet; static int access_count[MAX_HD]; static char ps2esdi_valid[MAX_HD]; -static int ps2esdi_sizes[MAX_HD << 6]; +static sector_t ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -183,9 +183,9 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - /* some minor housekeeping - setup the global gendisk structure */ - ps2esdi_gendisk.next = gendisk_head; - gendisk_head = &ps2esdi_gendisk; + /* setup the global gendisk structure */ + add_gendisk(&ps2esdi_gendisk); + ps2esdi_geninit(); return 0; } /* ps2esdi_init */ @@ -221,18 +221,18 @@ } void -cleanup_module(void) -{ - if(ps2esdi_slot) - { +cleanup_module(void) { + if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); } release_region(io_base, 4); free_dma(dma_arb_level); - free_irq(PS2ESDI_IRQ, NULL) + free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -415,16 +415,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. 
*/ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -482,7 +479,7 @@ if (virt_to_bus(CURRENT->buffer + CURRENT->current_nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for above 16Mb dmas */ else if ((CURRENT_DEV < ps2esdi_drives) && (CURRENT->sector + CURRENT->current_nr_sectors <= @@ -495,13 +492,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -511,7 +504,7 @@ break; default: printk("%s: Unknown command\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); break; } /* handle different commands */ } @@ -519,7 +512,7 @@ else { printk("Grrr. error. ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects); - end_request(FAIL); + __end_request(CURRENT, FAIL); } } /* main strategy routine */ @@ -584,7 +577,7 @@ if (ps2esdi_out_cmd_blk(cmd_blk)) { printk("%s: Controller failed\n", DEVICE_NAME); if ((++CURRENT->errors) >= MAX_RETRIES) - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for failure to put out the command block */ else { @@ -958,10 +951,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); - end_request(ending); + spin_lock_irqsave(PS2ESDI_LOCK, flags); + __end_request(CURRENT, ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1100,20 +1093,10 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); - } - break; - - case BLKGETSIZE: - if (arg) { - if ((err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)))) - return (err); - put_user(ps2esdi[MINOR(inode->i_rdev)].nr_sects, (long *) arg); - - return (0); + return 0; } break; @@ -1122,6 +1105,7 @@ return -EACCES; return (ps2esdi_reread_partitions(inode->i_rdev)); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1138,8 +1122,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1147,21 +1130,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 
1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.7/drivers/block/rd.c Mon Jul 16 01:15:44 2001 +++ linux/drivers/block/rd.c Wed Jan 1 00:07:23 1997 @@ -98,7 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static sector_t rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct inode *rd_inode[NUM_RAMDISKS]; /* Protected device inodes */ @@ -194,22 +194,21 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int minor; unsigned long offset, len; struct buffer_head *rbh; char *bdata; + int rw = bio_rw(sbh); - - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -221,25 +220,26 @@ goto fail; } - rbh = getblk(sbh->b_rdev, sbh->b_rsector/(sbh->b_size>>9), sbh->b_size); + rbh = getblk(sbh->bi_dev,sbh->bi_sector/bio_sectors(sbh),bio_size(sbh)); /* I think that it is safe to assume that rbh is not in HighMem, though * sbh might be - NeilBrown */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) - memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) - memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); + bdata = kmap(bio_page(sbh)); + + if (rw == READ) + memcpy(bdata, rbh->b_data, rbh->b_size); + else + memcpy(rbh->b_data, bdata, rbh->b_size); + + kunmap(bio_page(sbh)); + mark_buffer_protected(rbh); brelse(rbh); - sbh->b_end_io(sbh,1); + bio_endio(sbh, 1); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -370,7 +370,8 @@ for (i = 0 ; i < NUM_RAMDISKS; i++) { if (rd_inode[i]) { - /* withdraw invalidate_buffers() and prune_icache() immunity */ + /* withdraw invalidate_buffers() and prune_icache() + immunity */ atomic_dec(&rd_inode[i]->i_bdev->bd_openers); /* remove stale pointer to module address space */ rd_inode[i]->i_bdev->bd_op = NULL; @@ -381,9 +382,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -428,7 +427,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &fd_fops, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/xd.c linux/drivers/block/xd.c --- 
/opt/kernel/linux-2.4.7/drivers/block/xd.c Fri May 25 00:14:08 2001 +++ linux/drivers/block/xd.c Tue Jul 24 15:04:44 2001 @@ -173,8 +173,7 @@ devfs_handle = devfs_mk_dir (NULL, xd_gendisk.major_name, NULL); blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - xd_gendisk.next = gendisk_head; - gendisk_head = &xd_gendisk; + add_gendisk(&xd_gendisk); xd_geninit(); return 0; @@ -258,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -296,7 +294,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -333,18 +331,16 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: - if (!arg) return -EINVAL; - return put_user(xd_struct[MINOR(inode->i_rdev)].nr_sects,(long *) arg); case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -357,6 +353,7 @@ return -EACCES; return xd_reread_partitions(inode->i_rdev); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -387,11 +384,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -399,20 +394,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1112,18 +1103,9 @@ static void xd_done (void) { - struct gendisk ** gdp; - - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &xd_gendisk) - break; - if (*gdp) - *gdp = (*gdp)->next; + del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cdrom.c linux/drivers/cdrom/cdrom.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cdrom.c Wed Jul 11 23:55:41 2001 +++ linux/drivers/cdrom/cdrom.c Wed Jan 1 00:07:23 1997 @@ -279,6 +279,8 @@ static int lockdoor = 1; /* will we ever get to use this... sigh. 
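Both ps2esdi and xd above now revalidate partitions through wipe_partitions() followed by grok_partitions() with the total capacity in sectors. The shared idiom, sketched for a hypothetical driver (mydrv_capacity() is a placeholder for however the driver computes its size):

	static int mydrv_revalidate(kdev_t dev)
	{
		/* clear the old partition table, then re-read it */
		int res = wipe_partitions(dev);

		if (!res)
			grok_partitions(dev, mydrv_capacity(dev));

		return res;
	}
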
*/ static int check_media_type; +/* protects various structures */ +static spinlock_t cdrom_lock = SPIN_LOCK_UNLOCKED; MODULE_PARM(debug, "i"); MODULE_PARM(autoclose, "i"); MODULE_PARM(autoeject, "i"); @@ -420,8 +422,10 @@ &cdrom_fops, NULL); } cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); + spin_lock(&cdrom_lock); cdi->next = topCdromPtr; topCdromPtr = cdi; + spin_unlock(&cdrom_lock); return 0; } #undef ENSURE @@ -429,13 +433,15 @@ int unregister_cdrom(struct cdrom_device_info *unreg) { struct cdrom_device_info *cdi, *prev; - int major = MAJOR(unreg->dev); + int major = MAJOR(unreg->dev), ret; cdinfo(CD_OPEN, "entering unregister_cdrom\n"); + ret = -1; if (major < 0 || major >= MAX_BLKDEV) - return -1; + goto out; + spin_lock(&cdrom_lock); prev = NULL; cdi = topCdromPtr; while (cdi != NULL && cdi->dev != unreg->dev) { @@ -443,27 +449,34 @@ cdi = cdi->next; } + ret = -2; if (cdi == NULL) - return -2; + goto out_unlock; + ret = 0; if (prev) prev->next = cdi->next; else topCdromPtr = cdi->next; cdi->ops->n_minors--; - devfs_unregister (cdi->de); - devfs_dealloc_unique_number (&cdrom_numspace, cdi->number); + devfs_unregister(cdi->de); + devfs_dealloc_unique_number(&cdrom_numspace, cdi->number); cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); - return 0; +out_unlock: + spin_unlock(&cdrom_lock); +out: + return ret; } struct cdrom_device_info *cdrom_find_device(kdev_t dev) { struct cdrom_device_info *cdi; + spin_lock(&cdrom_lock); cdi = topCdromPtr; while (cdi != NULL && cdi->dev != dev) cdi = cdi->next; + spin_unlock(&cdrom_lock); return cdi; } @@ -1926,7 +1939,11 @@ ret = cdi->ops->generic_packet(cdi, cgc); __copy_to_user(usense, cgc->sense, sizeof(*usense)); - if (!ret && cgc->data_direction == CGC_DATA_READ) + + /* + * copy data back regardless of package status + */ + if (cgc->data_direction == CGC_DATA_READ) __copy_to_user(ubuf, cgc->buffer, cgc->buflen); kfree(cgc->buffer); return ret; @@ -2502,6 +2519,7 @@ { struct cdrom_device_info *cdi; + spin_lock(&cdrom_lock); for (cdi = topCdromPtr; cdi != NULL; cdi = cdi->next) { if (autoclose && CDROM_CAN(CDC_CLOSE_TRAY)) cdi->options |= CDO_AUTO_CLOSE; @@ -2520,6 +2538,7 @@ else cdi->options &= ~CDO_CHECK_TYPE; } + spin_unlock(&cdrom_lock); } static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cdu31a.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cdu31a.c Wed Jan 1 00:07:23 1997 @@ -1675,7 +1675,7 @@ restore_flags(flags); if (!QUEUE_EMPTY && CURRENT->rq_status != RQ_INACTIVE) { - end_request(0); + __end_request(CURRENT, 0); } restore_flags(flags); #if DEBUG @@ -1694,7 +1694,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* yes lets release the lock and then much with the queue etc. I won't + * bother auditing this driver, it's decrepit and full of races anyway. + * /jens */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1853,7 +1856,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
*/ abort_read(); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cm206.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cm206.c Wed Jan 1 00:07:23 1997 @@ -823,10 +823,10 @@ return; if (CURRENT->cmd != READ) { debug(("Non-read command %d on cdrom\n", CURRENT->cmd)); - end_request(0); + __end_request(CURRENT, 0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error=0; for (i=0; inr_sectors; i++) { int e1, e2; @@ -849,8 +849,8 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); - end_request(!error); + spin_lock_irq(&q->queue_lock); + __end_request(CURRENT, !error); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/sbpcd.c Tue Jul 17 00:13:32 2001 +++ linux/drivers/cdrom/sbpcd.c Wed Jan 1 00:07:23 1997 @@ -4882,7 +4882,7 @@ #undef DEBUG_GTL static inline void sbpcd_end_request(struct request *req, int uptodate) { list_add(&req->queue, &req->q->queue_head); - end_request(uptodate); + __end_request(req, uptodate); } /*==========================================================================*/ /* @@ -4924,7 +4924,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4964,7 +4964,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5005,7 +5005,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5021,7 +5021,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.7/drivers/char/raw.c Thu Jun 28 02:10:55 2001 +++ linux/drivers/char/raw.c Wed Jan 1 00:07:23 1997 @@ -134,10 +134,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -282,16 +280,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -316,7 +312,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -334,18 +329,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single 
kiobuf as we go to perform each chunk of - * IO. - */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -357,10 +344,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -370,6 +354,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/i2o/i2o_block.c linux/drivers/i2o/i2o_block.c --- /opt/kernel/linux-2.4.7/drivers/i2o/i2o_block.c Fri Jul 20 05:48:39 2001 +++ linux/drivers/i2o/i2o_block.c Tue Jul 24 15:04:44 2001 @@ -111,15 +111,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; -static int i2ob_sizes[MAX_I2OB<<4]; +static sector_t i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -249,9 +250,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = 0; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -280,30 +281,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_bus(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_bus(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_bus(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -319,30 +320,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_bus(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + __raw_writel(bio_to_bus(bio), mptr+4); mptr += 8; - size = 
bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_bus(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -406,7 +407,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (__end_that_request_first( req, !req->errors)) + ; /* * It is now ok to complete the request. @@ -414,61 +416,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -526,10 +473,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -550,12 +497,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -571,10 +518,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -620,7 +567,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -665,7 +612,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * and out @@ -673,7 +620,7 @@ return; } - 
spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -688,8 +635,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... */ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -701,7 +648,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -780,8 +727,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -815,11 +761,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -872,13 +818,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -889,12 +836,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. */ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1123,32 +1070,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? 
-EFAULT : 0; } case BLKRRPART: @@ -1156,6 +1094,7 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1345,8 +1284,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1360,23 +1297,25 @@ for(i=unit;i<=unit+15;i++) { + request_queue_t *q = i2ob_dev[unit].req_queue; + if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_sectors(q, 8); i2ob_dev[i].depth = 4; } else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } else { /* MAX_SECTORS was used but 255 is a dumb number for striped RAID */ - i2ob_max_sectors[i]=256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); } } @@ -1421,7 +1360,7 @@ } printk(".\n"); printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", - d->dev_name, i2ob_max_sectors[unit]); + d->dev_name, i2ob_dev[unit].req_queue->max_sectors); /* * If this is the first I2O block device found on this IOP, @@ -1441,7 +1380,7 @@ */ dev->req_queue = &i2ob_queues[c->unit]->req_queue; - grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9)); + grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9)); /* * Register for the events we're interested in and that the @@ -1484,9 +1423,6 @@ blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1497,11 +1433,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1699,7 +1635,7 @@ int i = 0; int flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1721,7 +1657,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1734,12 +1670,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Sync the device...this will force all outstanding I/Os @@ -1903,9 +1838,7 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = 
i2ob_get_queue; blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); @@ -1921,7 +1854,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1977,9 +1909,8 @@ /* * Adding i2ob_gendisk into the gendisk list. - */ - i2ob_gendisk.next = gendisk_head; - gendisk_head = &i2ob_gendisk; + */ + add_gendisk(&i2ob_gendisk); return 0; } @@ -1992,7 +1923,6 @@ void cleanup_module(void) { - struct gendisk *gdp; int i; if(evt_running) { @@ -2049,20 +1979,6 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - /* - * Why isnt register/unregister gendisk in the kernel ??? - */ - - if (gendisk_head == &i2ob_gendisk) { - gendisk_head = i2ob_gendisk.next; - } - else { - for (gdp = gendisk_head; gdp; gdp = gdp->next) - if (gdp->next == &i2ob_gendisk) - { - gdp->next = i2ob_gendisk.next; - break; - } - } + del_gendisk(&i2ob_gendisk); } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/i2o/i2o_core.c linux/drivers/i2o/i2o_core.c --- /opt/kernel/linux-2.4.7/drivers/i2o/i2o_core.c Wed May 2 01:10:37 2001 +++ linux/drivers/i2o/i2o_core.c Tue Jul 24 15:12:30 2001 @@ -124,6 +124,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -134,7 +135,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- /opt/kernel/linux-2.4.7/drivers/ide/aec62xx.c Tue Jun 20 16:52:36 2000 +++ linux/drivers/ide/aec62xx.c Wed Jan 1 00:07:23 1997 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- /opt/kernel/linux-2.4.7/drivers/ide/cmd64x.c Fri Jul 28 01:40:57 2000 +++ linux/drivers/ide/cmd64x.c Wed Jan 1 00:07:23 1997 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- /opt/kernel/linux-2.4.7/drivers/ide/cs5530.c Wed Jan 3 01:58:45 2001 +++ linux/drivers/ide/cs5530.c Wed Jan 1 00:07:23 1997 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- /opt/kernel/linux-2.4.7/drivers/ide/cy82c693.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/cy82c693.c Wed Jan 1 00:07:23 1997 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.7/drivers/ide/hd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/ide/hd.c Tue Jul 24 15:04:44 2001 @@ -107,7 +107,6 @@ static int 
hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) @@ -634,20 +632,16 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -734,11 +728,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -841,9 +833,9 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - hd_gendisk.next = gendisk_head; - gendisk_head = &hd_gendisk; + add_gendisk(&hd_gendisk); init_timer(&device_timer); device_timer.function = hd_times_out; hd_geninit(); @@ -870,9 +862,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -887,25 +877,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- /opt/kernel/linux-2.4.7/drivers/ide/hpt34x.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/hpt34x.c Wed Jan 1 00:07:23 1997 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- /opt/kernel/linux-2.4.7/drivers/ide/hpt366.c Thu Jun 28 02:10:55 2001 +++ linux/drivers/ide/hpt366.c Wed Jan 1 00:07:23 1997 @@ -720,6 +720,7 @@ 
hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.c Fri Jul 20 06:04:55 2001 +++ linux/drivers/ide/ide-cd.c Wed Jan 1 00:07:23 1997 @@ -926,7 +926,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. */ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +959,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -978,8 +977,7 @@ /* If we've filled the present buffer but there's another chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && - rq->nr_sectors > 0) + if (rq->current_nr_sectors == 0 && rq->nr_sectors) cdrom_end_request (1, drive); /* If the buffers are full, cache the rest of the data in our @@ -1059,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1098,9 +1096,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1197,13 +1195,15 @@ start it over entirely, or even put it back on the request queue. */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); + rq->hard_nr_sectors = rq->nr_sectors; + rq->hard_sector = rq->sector; } /* @@ -1217,20 +1217,22 @@ /* If the request is relative to a partition, fix it up to refer to the absolute address. */ - if ((minor & PARTN_MASK) != 0) { + if (minor & PARTN_MASK) { rq->sector = block; minor &= ~PARTN_MASK; - rq->rq_dev = MKDEV (MAJOR(rq->rq_dev), minor); + rq->rq_dev = MKDEV(MAJOR(rq->rq_dev), minor); } /* We may be retrying this request after an error. Fix up any weirdness which might be present in the request packet. */ - restore_request (rq); + restore_request(rq); /* Satisfy whatever we can of this request from our cached sector. 
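The ide-cd conversions above (and the read/write interrupt handlers that follow) all rely on the same mapping from the old buffer_head fields to the bio accessors this patch introduces: rq->bh/b_reqnext become rq->bio/bi_next, b_data becomes bio_data(), and b_size becomes bio_size()/bio_sectors(). A driver-agnostic sketch of walking a request with those accessors (demo_walk_request and demo_consume are hypothetical; the accessors themselves are the ones used throughout this diff):

	static void demo_consume(char *data, unsigned int bytes);	/* hypothetical */

	static void demo_walk_request(struct request *rq)
	{
		struct bio *bio;

		for (bio = rq->bio; bio; bio = bio->bi_next) {
			/*
			 * bio_data() replaces bh->b_data, bio_size() replaces
			 * bh->b_size, bio_sectors() is the size in 512-byte units
			 */
			demo_consume((char *) bio_data(bio), bio_size(bio));
		}
	}
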
*/ if (cdrom_read_from_buffer(drive)) return ide_stopped; + blk_attempt_remerge(&drive->queue, rq); + /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1478,7 +1480,7 @@ static ide_startstop_t cdrom_write_intr(ide_drive_t *drive) { - int stat, ireason, len, sectors_to_transfer; + int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; int i, dma_error = 0, dma = info->dma; ide_startstop_t startstop; @@ -1499,6 +1501,9 @@ return startstop; } + /* + * using dma, transfer is complete now + */ if (dma) { if (dma_error) return ide_error(drive, "dma error", stat); @@ -1520,12 +1525,13 @@ /* If we're not done writing, complain. * Otherwise, complete the command normally. */ + uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", - drive->name, rq->current_nr_sectors); - cdrom_end_request(0, drive); - } else - cdrom_end_request(1, drive); + printk("%s: write_intr: data underrun (%u blocks)\n", + drive->name, rq->current_nr_sectors); + uptodate = 0; + } + cdrom_end_request(uptodate, drive); return ide_stopped; } @@ -1534,26 +1540,42 @@ if (cdrom_write_check_ireason(drive, len, ireason)) return ide_stopped; - /* The number of sectors we need to read from the drive. */ sectors_to_transfer = len / SECTOR_SIZE; - /* Now loop while we still have data to read from the drive. DMA - * transfers will already have been complete + /* + * now loop and write out the data */ while (sectors_to_transfer > 0) { - /* If we've filled the present buffer but there's another - chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && rq->nr_sectors > 0) - cdrom_end_request(1, drive); + int this_transfer; + + if (!rq->current_nr_sectors) { + printk("ide-cd: write_intr: oops\n"); + break; + } + + /* + * Figure out how many sectors we can transfer + */ + this_transfer = MIN(sectors_to_transfer,rq->current_nr_sectors); - atapi_output_bytes(drive, rq->buffer, rq->current_nr_sectors); - rq->nr_sectors -= rq->current_nr_sectors; - rq->current_nr_sectors = 0; - rq->sector += rq->current_nr_sectors; - sectors_to_transfer -= rq->current_nr_sectors; + while (this_transfer > 0) { + atapi_output_bytes(drive, rq->buffer, SECTOR_SIZE); + rq->buffer += SECTOR_SIZE; + --rq->nr_sectors; + --rq->current_nr_sectors; + ++rq->sector; + --this_transfer; + --sectors_to_transfer; + } + + /* + * current buffer complete, move on + */ + if (rq->current_nr_sectors == 0 && rq->nr_sectors) + cdrom_end_request (1, drive); } - /* arm handler */ + /* re-arm handler */ ide_set_handler(drive, &cdrom_write_intr, 5 * WAIT_CMD, NULL); return ide_started; } @@ -1584,10 +1606,26 @@ return cdrom_transfer_packet_command(drive, &pc, cdrom_write_intr); } -static ide_startstop_t cdrom_start_write(ide_drive_t *drive) +static ide_startstop_t cdrom_start_write(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; + /* + * writes *must* be 2kB frame aligned + */ + if ((rq->nr_sectors & 3) || (rq->sector & 3)) { + cdrom_end_request(0, drive); + return ide_stopped; + } + + /* + * for dvd-ram and such media, it's a really big deal to get + * big writes all the time. so scour the queue and attempt to + * remerge requests, often the plugging will not have had time + * to do this properly + */ + blk_attempt_remerge(&drive->queue, rq); + info->nsectors_buffered = 0; /* use dma, if possible. 
we don't need to check more, since we @@ -1630,7 +1668,7 @@ if (rq->cmd == READ) action = cdrom_start_read(drive, block); else - action = cdrom_start_write(drive); + action = cdrom_start_write(drive, rq); } info->last_block = block; return action; @@ -1833,6 +1871,7 @@ pc.buffer = buf; pc.buflen = buflen; + pc.quiet = 1; pc.c[0] = GPCMD_READ_TOC_PMA_ATIP; pc.c[6] = trackno; pc.c[7] = (buflen >> 8); @@ -2113,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2622,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2827,7 +2867,12 @@ drive->part[0].nr_sects = toc->capacity * SECTORS_PER_FRAME; HWIF(drive)->gd->sizes[minor] = toc->capacity * BLOCKS_PER_FRAME; + /* + * reset block size, ide_revalidate_disk incorrectly sets it to + * 1024 even for CDROM's + */ blk_size[HWIF(drive)->major] = HWIF(drive)->gd->sizes; + set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE); } static diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.h Fri Jul 20 21:54:49 2001 +++ linux/drivers/ide/ide-cd.h Wed Jan 1 00:07:23 1997 @@ -37,11 +37,12 @@ /************************************************************************/ -#define SECTOR_SIZE 512 #define SECTOR_BITS 9 -#define SECTORS_PER_FRAME (CD_FRAMESIZE / SECTOR_SIZE) +#define SECTOR_SIZE (1 << SECTOR_BITS) +#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS) #define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) -#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE / SECTOR_SIZE) +#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE >> SECTOR_BITS) +#define SECTORS_MAX (131072 >> SECTOR_BITS) #define BLOCKS_PER_FRAME (CD_FRAMESIZE / BLOCK_SIZE) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-disk.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-disk.c Wed Jan 1 00:07:23 1997 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -140,6 +141,7 @@ int i; unsigned int msect, nsect; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +152,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +162,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, 
rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +204,15 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + char *to = ide_map_buffer(rq); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +242,27 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? */ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +271,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer); } while (mcount); return 0; @@ -367,6 +371,8 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + char *buffer; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG); @@ -444,15 +450,17 @@ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + buffer = ide_map_buffer(rq); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer); } return ide_started; } @@ -481,7 +489,8 @@ { if (drive->removable && !drive->usage) { invalidate_buffers(inode->i_rdev); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -494,9 +503,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? 
BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); return 0; } @@ -690,7 +697,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-dma.c Mon Jan 15 22:08:15 2001 +++ linux/drivers/ide/ide-dma.c Wed Jan 1 00:07:23 1997 @@ -168,25 +168,6 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. - */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) @@ -213,34 +194,42 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; - struct scatterlist *sg = hwif->sg_table; + struct sg_list *sg = hwif->sg_table; + struct bio *bio = rq->bio; + unsigned long lastdataend; int nents = 0; if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - if (nents >= PRD_ENTRIES) - return 0; - - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + bio = rq->bio; + lastdataend = 0; + do { + /* + * continue segment from before? 
+ */ + if (bio_to_bus(bio) == lastdataend) { + sg[nents - 1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { + /* + * start new segment + */ + if (nents >= PRD_ENTRIES) + BUG(); + + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].page = bio_page(bio); + sg[nents].length = bio_size(bio); + sg[nents].offset = bio_offset(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); + nents++; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); + } while ((bio = bio->bi_next) != NULL); - return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); + return pci_map_sgl(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } /* @@ -258,7 +247,7 @@ #endif unsigned int count = 0; int i; - struct scatterlist *sg; + struct sg_list *sg; HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); @@ -266,7 +255,7 @@ return 0; sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + while (i) { u32 cur_addr; u32 cur_len; @@ -280,26 +269,20 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - return 0; /* revert to PIO for this request */ - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; - } + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; @@ -318,10 +301,10 @@ void ide_destroy_dmatable (ide_drive_t *drive) { struct pci_dev *dev = HWIF(drive)->pci_dev; - struct scatterlist *sg = HWIF(drive)->sg_table; + struct sg_list *sg = HWIF(drive)->sg_table; int nents = HWIF(drive)->sg_nents; - pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); + pci_unmap_sgl(dev, sg, nents, HWIF(drive)->sg_dma_direction); } /* @@ -450,6 +433,22 @@ return 0; } +#ifdef CONFIG_HIGHMEM +static inline void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + unsigned long addr = BLK_BOUNCE_HIGH; + + if (on && drive->media == ide_disk && HWIF(drive)->highmem) { + printk("%s: enabling highmem I/O\n", drive->name); + addr = BLK_BOUNCE_4G; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} +#else +#define ide_toggle_bounce(drive, on) +#endif + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
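ide_toggle_bounce() above is the only policy knob here: the per-hwif ->highmem flag plus blk_queue_bounce_limit() decide whether highmem pages are DMA'd directly or bounced. For a controller outside IDE the same call would be made at queue setup time; a minimal sketch assuming the blk_queue_bounce_limit()/BLK_BOUNCE_* interface from this patch (the mydev_* names are hypothetical):

	static void mydev_request(request_queue_t *q);		/* hypothetical request_fn */
	static request_queue_t mydev_queue;

	static void mydev_init_queue(void)
	{
		blk_init_queue(&mydev_queue, mydev_request);

		/*
		 * The controller can DMA anywhere below 4GB, so only pages above
		 * that boundary need to be bounced by the block layer.  A PIO-only
		 * device would pass BLK_BOUNCE_HIGH instead, bouncing (or mapping)
		 * every highmem page.
		 */
		blk_queue_bounce_limit(&mydev_queue, BLK_BOUNCE_4G);
	}
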
* @@ -471,15 +470,17 @@ ide_hwif_t *hwif = HWIF(drive); unsigned long dma_base = hwif->dma_base; byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-floppy.c Thu Jun 14 23:16:58 2001 +++ linux/drivers/ide/ide-floppy.c Wed Jan 1 00:07:23 1997 @@ -1380,9 +1380,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1555,10 +1552,7 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } + blk_queue_max_sectors(&drive->queue, 64); (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-probe.c Sun Mar 18 18:25:02 2001 +++ linux/drivers/ide/ide-probe.c Wed Jan 1 00:10:32 1997 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); blk_init_queue(q, do_ide_request); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -700,6 +712,13 @@ hwif->next = hwgroup->hwif->next; hwgroup->hwif->next = hwif; + if (!hwgroup->hwif) { + hwgroup->hwif = HWIF(hwgroup->drive); +#ifdef DEBUG + printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); +#endif + } + restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ for (index = 0; index < MAX_DRIVES; ++index) { ide_drive_t *drive = &hwif->drives[index]; if (!drive->present) @@ -710,13 +729,6 @@ hwgroup->drive->next = drive; ide_init_queue(drive); } - if (!hwgroup->hwif) { - hwgroup->hwif = HWIF(hwgroup->drive); -#ifdef DEBUG - printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); -#endif - } - restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ #if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__) printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, @@ -747,9 
+759,9 @@ */ static void init_gendisk (ide_hwif_t *hwif) { - struct gendisk *gd, **gdp; + struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -800,8 +804,8 @@ if (gd->flags) memset (gd->flags, 0, sizeof *gd->flags * units); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; - hwif->gd = *gdp = gd; /* link onto tail of list */ + hwif->gd = gd; + add_gendisk(gd); for (unit = 0; unit < units; ++unit) { if (hwif->drives[unit].present) { @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-proc.c Thu Oct 26 23:11:39 2000 +++ linux/drivers/ide/ide-proc.c Wed Jan 1 00:07:23 1997 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide.c Fri Jul 20 06:02:28 2001 +++ linux/drivers/ide/ide.c Tue Jul 24 15:04:44 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -167,6 +169,7 @@ static int idebus_parameter; /* holds the "idebus=" parameter */ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ @@ -512,17 +515,25 @@ { 
struct request *rq; unsigned long flags; + ide_drive_t *drive = hwgroup->drive; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = hwgroup->rq; - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + /* + * decide whether to reenable DMA -- 3 is a random magic for now, + * if we DMA timeout more than 3 times, just stay in PIO + */ + if (drive->state == DMA_PIO_RETRY && drive->retry_pio < 3) + hwgroup->hwif->dmaproc(ide_dma_on, drive); + + if (!__end_that_request_first(rq, uptodate)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -538,7 +549,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -547,7 +558,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -794,9 +805,9 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -819,11 +830,11 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1137,8 +1148,8 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; + struct request *rq = HWGROUP(drive)->rq; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1156,16 +1167,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1177,7 +1183,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); return startstop; } @@ -1188,7 +1195,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", 
drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1229,7 +1237,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1258,7 +1266,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(&DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1270,26 +1278,21 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". * * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { @@ -1301,8 +1304,7 @@ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1325,13 +1327,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? 
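The hwgroup serialisation above boils down to the standard atomic bit-ownership idiom: whoever wins test_and_set_bit(IDE_BUSY, ...) drives the queue, everyone else just queues their request and leaves, and the bit stays set until a completion path clears it. A stand-alone sketch of that idiom with generic names rather than the hwgroup code itself (demo_* are hypothetical; bitops as in <asm/bitops.h>):

	#include <asm/bitops.h>

	#define DEMO_BUSY	0		/* bit number inside demo_flags */

	static unsigned long demo_flags;

	static int demo_more_work(void);	/* hypothetical */
	static void demo_do_one(void);		/* hypothetical */

	static void demo_kick(void)
	{
		/* only the thread that flips the bit becomes the active one */
		if (test_and_set_bit(DEMO_BUSY, &demo_flags))
			return;			/* someone else already owns the engine */

		while (demo_more_work())
			demo_do_one();

		/*
		 * the real hwgroup keeps IDE_BUSY set while waiting for an IRQ
		 * and only clears it when a request sequence really finishes
		 */
		clear_bit(DEMO_BUSY, &demo_flags);
	}
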
*/ ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1345,9 +1347,14 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + + /* + * just continuing an interrupted request maybe + */ + hwgroup->rq = elv_next_request(&drive->queue); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1358,14 +1365,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&DRIVE_LOCK(drive)); ide__sti(); /* allow other IRQs while we start this request */ startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1388,6 +1395,50 @@ } /* + * un-busy the hwgroup etc, and clear any pending DMA status. we want to + * retry the current request in pio mode instead of risking tossing it + * all away + */ +void ide_dma_timeout_retry(ide_drive_t *drive) +{ + ide_hwif_t *hwif = HWIF(drive); + struct request *rq; + + /* + * end current dma transaction + */ + (void) hwif->dmaproc(ide_dma_end, drive); + + /* + * complain a little, later we might remove some of this verbosity + */ + printk("%s: timeout waiting for DMA\n", drive->name); + (void) hwif->dmaproc(ide_dma_timeout, drive); + + /* + * disable dma for now, but remember that we did so because of + * a timeout -- we'll reenable after we finish this next request + * (or rather the first chunk of it) in pio. + */ + drive->retry_pio++; + drive->state = DMA_PIO_RETRY; + (void) hwif->dmaproc(ide_dma_off_quietly, drive); + + /* + * un-busy drive etc (hwgroup is un-busy'ed on return) and + * make sure request is sane + */ + rq = HWGROUP(drive)->rq; + HWGROUP(drive)->rq = NULL; + + rq->errors = 0; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + //rq->buffer = rq->bh->b_data; +} + + +/* * ide_timer_expiry() is our timeout function for all drive operations. * But note that it can also be invoked as a result of a "sleep" operation * triggered by the mod_timer() call in ide_do_request. @@ -1400,7 +1451,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1410,10 +1465,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. 
*/ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1422,17 +1475,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1442,7 +1494,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1460,22 +1512,23 @@ startstop = handler(drive); } else { if (drive->waiting_for_dma) { - (void) hwgroup->hwif->dmaproc(ide_dma_end, drive); - printk("%s: timeout waiting for DMA\n", drive->name); - (void) hwgroup->hwif->dmaproc(ide_dma_timeout, drive); - } - startstop = ide_error(drive, "irq timeout", GET_STAT()); + startstop = ide_stopped; + ide_dma_timeout_retry(drive); + } else + startstop = ide_error(drive, "irq timeout", GET_STAT()); } set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } + spin_unlock_irqrestore(&ide_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(hwgroup->drive), flags); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(hwgroup->drive), flags); } /* @@ -1538,11 +1591,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } @@ -1576,7 +1629,7 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } drive = hwgroup->drive; @@ -1584,7 +1637,7 @@ /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } if (!drive_is_ready(drive)) { @@ -1594,21 +1647,20 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. 
*/ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); /* * Note that handler() may have set things up for another @@ -1621,13 +1673,13 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1637,9 +1689,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1648,11 +1697,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1712,7 +1757,7 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS); if (action == ide_wait) rq->waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; @@ -1722,9 +1767,9 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + list_add(&rq->queuelist, queue_head); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1733,6 +1778,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table.
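(Aside, not part of the patch itself: the ide.c hunks above replace the old integer hwgroup->busy, which was guarded by the global io_request_lock, with an IDE_BUSY bit in hwgroup->flags that is claimed with test_and_set_bit() and dropped with clear_bit(). The stand-alone C sketch below only illustrates that claim/drain/release pattern; it uses C11 atomics in place of the kernel bitops, and the hwgroup layout and the pending counter are invented for the example.)

/* Illustrative sketch only -- not from the patch.  Userspace analogue of the
 * IDE_BUSY ownership handoff; C11 atomics stand in for test_and_set_bit()
 * and clear_bit(), and the hwgroup fields here are made up for the example. */
#include <stdatomic.h>
#include <stdio.h>

#define IDE_BUSY 0x1UL

struct hwgroup {
	atomic_ulong flags;	/* stands in for hwgroup->flags */
	int pending;		/* stands in for the request queue */
};

static void do_request(struct hwgroup *hw)
{
	/* The first caller to set IDE_BUSY owns the hwgroup; later callers
	 * just leave their work queued and return at once, like the
	 * while (!test_and_set_bit(...)) entry check above. */
	if (atomic_fetch_or(&hw->flags, IDE_BUSY) & IDE_BUSY)
		return;

	while (hw->pending > 0) {		/* drain queued work */
		printf("servicing one request\n");
		hw->pending--;
	}

	/* No more work: drop ownership, like clear_bit(IDE_BUSY, &flags). */
	atomic_fetch_and(&hw->flags, ~IDE_BUSY);
}

int main(void)
{
	struct hwgroup hw = { .pending = 3 };

	do_request(&hw);	/* claims IDE_BUSY and drains the queue */
	do_request(&hw);	/* nothing left to do */
	return 0;
}

In the driver itself the bit intentionally stays set while the hwgroup is sleeping, which is why ide_timer_expiry() above only clears IDE_BUSY after test_and_clear_bit(IDE_SLEEP, &hwgroup->flags) succeeds.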
@@ -1745,40 +1800,33 @@ { ide_drive_t *drive; ide_hwgroup_t *hwgroup; - unsigned int p, major, minor; - long flags; + unsigned long flags; + int res; if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); return -EBUSY; - }; + } drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); - for (p = 0; p < (1<<PARTN_BITS); ++p) { - if (drive->part[p].nr_sects > 0) { - kdev_t devp = MKDEV(major, minor+p); - invalidate_device(devp, 1); - set_blocksize(devp, 1024); - } - drive->part[p].start_sect = 0; - drive->part[p].nr_sects = 0; - }; + res = wipe_partitions(i_rdev); + if (res) + goto leave; if (DRIVER(drive)->revalidate) DRIVER(drive)->revalidate(drive); + leave: drive->busy = 0; wake_up(&drive->wqueue); MOD_DEC_USE_COUNT; - return 0; + return res; } static void revalidate_drives (void) @@ -1943,7 +1991,7 @@ void ide_unregister (unsigned int index) { - struct gendisk *gd, **gdp; + struct gendisk *gd; ide_drive_t *drive, *d; ide_hwif_t *hwif, *g; ide_hwgroup_t *hwgroup; @@ -2058,18 +2106,13 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; - blksize_size[hwif->major] = NULL; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == hwif->gd) - break; - if (*gdp == NULL) - printk("gd not in disk chain!\n"); - else { - gd = *gdp; *gdp = gd->next; + blk_clear(hwif->major); + gd = hwif->gd; + if (gd) { + del_gendisk(gd); kfree(gd->sizes); kfree(gd->part); if (gd->de_arr) @@ -2077,6 +2120,7 @@ if (gd->flags) kfree (gd->flags); kfree(gd); + hwif->gd = NULL; } old_hwif = *hwif; init_hwif_data (index); /* restore hwif data to pristine status */ @@ -2292,7 +2336,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2305,7 +2349,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } return val; } @@ -2315,11 +2359,11 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); - while (hwgroup->busy) { + while (test_bit(IDE_BUSY, &hwgroup->flags)) { unsigned long lflags; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); __save_flags(lflags); /* local CPU only */ __sti(); /* local CPU only; needed for jiffies */ if (0 < (signed long)(jiffies - timeout)) { @@ -2328,7 +2372,7 @@ return -EBUSY; } __restore_flags(lflags); /* local CPU only */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); } return 0; } @@ -2369,7 +2413,7 @@ *p = val; break; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); return 0; } @@ -2509,24 +2553,14 @@ { struct hd_geometry *loc = (struct hd_geometry *) arg; unsigned short bios_cyl = drive->bios_cyl; /* truncate */ - if (!loc || (drive->media != ide_disk && drive->media !=
ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; - return 0; - } - - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) + return -EINVAL; + if (put_user(drive->bios_head, &loc->heads) || + put_user(drive->bios_sect, &loc->sectors) || + put_user(bios_cyl, &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) + return -EFAULT; return 0; } @@ -2542,9 +2576,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2662,6 +2693,7 @@ } return 0; + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2669,6 +2701,8 @@ case BLKPG: case BLKELVGET: case BLKELVSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); default: diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/osb4.c linux/drivers/ide/osb4.c --- /opt/kernel/linux-2.4.7/drivers/ide/osb4.c Wed May 2 01:05:00 2001 +++ linux/drivers/ide/osb4.c Wed Jan 1 00:07:23 1997 @@ -450,6 +450,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &osb4_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- /opt/kernel/linux-2.4.7/drivers/ide/pdc202xx.c Wed May 2 01:05:00 2001 +++ linux/drivers/ide/pdc202xx.c Wed Jan 1 00:07:23 1997 @@ -855,6 +855,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/piix.c linux/drivers/ide/piix.c --- /opt/kernel/linux-2.4.7/drivers/ide/piix.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/piix.c Wed Jan 1 00:07:23 1997 @@ -512,6 +512,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- /opt/kernel/linux-2.4.7/drivers/ide/sis5513.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/sis5513.c Wed Jan 1 00:07:23 1997 @@ -631,6 +631,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* CONFIG_BLK_DEV_IDEDMA */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- 
/opt/kernel/linux-2.4.7/drivers/ide/slc90e66.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/slc90e66.c Wed Jan 1 00:07:23 1997 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- /opt/kernel/linux-2.4.7/drivers/ide/via82cxxx.c Sat Feb 3 20:27:43 2001 +++ linux/drivers/ide/via82cxxx.c Wed Jan 1 00:07:23 1997 @@ -504,6 +504,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef CONFIG_IDEDMA_AUTO hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/md/lvm.c linux/drivers/md/lvm.c --- /opt/kernel/linux-2.4.7/drivers/md/lvm.c Thu Jul 12 01:35:37 2001 +++ linux/drivers/md/lvm.c Wed Jan 1 00:07:23 1997 @@ -394,8 +394,6 @@ */ int lvm_init(void) { - struct gendisk *gendisk_ptr = NULL; - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); return -EIO; @@ -415,27 +413,19 @@ lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + lvm_proc_vg_subdir = + create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + if (pde != NULL) + pde->read_proc = &lvm_proc_get_global_info; } lvm_init_vars(); lvm_geninit(&lvm_gendisk); /* insert our gendisk at the corresponding major */ - if (gendisk_head != NULL) { - gendisk_ptr = gendisk_head; - while (gendisk_ptr->next != NULL && - gendisk_ptr->major > lvm_gendisk.major) { - gendisk_ptr = gendisk_ptr->next; - } - lvm_gendisk.next = gendisk_ptr->next; - gendisk_ptr->next = &lvm_gendisk; - } else { - gendisk_head = &lvm_gendisk; - lvm_gendisk.next = NULL; - } + add_gendisk(&lvm_gendisk); #ifdef LVM_HD_NAME /* reference from drivers/block/genhd.c */ @@ -447,7 +437,7 @@ /* optional read root VGDA */ /* - if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); + if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ printk(KERN_INFO @@ -469,8 +459,6 @@ */ static void lvm_cleanup(void) { - struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; - devfs_unregister (lvm_devfs_handle); if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { @@ -480,21 +468,8 @@ printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); } - - gendisk_ptr = gendisk_ptr_prev = gendisk_head; - while (gendisk_ptr != NULL) { - if (gendisk_ptr == &lvm_gendisk) - break; - gendisk_ptr_prev = gendisk_ptr; - gendisk_ptr = gendisk_ptr->next; - } - /* delete our gendisk from chain */ - if (gendisk_ptr == &lvm_gendisk) - gendisk_ptr_prev->next = gendisk_ptr->next; - - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + del_gendisk(&lvm_gendisk); + blk_clear(MAJOR_NR); remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); @@ -506,7 +481,6 @@ #endif printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); - return; } /* lvm_cleanup() */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/md/md.c linux/drivers/md/md.c --- /opt/kernel/linux-2.4.7/drivers/md/md.c Mon Jul 2 23:16:24 2001 +++ linux/drivers/md/md.c Tue Jul 24 15:02:55 2001 @@ -222,18 +222,6 @@ return mddev; } -struct 
gendisk * find_gendisk (kdev_t dev) -{ - struct gendisk *tmp = gendisk_head; - - while (tmp != NULL) { - if (tmp->major == MAJOR(dev)) - return (tmp); - tmp = tmp->next; - } - return (NULL); -} - mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { mdk_rdev_t * rdev; @@ -281,7 +269,7 @@ /* * ok, add this new device name to the list */ - hd = find_gendisk (dev); + hd = get_gendisk (dev); dname->name = NULL; if (hd) dname->name = disk_name (hd, MINOR(dev), dname->namebuf); @@ -582,7 +570,7 @@ static kdev_t dev_unit(kdev_t dev) { unsigned int mask; - struct gendisk *hd = find_gendisk(dev); + struct gendisk *hd = get_gendisk(dev); if (!hd) return 0; @@ -2663,7 +2651,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3537,13 +3525,13 @@ read_ahead[MAJOR_NR] = INT_MAX; - md_gendisk.next = gendisk_head; - gendisk_head = &md_gendisk; + add_gendisk(&md_gendisk); md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -3876,23 +3864,12 @@ #ifdef CONFIG_PROC_FS remove_proc_entry("mdstat", NULL); #endif - - gendisk_ptr = &gendisk_head; - while (*gendisk_ptr) { - if (*gendisk_ptr == &md_gendisk) { - *gendisk_ptr = md_gendisk.next; - break; - } - gendisk_ptr = & (*gendisk_ptr)->next; - } + + del_gendisk(&md_gendisk); blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); free_device_names(); - } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- /opt/kernel/linux-2.4.7/drivers/mtd/ftl.c Tue Jun 12 19:30:27 2001 +++ linux/drivers/mtd/ftl.c Tue Jul 24 15:04:12 2001 @@ -1171,7 +1171,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = verify_area(VERIFY_WRITE, (long *)arg, sizeof(long)); @@ -1211,42 +1211,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int ftl_reread_partitions(kdev_t dev) { + int minor = MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, &ftl_blk_fops, le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); -#ifdef PCMCIA_DEBUG - for (i = 0; i < MAX_PART; i++) { - if (ftl_hd[whole+i].nr_sects > 0) - printk(KERN_INFO " %d: start %ld size %ld\n", i, - ftl_hd[whole+i].start_sect, - ftl_hd[whole+i].nr_sects); - } -#endif - return 0; + return res; } 
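(Aside, not part of the patch itself: the LVM, MD and FTL hunks above all stop walking gendisk_head by hand and call add_gendisk(), del_gendisk() and get_gendisk() instead, while the per-partition clearing loops become wipe_partitions() plus blk_clear(). Those helpers are defined elsewhere in the patch and do not appear in this excerpt, so the sketch below is only a guess at the kind of list bookkeeping they centralize, with a cut-down struct gendisk and no locking.)

/* Illustrative sketch only -- a minimal stand-in for the gendisk list helpers
 * the drivers above now call.  The real add_gendisk(), del_gendisk() and
 * get_gendisk() from the patch are not shown in this excerpt; the struct
 * layout here is reduced to what the example needs. */
#include <stdio.h>
#include <stddef.h>

struct gendisk {
	int major;
	struct gendisk *next;
};

static struct gendisk *gendisk_head;

static void add_gendisk(struct gendisk *gp)
{
	gp->next = gendisk_head;		/* push onto the global list */
	gendisk_head = gp;
}

static void del_gendisk(struct gendisk *gp)
{
	struct gendisk **p;

	for (p = &gendisk_head; *p; p = &(*p)->next) {
		if (*p == gp) {			/* unlink the matching entry */
			*p = gp->next;
			gp->next = NULL;
			return;
		}
	}
}

static struct gendisk *get_gendisk(int major)
{
	struct gendisk *gp;

	for (gp = gendisk_head; gp; gp = gp->next)
		if (gp->major == major)
			return gp;
	return NULL;
}

int main(void)
{
	struct gendisk md = { .major = 9 }, lvm = { .major = 58 };

	add_gendisk(&md);
	add_gendisk(&lvm);
	printf("major 9 registered: %s\n", get_gendisk(9) ? "yes" : "no");
	del_gendisk(&md);
	printf("major 9 registered: %s\n", get_gendisk(9) ? "yes" : "no");
	return 0;
}

The practical effect of the conversion is that block drivers no longer touch the global disk list or the per-major arrays directly, so the core bookkeeping can change (as it does here for the new queue locking) without editing every driver again.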
/*====================================================================== @@ -1428,8 +1413,7 @@ blksize_size[FTL_MAJOR] = ftl_blocksizes; ftl_gendisk.major = FTL_MAJOR; blk_init_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR), &do_ftl_request); - ftl_gendisk.next = gendisk_head; - gendisk_head = &ftl_gendisk; + add_gendisk(&ftl_gendisk, FTL_MAJOR); register_mtd_user(&ftl_notifier); @@ -1438,19 +1422,13 @@ mod_exit_t cleanup_ftl(void) { - struct gendisk *gd, **gdp; - unregister_mtd_user(&ftl_notifier); unregister_blkdev(FTL_MAJOR, "ftl"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR)); - blksize_size[FTL_MAJOR] = NULL; + blk_clear(FTL_MAJOR); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &ftl_gendisk) { - gd = *gdp; *gdp = gd->next; - break; - } + del_gendisk(&ftl_gendisk); } module_init(init_ftl); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c --- /opt/kernel/linux-2.4.7/drivers/mtd/mtdblock.c Sat Apr 28 20:27:54 2001 +++ linux/drivers/mtd/mtdblock.c Wed Jan 1 00:07:23 1997 @@ -28,7 +28,7 @@ #if LINUX_VERSION_CODE < 0x20300 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync) #else -#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged) +#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE)) #endif #ifdef CONFIG_DEVFS_FS @@ -56,7 +56,7 @@ static spinlock_t mtdblks_lock; -static int mtd_sizes[MAX_MTD_DEVICES]; +static sector_t mtd_sizes[MAX_MTD_DEVICES]; static int mtd_blksizes[MAX_MTD_DEVICES]; @@ -392,7 +392,7 @@ /* * This is a special request_fn because it is executed in a process context - * to be able to sleep independently of the caller. The io_request_lock + * to be able to sleep independently of the caller. The queue_lock * is held upon entry and exit. * The head of our request queue is considered active so there is no need * to dequeue requests before we are done. @@ -406,7 +406,7 @@ for (;;) { INIT_REQUEST; req = CURRENT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); mtdblk = mtdblks[MINOR(req->rq_dev)]; res = 0; @@ -448,7 +448,7 @@ } end_req: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -485,16 +485,16 @@ while (!leaving) { add_wait_queue(&thr_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); if (QUEUE_EMPTY || QUEUE_PLUGGED) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); schedule(); remove_wait_queue(&thr_wq, &wait); } else { remove_wait_queue(&thr_wq, &wait); set_current_state(TASK_RUNNING); handle_mtdblock_request(); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c --- /opt/kernel/linux-2.4.7/drivers/mtd/nftlcore.c Tue Jun 12 19:30:27 2001 +++ linux/drivers/mtd/nftlcore.c Tue Jul 24 15:04:44 2001 @@ -53,17 +53,12 @@ * encountered, except ... */ -static int nftl_sizes[256]; +static sector_t nftl_sizes[256]; static int nftl_blocksizes[256]; /* .. for the Linux partition table handling.
*/ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<<NFTL_PARTN_BITS, nftl->nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<<NFTL_PARTN_BITS), + nftl->nr_sects); #endif } @@ -774,7 +770,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -787,14 +783,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -811,23 +802,11 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1< 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<<NFTL_PARTN_BITS, nftl->nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ @@ -845,7 +824,7 @@ void nftl_request(RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -857,10 +836,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ?
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -870,8 +850,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -892,8 +872,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -939,7 +917,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -1045,22 +1023,19 @@ #endif if (register_blkdev(MAJOR_NR, "nftl", &nftl_fops)){ - printk("unable to register NFTL block device on major %d\n", MAJOR_NR); + printk("unable to register NFTL block device on major %d\n", + MAJOR_NR); return -EBUSY; } else { -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = nftl_request; -#else blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), &nftl_request); -#endif + /* set block size to 1kB each */ for (i = 0; i < 256; i++) { nftl_blocksizes[i] = 1024; } blksize_size[MAJOR_NR] = nftl_blocksizes; - nftl_gendisk.next = gendisk_head; - gendisk_head = &nftl_gendisk; + add_gendisk(&nftl_gendisk); } register_mtd_user(&nftl_notifier); @@ -1070,24 +1045,12 @@ static void __exit cleanup_nftl(void) { - struct gendisk *gd, **gdp; - unregister_mtd_user(&nftl_notifier); unregister_blkdev(MAJOR_NR, "nftl"); -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = 0; -#else blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); -#endif - /* remove ourself from generic harddisk list - FIXME: why can't I found this partition on /proc/partition */ - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &nftl_gendisk) { - gd = *gdp; *gdp = gd->next; - break; - } + del_gendisk(&nftl_gendisk); } module_init(init_nftl); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- /opt/kernel/linux-2.4.7/drivers/s390/block/dasd.c Tue May 15 10:29:34 2001 +++ linux/drivers/s390/block/dasd.c Tue Jul 24 15:04:44 2001 @@ -596,10 +596,9 @@ INIT_BLK_DEV(major,do_dasd_request,dasd_get_queue,NULL); - major_info->gendisk.major = major; - major_info->gendisk.next = gendisk_head; major_info->gendisk.sizes = blk_size[major]; - gendisk_head = &major_info->gendisk; + major_info->gendisk.major = major; + add_gendisk(&major_info->gendisk); return major; out_max_sectors: kfree(max_sectors[major]); @@ -621,7 +620,6 @@ { int rc = 0; int major; - struct gendisk *dd, *prev = NULL; unsigned long flags; if (major_info == NULL) { @@ -629,33 +627,18 @@ } major = major_info->gendisk.major; INIT_BLK_DEV(major,NULL,NULL,NULL); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; - - /* do the gendisk stuff */ - for (dd = gendisk_head; dd; dd = dd->next) { - if (dd == &major_info->gendisk) { - if (prev) - prev->next = dd->next; - else - gendisk_head = dd->next; - break; - } - prev = dd; - } - if (dd == NULL) { - return -ENOENT; - } + del_gendisk(&major_info->gendisk); + kfree (major_info->gendisk.de_arr); kfree (major_info->gendisk.flags); + kfree (major_info->gendisk.part); kfree (major_info->dasd_device); kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); kfree (max_sectors[major]); - kfree 
(major_info->gendisk.part); + + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1298,14 +1281,10 @@ chanq_max_size > 0 || (req->nr_sectors >= chanq_min_size)) { ccw_req_t *cqr; - /* relocate request according to partition table */ - req->sector += device->major_info->gendisk.part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { DASD_MESSAGE (KERN_WARNING, device, "CCW creation failed on request %p\n", req); - /* revert relocation of request */ - req->sector -= device->major_info->gendisk.part[MINOR (req->rq_dev)].start_sect; break; /* terminate request queue loop */ } @@ -1357,10 +1336,10 @@ dasd_run_bh (dasd_device_t *device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue.queue_lock, flags); atomic_set(&device->bh_scheduled,0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue.queue_lock, flags); } /* @@ -2093,14 +2072,15 @@ break; } case BIODASDRWTB:{ + long startsect; int offset = 0; int xlt; rc = copy_from_user (&xlt, (void *) data, sizeof (int)) ? -EFAULT : 0; if (rc) break; - offset = major_info->gendisk.part[MINOR (inp->i_rdev)].start_sect >> - device->sizes.s2b_shift; + startsect = get_start_sect(inp->i_rdev); + offset = startsect >> device->sizes.s2b_shift; xlt += offset; rc = copy_to_user ((void *) data, &xlt, sizeof (int)) ? -EFAULT : 0; @@ -2287,15 +2267,14 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev, struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); if (!device->discipline->fill_geometry) return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info-> - gendisk.part[MINOR(kdev)].start_sect; + geo->start = get_start_sect(kdev); /* This is a hack. dasdfmt and ibm.c expect geo.start to contain the block number of the label block when diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- /opt/kernel/linux-2.4.7/drivers/s390/block/xpram.c Thu Apr 12 04:02:28 2001 +++ linux/drivers/s390/block/xpram.c Wed Jan 1 00:07:23 1997 @@ -1208,8 +1208,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior); s390irq_spin_lock_irqsave(tape->devinfo.irq,flags_390irq); atomic_set(&tape->bh_scheduled,0); tapeblock_exec_IO(tape); s390irq_spin_unlock_irqrestore(tape->devinfo.irq,flags_390irq); - spin_unlock_irqrestore (&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- /opt/kernel/linux-2.4.7/drivers/scsi/Config.in Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/Config.in Wed Jan 1 00:07:23 1997 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux.c Sun May 20 21:11:39 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Jan 1 00:07:23 1997 @@ -1103,9 +1103,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1241,7 +1241,9 @@ memset(ahc->platform_data, 0, sizeof(struct ahc_platform_data)); TAILQ_INIT(&ahc->platform_data->completeq); LIST_INIT(&ahc->platform_data->device_runq); - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1495,29 +1497,23 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); ahc_linux_run_device_queue(ahc, dev); - ahc_unlock(ahc, &flags); return (0); } @@ -2344,12 +2340,10 @@ flag == SCB_ABORT ? "n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2598,7 +2592,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2641,14 +2635,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. 
- */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2661,7 +2648,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Sat May 5 00:16:28 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Wed Jan 1 00:07:23 1997 @@ -81,7 +81,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + can_dma_32: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_osm.h Fri Jul 20 06:07:19 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Tue Jul 24 15:32:52 2001 @@ -516,9 +516,6 @@ LIST_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -672,20 +669,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -697,14 +694,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.7/drivers/scsi/hosts.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/hosts.c Wed Jan 1 00:07:23 1997 @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,7 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + retval->can_dma_32 = tpnt->can_dma_32; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- 
/opt/kernel/linux-2.4.7/drivers/scsi/hosts.h Fri Jul 20 21:55:46 2001 +++ linux/drivers/scsi/hosts.h Tue Jul 24 15:33:28 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned can_dma_32:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. */ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned can_dma_32:1; /* * True if this host was loaded as a loadable module */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.7/drivers/scsi/qlogicfc.h Mon Jun 26 21:02:16 2000 +++ linux/drivers/scsi/qlogicfc.h Wed Jan 1 00:07:23 1997 @@ -100,7 +100,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + can_dma_32: 1 \ } #endif /* _QLOGICFC_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi.c Fri Jul 20 06:07:04 2001 +++ linux/drivers/scsi/scsi.c Wed Jan 1 00:07:23 1997 @@ -178,10 +178,13 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + request_queue_t *q = &SDpnt->request_queue; + + blk_init_queue(q, scsi_request_fn); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; } #ifdef MODULE @@ -612,8 +615,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -625,6 +626,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -678,41 +681,41 @@ * passes a meaningful return value. 
*/ if (host->hostt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } } else { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } } else { int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) barrier(); printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -780,7 +783,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -876,7 +879,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -966,7 +969,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1315,11 +1318,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1327,10 +1330,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1821,7 +1824,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1831,7 +1833,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1843,10 +1845,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi.h Fri Jul 20 21:55:46 2001 +++ linux/drivers/scsi/scsi.h Tue Jul 24 15:33:28 2001 @@ -389,9 +389,9 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) #include #define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) + (virt_to_phys(bio_data(X)+bio_size(X)-1)+1==virt_to_phys(bio_data(Y))) #else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) +#define CONTIGUOUS_BUFFERS(X,Y) BIO_CONTIG(X, Y) #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_error.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_error.c Wed Jan 1 00:07:23 1997 @@ -422,8 +422,6 @@ {REQUEST_SENSE, 0, 0, 0, 255, 0}; unsigned char scsi_result0[256], *scsi_result = NULL; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -580,16 +578,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -606,9 +602,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -631,10 +627,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -647,13 +643,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. 
*/ @@ -661,8 +657,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. */ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -773,9 +769,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -805,9 +801,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -838,9 +834,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -884,9 +880,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1227,7 +1223,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1244,19 +1240,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1303,7 +1302,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_lib.c Fri Jul 20 05:48:04 2001 +++ linux/drivers/scsi/scsi_lib.c Tue Jul 24 12:53:58 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. 
* * Returns: Nothing */ @@ -70,7 +70,7 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; @@ -84,15 +84,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +167,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +248,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +260,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +278,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && list_empty(&q->queue_head) && SDpnt->device_busy == 0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +289,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +323,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -361,56 +356,34 @@ int frequeue) { struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; + request_queue_t *q = &SCpnt->device->request_queue; + unsigned long flags; + + ASSERT_LOCK(&q->queue_lock, 0); - ASSERT_LOCK(&io_request_lock, 0); + spin_lock_irqsave(&q->queue_lock, flags); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + sectors -= bio_sectors(req->bio); + if (!__end_that_request_first(req, 1)) + break; + } while (sectors > 0); + + spin_unlock_irqrestore(&q->queue_lock, flags); /* * If there are blocks left over at the end, set up the command * to queue the remainder 
of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -418,17 +391,15 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { + if (req->waiting) complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +407,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,7 +457,9 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -507,9 +477,8 @@ } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -545,6 +514,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -559,7 +529,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -574,7 +544,7 @@ for (i = 0; i < SCpnt->use_sg; i++) { if (sgpnt[i].alt_address) { - if (SCpnt->request.cmd == READ) { + if (req->cmd == READ) { memcpy(sgpnt[i].alt_address, sgpnt[i].address, sgpnt[i].length); @@ -584,10 +554,12 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + char *to = bio_kmap_irq(req->bio); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -608,11 +580,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -699,7 +670,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -734,7 +705,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported.
*/ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -748,7 +719,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -762,7 +733,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -820,7 +791,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -838,7 +809,7 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* @@ -887,9 +858,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -898,14 +869,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. */ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -925,9 +896,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -959,9 +929,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -998,7 +967,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1028,7 +997,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1044,7 +1013,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1065,7 +1034,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_merge.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_merge.c Tue Jul 24 14:26:10 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). 
+ * Support for highmem I/O: Jens Axboe */ /* @@ -71,51 +72,6 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. - */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; @@ -191,8 +147,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +156,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +178,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (CONTIGUOUS_BUFFERS(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +195,22 @@ * kind of screwed and we need to start * another segment. 
*/ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_bus(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +257,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_bus((X))+bio_size((X)))|((long)bio_to_bus((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,7 +272,7 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= max_segments || + if (req->nr_segments >= q->max_segments || req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; @@ -329,8 +281,7 @@ static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two @@ -347,11 +298,10 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. @@ -371,7 +321,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +330,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -399,8 +349,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,12 +361,14 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) return 0; if (use_clustering) { @@ -427,17 +378,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
*/ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_bus(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,16 +399,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -472,11 +420,10 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) return 0; if (use_clustering) { @@ -486,15 +433,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (CONTIGUOUS_BUFFERS(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_bus(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +454,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -522,12 +467,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. 
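The merge hunks above and the MERGEFCT wrappers that follow all enforce the same segment-accounting rule: a bio (or an adjacent request) may only be merged while the request stays within the queue's segment limit, the host's scatter-gather table size, and the host's max_sectors transfer limit. A minimal sketch of that rule, written against the fields used in this patch, is shown here; the function name and the reduced set of checks are illustrative only, since the real __scsi_back_merge_fn() additionally handles clustering and the ISA DMA threshold cases.

/*
 * Illustrative sketch, not part of the patch: the core accounting the
 * merge functions perform before a bio is allowed onto an existing request.
 */
static int sketch_back_merge_ok(request_queue_t *q, struct request *req,
				struct bio *bio, struct Scsi_Host *SHpnt)
{
	/* would the merged request exceed the host's per-command limit? */
	if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors)
		return 0;

	/* is there room for one more scatter-gather segment? */
	if (req->nr_segments >= q->max_segments ||
	    req->nr_segments >= SHpnt->sg_tablesize)
		return 0;

	req->nr_segments++;
	return 1;
}
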
@@ -535,15 +480,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +518,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +538,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -614,13 +555,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || + if (req->nr_segments + next->nr_segments - 1 > q->max_segments || req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { return 0; } @@ -652,8 +592,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +601,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && CONTIGUOUS_BUFFERS(req->biotail, next->bio) + && bio_to_bus(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +614,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -688,7 +627,7 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } @@ -696,7 +635,7 @@ * first segment in next, then the check for hw segments was * done above already, so we can always merge. */ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { return 0; @@ -711,7 +650,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } else { @@ -732,12 +671,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. 
+ * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +684,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +735,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -808,30 +745,8 @@ struct scatterlist * sgpnt; int this_count; - /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -847,24 +762,22 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. + * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* * Allocate the actual scatter-gather table itself. * scsi_malloc can only allocate in chunks of 512 bytes */ - SCpnt->sglist_len = (SCpnt->use_sg - * sizeof(struct scatterlist) + 511) & ~511; + SCpnt->sglist_len = (count * sizeof(struct scatterlist) + 511) & ~511; sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); @@ -877,7 +790,7 @@ * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } /* @@ -887,15 +800,14 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + for (count = 0, bio = req->bio; bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { + if (dma_host && bio_to_bus(bioprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (CONTIGUOUS_BUFFERS(bioprev, bio)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -904,32 +816,32 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( bio_to_bus(bio) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } + + set_bio_sg(&sgpnt[count], bio); + if (!dma_host) + SCpnt->request_bufflen += bio_size(bio); + count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; - } - bhprev = bh; + bioprev = bio; } /* @@ -937,13 +849,14 @@ */ if (count != SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. */ @@ -952,7 +865,7 @@ sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA @@ -970,7 +883,12 @@ break; } - sgpnt[i].alt_address = sgpnt[i].address; + /* + * this is not a dma host, so it will never + * be a highmem page + */ + sgpnt[i].alt_address = page_address(sgpnt[i].page) + + sgpnt[i].offset; sgpnt[i].address = (char *) scsi_malloc(sgpnt[i].length); /* @@ -986,7 +904,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, sgpnt[i].alt_address, sgpnt[i].length); } @@ -1031,21 +949,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1058,28 +975,32 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_bus(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + char *buf = bio_kmap_irq(bio); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1166,4 +1087,18 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ +#ifdef CONFIG_HIGHMEM + if (SHpnt->can_dma_32 && (SDpnt->type == TYPE_DISK)) { + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + printk("SCSI: channel %d, id %d: enabling highmem I/O\n", + SDpnt->channel, SDpnt->id); + } else + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); +#endif + + blk_queue_max_sectors(q, 1024); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_obsolete.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Wed Jan 1 00:07:23 1997 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. */ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. 
*/ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_queue.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Wed Jan 1 00:07:23 1997 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); cmd->host->host_busy--; cmd->device->device_busy--; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); /* * Insert this command at the head of the queue for it's device. diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sd.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/sd.c Tue Jul 24 15:38:27 2001 @@ -61,10 +61,6 @@ #include -/* - * static const char RCSid[] = "$Header:"; - */ - #define SD_MAJOR(i) (!(i) ? 
SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) #define SCSI_DISKS_PER_MAJOR 16 @@ -72,8 +68,7 @@ #define SD_MINOR_NUMBER(i) ((i) & 255) #define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) #define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) -#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) -#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) +#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4)) #define MAX_RETRIES 5 @@ -89,14 +84,12 @@ static Scsi_Disk *rscsi_disks; static int *sd_sizes; static int *sd_blocksizes; -static int *sd_hardsizes; /* Hardware sector size */ static int check_scsidisk_media_change(kdev_t); static int fop_revalidate_scsidisk(kdev_t); static int sd_init_onedisk(int); - static int sd_init(void); static void sd_finish(void); static int sd_attach(Scsi_Device *); @@ -177,9 +170,11 @@ diskinfo[0] = 0x40; diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + diskinfo[2] = + rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - /* override with calculated, extended default, or driver values */ + /* override with calculated, extended default, + or driver values */ if(host->hostt->bios_param != NULL) host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], @@ -189,48 +184,14 @@ dev, &diskinfo[0]); if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) - return -EFAULT; - return 0; - } - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - - if(!loc) - return -EINVAL; - - host = rscsi_disks[DEVICE_NR(dev)].device->host; - - /* default to most commonly used values */ - - diskinfo[0] = 0x40; - diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - - /* override with calculated, extended default, or driver values */ - - if(host->hostt->bios_param != NULL) - host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, - &diskinfo[0]); - else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, &diskinfo[0]); - - if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) return -EFAULT; return 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) - return -EINVAL; - return put_user(sd[SD_PARTITION(inode->i_rdev)].nr_sects, (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -240,6 +201,8 @@ case BLKPG: case BLKELVGET: case BLKELVSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ @@ -248,7 +211,8 @@ return revalidate_scsidisk(dev, 1); default: - return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device, + cmd, (void *) arg); } } @@ -298,7 +262,7 @@ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, block = %d\n", devm, block)); dpnt = &rscsi_disks[dev]; - if (devm >= (sd_template.dev_max << 4) || + if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) || !dpnt || !dpnt->device->online || block + SCpnt->request.nr_sectors > sd[devm].nr_sects) { @@ -306,7 +270,7 @@ SCSI_LOG_HLQUEUE(2, printk("Retry with 
0x%p\n", SCpnt)); return 0; } - block += sd[devm].start_sect; + if (dpnt->device->changed) { /* * quietly refuse to do anything to a changed disc until the changed @@ -576,7 +540,6 @@ static struct gendisk *sd_gendisks = &sd_gendisk; #define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR] -#define LAST_SD_GENDISK sd_gendisks[N_USED_SD_MAJORS - 1] /* * rw_intr is the interrupt routine for the device driver. @@ -616,8 +579,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -640,7 +603,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -959,15 +922,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1052,7 +1011,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1063,10 +1022,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. 
*/ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1076,93 +1042,78 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - } - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; 
sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; - sd_gendisks[i].next = sd_gendisks + i + 1; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); } - LAST_SD_GENDISK.next = NULL; return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1173,19 +1124,13 @@ static void sd_finish() { - struct gendisk *gendisk; int i; for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; + add_gendisk(&(sd_gendisks[i])); } - for (gendisk = gendisk_head; gendisk != NULL; gendisk = gendisk->next) - if (gendisk == sd_gendisks) - break; - if (gendisk == NULL) { - LAST_SD_GENDISK.next = gendisk_head; - gendisk_head = sd_gendisks; - } + for (i = 0; i < sd_template.dev_max; ++i) if (!rscsi_disks[i].capacity && rscsi_disks[i].device) { sd_init_onedisk(i); @@ -1273,9 +1218,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1285,36 +1228,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. 
- */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1324,6 +1249,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1331,18 +1257,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1355,7 +1276,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1366,10 +1286,7 @@ static void __exit exit_sd(void) { - struct gendisk **prev_sdgd_link; - struct gendisk *sdgd; int i; - int removed = 0; scsi_unregister_module(MODULE_SCSI_DEV, &sd_template); @@ -1381,31 +1298,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); - - /* - * Now remove sd_gendisks from the linked list - */ - prev_sdgd_link = &gendisk_head; - while ((sdgd = *prev_sdgd_link) != NULL) { - if (sdgd >= sd_gendisks && sdgd <= &LAST_SD_GENDISK) { - removed++; - *prev_sdgd_link = sdgd->next; - continue; - } - prev_sdgd_link = &sdgd->next; - } - - if (removed != N_USED_SD_MAJORS) - printk("%s %d sd_gendisks in disk chain", - removed > N_USED_SD_MAJORS ? 
"total" : "just", removed); - } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sr.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/sr.c Wed Jan 1 00:07:23 1997 @@ -85,10 +85,9 @@ }; Scsi_CD *scsi_CDs; -static int *sr_sizes; +static sector_t *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -219,8 +218,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -643,6 +642,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -791,21 +791,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. */ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -877,7 +870,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -885,17 +877,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). 
*/ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -906,7 +899,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -928,13 +921,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.c Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/sym53c8xx.c Tue Jul 24 15:30:11 2001 @@ -644,10 +644,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -658,8 +658,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -13667,9 +13667,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13690,9 +13690,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.h Fri Jul 20 21:56:08 2001 +++ linux/drivers/scsi/sym53c8xx.h Wed Jan 1 00:07:23 1997 @@ -96,7 +96,8 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + can_dma_32: 1} #else diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.7/fs/Makefile Tue May 22 18:35:42 2001 +++ linux/fs/Makefile Wed Jan 1 00:07:22 1997 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o dcache.o +export-objs := filesystems.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/bio.c linux/fs/bio.c --- /opt/kernel/linux-2.4.7/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Tue Jul 24 14:19:09 
2001 @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep, *biovec_cachep; +static spinlock_t bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); + +#define BIO_HASH_SCALE 3 + +#define bhash_fn(htable, key) \ + ((((key) >> BIO_HASH_SCALE) & (htable)->mask) + (htable)->table) + +/* + * make this changeable (1 or 2 would be a good default, 32 is insane :-) + */ +#define BIO_POOL_PAGES 1 + +int bio_hash_init(bio_hash_table_t *hash_table, int entries) +{ + int bit_size, hash_bits, hash_size; + + memset(hash_table, 0, sizeof(bio_hash_table_t)); + + bit_size = entries * sizeof(void); + hash_bits = 0; + while (bit_size) { + hash_bits++; + bit_size >>= 1; + } + + hash_size = (1UL << hash_bits) * sizeof(void *); + hash_table->table = vmalloc(hash_size); + if (hash_table->table == NULL) + return -ENOMEM; + + memset(hash_table->table, 0, hash_size); + hash_table->mask = (1UL << hash_bits) - 1; + hash_table->valid_counter = 1; + + return 0; +} + +void bio_hash_cleanup(bio_hash_table_t *table) +{ + vfree(table->table); + memset(table, 0, sizeof(bio_hash_table_t)); +} + +inline void __bio_hash_remove(bio_hash_t *entry) +{ + if (entry->valid_counter) { + bio_hash_t *nxt = entry->next_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; + entry->pprev_hash = NULL; + entry->valid_counter = 0; + } +} + +inline void bio_hash_remove(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + __bio_hash_remove(&bio->bi_hash); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +inline void __bio_hash_add(bio_hash_table_t *htable, struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t **hash = bhash_fn(htable, bio->bi_sector); + bio_hash_t *nxt = *hash; + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = htable->valid_counter; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; + + htable->st.nr_inserts++; +} + +inline void bio_hash_add(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + __bio_hash_add(&q->queue_hash, bio); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +inline struct bio *__bio_hash_find(bio_hash_table_t *htable, sector_t sector) +{ + bio_hash_t **hash = bhash_fn(htable, sector); + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + + htable->st.nr_lookups++; + + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + if (entry->valid_counter == htable->valid_counter) { + bio = 
hash_entry(entry, struct bio, bi_hash); + if (bio->bi_sector == sector) { + if (nr > htable->st.max_bucket_size) + htable->st.max_bucket_size = nr; + if (nr <= MAX_PROFILE_BUCKETS) + htable->st.bucket_size[nr]++; + htable->st.nr_hits++; +#if 0 + bio_get(bio); +#endif + return bio; + } + + nr++; + continue; + } + + /* + * prune out-of-date entries as we go along + */ + __bio_hash_remove(entry); + nr++; + } + + return NULL; +} + +inline struct bio *bio_hash_find(request_queue_t *q, sector_t sector) +{ + struct bio *bio; + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + bio = __bio_hash_find(&q->queue_hash, sector); + spin_unlock_irqrestore(&q->queue_lock, flags); + + return bio; +} + +inline int __bio_hash_add_unique(bio_hash_table_t *htable, struct bio *bio) +{ + int ret = 1; + + if (!__bio_hash_find(htable, bio->bi_sector)) { + __bio_hash_add(htable, bio); + ret = 0; + } + + return ret; +} + +inline int bio_hash_add_unique(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + int ret; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + ret = __bio_hash_add_unique(&q->queue_hash, bio); + spin_unlock_irqrestore(&q->queue_lock, flags); + + return ret; +} + +static inline struct bio *bio_pool_get(void) +{ + unsigned long flags; + struct bio *bio; + + spin_lock_irqsave(&bio_lock, flags); + if ((bio = bio_pool)) { + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio->bi_flags = BIO_POOL; + } + spin_unlock_irqrestore(&bio_lock, flags); + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&bio_lock, flags); + bio->bi_next = bio_pool; + bio_pool = bio; + spin_unlock_irqrestore(&bio_lock, flags); + + /* + * use wake-one + */ + if (waitqueue_active(&bio_pool_wait)) + wake_up_nr(&bio_pool_wait, 1); +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * + * Description: + * bio_alloc will first try it's on internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +#if 0 +struct bio *bio_alloc(int gfp_mask) +{ + DECLARE_WAITQUEUE(wait, current); + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + + /* + * first try slab + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + /* + * hrmpf, not much luck. if __GFP_WAIT is set, wait on + * bio_pool. 
+ */ + if ((gfp_mask & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) { + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + bio = bio_pool_get(); + if (bio) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + } + return bio; +} +#endif + +struct bio *bio_alloc(int gfp_mask) +{ + struct bio *bio; + + /* + * first try slab + */ +restart: + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + wakeup_bdflush(0); + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + run_task_queue(&tq_disk); + + current->policy |= SCHED_YIELD; + __set_current_state(TASK_RUNNING); + schedule(); + goto restart; + +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + return bio; +} + + +/* + * queue lock assumed held! + */ +static inline void __bio_free(struct bio *bio) +{ + __bio_hash_remove(&bio->bi_hash); + + if (bio->bi_flags & BIO_POOL) + bio_pool_put(bio); + else + kmem_cache_free(bio_cachep, bio); +} + +/** + * bio_put - release a reference to a bio + * @bio: bio to be released + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. + **/ +void bio_put(struct bio *bio) +{ + if (!atomic_read(&bio->bi_cnt)) + BUG(); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + if (bio->bi_next) + BUG(); + + __bio_free(bio); + } +} + +struct bio_vec *biovec_alloc(int gfp_mask) +{ + return kmem_cache_alloc(biovec_cachep, gfp_mask); +} + +void biovec_free(struct bio_vec *biov) +{ + kmem_cache_free(biovec_cachep, biov); +} + +#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!(bio->bi_flags & BIO_UPTODATE)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. + */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_put(bio); + return 1; +} +#endif + +static void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = bio->bi_private; + + end_kio_request(kio, bio->bi_flags & BIO_UPTODATE); + bio_put(bio); +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a continous range of data, and must be fully prepared for I/O. 
+ **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long sector) +{ + struct bio *bio; + int i, offset, len, size; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro device %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + offset = kio->offset & ~PAGE_MASK; + + len = kio->length; + for (i = 0; i < kio->nr_pages; i++) { + bio = bio_alloc(GFP_NOIO); + + bio->bi_dev = dev; + bio->bi_sector = sector; + + size = PAGE_SIZE - offset; + if (size > len) + size = len; + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = size; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + len -= size; + sector += (size >> 9); + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init bio_init_pool(void) +{ + struct page *page; + struct bio *bio, *bio_map; + int nr = BIO_POOL_PAGES, i, total; + + total = 0; + do { + page = alloc_page(GFP_KERNEL); + + bio_map = page_address(page); + memset(bio_map, 0, PAGE_SIZE); + for (i = 0; i < PAGE_SIZE / sizeof(struct bio); i++) { + bio = bio_map + i; + bio_pool_put(bio); + total++; + } + } while (--nr); + + return total; +} + +static int __init init_bio(void) +{ + int nr; + + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + biovec_cachep = kmem_cache_create("biovec", sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!biovec_cachep) + panic("bio: can't create biovec_cachep slab cache\n"); + + nr = bio_init_pool(); + printk("BIO: pool of %d setup, %luKb (%d bytes/bio)\n", nr, (BIO_POOL_PAGES * PAGE_SIZE) >> 10, sizeof(struct bio)); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_put); +EXPORT_SYMBOL(ll_rw_kio); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/block_dev.c linux/fs/block_dev.c --- /opt/kernel/linux-2.4.7/fs/block_dev.c Tue Jun 12 04:15:27 2001 +++ linux/fs/block_dev.c Wed Jan 1 00:07:22 1997 @@ -17,7 +17,6 @@ #include -extern int *blk_size[]; extern int *blksize_size[]; #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.7/fs/buffer.c Wed Jul 18 16:55:05 2001 +++ linux/fs/buffer.c Wed Jan 1 00:07:22 1997 @@ -557,7 +557,8 @@ * will force it bad). This shouldn't really happen currently, but * the code is ready. 
*/ -static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size) +static inline struct buffer_head * __get_hash_table(kdev_t dev, sector_t block, + int size) { struct buffer_head *bh = hash(dev, block); @@ -572,7 +573,7 @@ return bh; } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh; @@ -691,7 +692,6 @@ void set_blocksize(kdev_t dev, int size) { - extern int *blksize_size[]; int i, nlist, slept; struct buffer_head * bh, * bh_next; @@ -738,8 +738,10 @@ if (!atomic_read(&bh->b_count)) { if (buffer_dirty(bh)) printk(KERN_WARNING - "set_blocksize: dev %s buffer_dirty %lu size %hu\n", - kdevname(dev), bh->b_blocknr, bh->b_size); + "set_blocksize: dev %s buffer_dirty %Lu size %hu\n", + kdevname(dev), + (unsigned long long) bh->b_blocknr, + bh->b_size); remove_inode_queue(bh); __remove_from_queues(bh); put_last_free(bh); @@ -749,9 +751,10 @@ clear_bit(BH_Uptodate, &bh->b_state); printk(KERN_WARNING "set_blocksize: " - "b_count %d, dev %s, block %lu, from %p\n", + "b_count %d, dev %s, block %Lu, from %p\n", atomic_read(&bh->b_count), bdevname(bh->b_dev), - bh->b_blocknr, __builtin_return_address(0)); + (unsigned long long) bh->b_blocknr, + __builtin_return_address(0)); } write_unlock(&hash_table_lock); if (slept) @@ -997,7 +1000,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { struct buffer_head * bh; int isize; @@ -1977,7 +1980,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -1988,57 +1992,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2050,21 +2003,14 @@ * passed in to completely map the iobufs to disk. 
*/ -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; + struct kiobuf * iobuf; struct page * map; - struct buffer_head *tmp, **bhs = NULL; if (!nr) return 0; @@ -2074,92 +2020,37 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + if (!map) + return -EFAULT; + } } /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - tmp = bhs[bhind++]; - - tmp->b_dev = B_FREE; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - length -= size; - offset += size; - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? */ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } + + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + transferred += iobuf->length; + } + + return transferred; } /* @@ -2174,7 +2065,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. 
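To spell out the unit conversion in the rewritten brw_kiovec(): each b[i] is the starting block of iovec[i] in units of 'size' bytes, and ll_rw_kio() receives it scaled to 512-byte sectors as b[i] * (size >> 9). A hedged caller sketch, raw-I/O style, where the names and the 4KB block size are assumptions:

/*
 * Read one already-prepared kiobuf starting at 4KB block 100 of 'dev'.
 * With size == 4096, size >> 9 == 8, so the transfer starts at sector 800.
 * Returns the number of bytes waited for, or a negative errno if the
 * kiovec is rejected up front (misaligned offset/length, unmapped page).
 */
static int read_kiobuf_example(struct kiobuf *iobuf, kdev_t dev)
{
	sector_t blocks[1] = { 100 };

	return brw_kiovec(READ, 1, &iobuf, dev, blocks, 4096);
}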
*/ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.7/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Wed Jan 1 00:07:23 1997 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/check.c linux/fs/partitions/check.c --- /opt/kernel/linux-2.4.7/fs/partitions/check.c Wed Jul 11 23:55:41 2001 +++ linux/fs/partitions/check.c Tue Jul 24 15:32:25 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -33,12 +35,11 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - struct gendisk *gendisk_head; int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ -static int (*check_part[])(struct gendisk *hd, kdev_t dev, unsigned long first_sect, int first_minor) = { +static int (*check_part[])(struct gendisk *hd, kdev_t dev, + unsigned long first_sect, int first_minor) = { #ifdef CONFIG_ACORN_PARTITION acorn_partition, #endif @@ -250,8 +251,9 @@ char buf[64]; len += sprintf(page + len, - "%4d %4d %10d %s\n", - dsk->major, n, dsk->sizes[n], + "%4d %4d %10Lu %s\n", + dsk->major, n, + (u64) dsk->sizes[n], disk_name(dsk, n, buf)); if (len < offset) offset -= len, len = 0; @@ -409,32 +411,85 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); - if(!dev->sizes) - blk_size[dev->major] = NULL; + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + 
first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; + + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. */ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); - blk_size[dev->major] = dev->sizes; + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + blk_size[g->major] = (sector_t *) g->sizes; + + } +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? */ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/check.h linux/fs/partitions/check.h --- /opt/kernel/linux-2.4.7/fs/partitions/check.h Thu Feb 17 00:42:06 2000 +++ linux/fs/partitions/check.h Wed Jan 1 00:07:23 1997 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. */ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/ibm.c linux/fs/partitions/ibm.c --- /opt/kernel/linux-2.4.7/fs/partitions/ibm.c Sun May 20 21:11:39 2001 +++ linux/fs/partitions/ibm.c Wed Jan 1 00:07:23 1997 @@ -29,13 +29,11 @@ #include "check.h" #include -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,3,98)) /* We hook in when DASD is a module... 
*/ int (*genhd_dasd_name)(char*,int,int,struct gendisk*) = NULL; int (*genhd_dasd_fillgeo)(int,struct hd_geometry *) = NULL; EXPORT_SYMBOL(genhd_dasd_fillgeo); EXPORT_SYMBOL(genhd_dasd_name); -#endif /* LINUX_IS_24 */ typedef enum { ibm_partition_lnx1 = 0, diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/udf/lowlevel.c linux/fs/udf/lowlevel.c --- /opt/kernel/linux-2.4.7/fs/udf/lowlevel.c Tue Jun 12 04:15:27 2001 +++ linux/fs/udf/lowlevel.c Tue Jul 24 15:04:44 2001 @@ -1,5 +1,5 @@ /* - * lowlevel.c + * fs/udf/lowlevel.c * * PURPOSE * Low Level Device Routines for the UDF filesystem diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- /opt/kernel/linux-2.4.7/include/asm-i386/highmem.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/highmem.h Wed Jan 1 00:07:23 1997 @@ -74,17 +74,15 @@ kunmap_high(page); } -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic(struct page *page, enum km_type type) +static inline void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; + /* + * could be moved outside __cli context, but then caller would + * have to check... + */ if (page < highmem_start_page) return page_address(page); @@ -100,7 +98,26 @@ return (void*) vaddr; } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. 
+ */ +static inline void *kmap_atomic(struct page *page, enum km_type type) +{ + unsigned long flags; + void *vaddr; + + __save_flags(flags); + __cli(); + vaddr = __kmap_atomic(page, type); + __restore_flags(flags); + + return vaddr; +} + +static inline void __kunmap_atomic(void *kvaddr, enum km_type type) { #if HIGHMEM_DEBUG unsigned long vaddr = (unsigned long) kvaddr; @@ -118,6 +135,18 @@ */ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); +#endif +} + +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + unsigned long flags; + + __save_flags(flags); + __cli(); + __kunmap_atomic(kvaddr, type); + __restore_flags(flags); #endif } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.7/include/asm-i386/kmap_types.h Thu Apr 12 21:11:39 2001 +++ linux/include/asm-i386/kmap_types.h Wed Jan 1 00:07:23 1997 @@ -6,6 +6,7 @@ KM_BOUNCE_WRITE, KM_SKB_DATA, KM_SKB_DATA_SOFTIRQ, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/page.h linux/include/asm-i386/page.h --- /opt/kernel/linux-2.4.7/include/asm-i386/page.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/page.h Wed Jan 1 00:07:23 1997 @@ -116,6 +116,8 @@ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +#define page_to_phys(page) (((page) - mem_map) * PAGE_SIZE) +#define page_to_bus(page) page_to_phys((page)) #endif /* __KERNEL__ */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/pci.h linux/include/asm-i386/pci.h --- /opt/kernel/linux-2.4.7/include/asm-i386/pci.h Fri Jul 20 21:52:38 2001 +++ linux/include/asm-i386/pci.h Tue Jul 24 15:30:29 2001 @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -84,6 +85,27 @@ /* Nothing to do */ } +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical + * to pci_map_single, but takes a struct page instead of a virtual address + */ +extern inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, + size_t size, int offset, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + + return (page - mem_map) * PAGE_SIZE + offset; +} + +extern inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. 
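Note the argument order of the pci_map_page() just added: size comes before offset. As a hedged illustration (map_bio_for_dma is an invented name; the bio accessors are the ones defined later in include/linux/bio.h), a driver could map a bio's single segment for a device-to-memory transfer like this:

static dma_addr_t map_bio_for_dma(struct pci_dev *pdev, struct bio *bio)
{
	/* pair with pci_unmap_page() once the transfer completes */
	return pci_map_page(pdev, bio_page(bio), bio_size(bio),
			    bio_offset(bio), PCI_DMA_FROMDEVICE);
}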
Here the scatter gather list @@ -102,8 +124,20 @@ extern inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { + int i; + if (direction == PCI_DMA_NONE) BUG(); + + for (i = 0; i < nents; i++ ) { + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else if (sg[i].page) + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + else + BUG(); + } + return nents; } @@ -119,6 +153,33 @@ /* Nothing to do */ } +/* + * meant to replace the pci_map_sg api, new drivers should use this + * interface + */ +extern inline int pci_map_sgl(struct pci_dev *hwdev, struct sg_list *sg, + int nents, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nents; i++) + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + + return nents; +} + +extern inline void pci_unmap_sgl(struct pci_dev *hwdev, struct sg_list *sg, + int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + + /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. * @@ -173,10 +234,9 @@ /* These macros should be used after a pci_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. + * returns. */ -#define sg_dma_address(sg) (virt_to_bus((sg)->address)) +#define sg_dma_address(sg) ((sg)->dma_address) #define sg_dma_len(sg) ((sg)->length) /* Return the index of the PCI controller for device. */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- /opt/kernel/linux-2.4.7/include/asm-i386/processor.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/processor.h Wed Jan 1 00:07:23 1997 @@ -477,4 +477,32 @@ __asm__ __volatile__("rep;nop"); } +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + #endif /* __ASM_I386_PROCESSOR_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/scatterlist.h linux/include/asm-i386/scatterlist.h --- /opt/kernel/linux-2.4.7/include/asm-i386/scatterlist.h Mon Dec 30 12:01:10 1996 +++ linux/include/asm-i386/scatterlist.h Wed Jan 1 00:07:23 1997 @@ -1,12 +1,59 @@ #ifndef _I386_SCATTERLIST_H #define _I386_SCATTERLIST_H +/* + * temporary measure, include a page and offset. + */ struct scatterlist { - char * address; /* Location data is to be transferred to */ + struct page * page; /* Location for highmem page, if any */ + char * address; /* Location data is to be transferred to, NULL for + * highmem page */ char * alt_address; /* Location of actual if address is a * dma indirect buffer. NULL otherwise */ + dma_addr_t dma_address; unsigned int length; + unsigned int offset;/* for highmem, page offset */ }; + +/* + * new style scatter gather list -- move to this completely? 
+ */ +struct sg_list { + /* + * input + */ + struct page *page; /* page to do I/O to */ + unsigned int length; /* length of I/O */ + unsigned int offset; /* offset into page */ + + /* + * original page, if bounced + */ + struct page *bounce_page; + + /* + * output + */ + dma_addr_t dma_address; /* mapped address */ +}; + +/* + * compat function... go to sg_list instead for new stuff! + */ +extern inline void set_bio_sg(struct scatterlist *sg, struct bio *bio) +{ + if (PageHighMem(bio_page(bio))) { + sg->page = bio_page(bio); + sg->offset = bio_offset(bio); + sg->address = NULL; + } else { + sg->page = NULL; + sg->offset = 0; + sg->address = bio_data(bio); + } + + sg->length = bio_size(bio); +} #define ISA_DMA_THRESHOLD (0x00ffffff) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- /opt/kernel/linux-2.4.7/include/asm-i386/spinlock.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/spinlock.h Wed Jan 1 00:07:23 1997 @@ -12,7 +12,7 @@ * initialize their spinlocks properly, tsk tsk. * Remember to turn this off in 2.4. -ben */ -#define SPINLOCK_DEBUG 0 +#define SPINLOCK_DEBUG 2 /* * Your basic SMP spinlocks, allowing only a single CPU anywhere diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/uaccess.h linux/include/asm-i386/uaccess.h --- /opt/kernel/linux-2.4.7/include/asm-i386/uaccess.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/uaccess.h Wed Jan 1 00:07:23 1997 @@ -6,6 +6,7 @@ */ #include #include +#include #include #define VERIFY_READ 0 @@ -526,6 +527,7 @@ static inline unsigned long __constant_copy_to_user(void *to, const void *from, unsigned long n) { + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __constant_copy_user(to,from,n); return n; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- /opt/kernel/linux-2.4.7/include/asm-m68k/machdep.h Tue Nov 28 02:57:34 2000 +++ linux/include/asm-m68k/machdep.h Wed Jan 1 00:07:23 1997 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-s390/dasd.h linux/include/asm-s390/dasd.h --- /opt/kernel/linux-2.4.7/include/asm-s390/dasd.h Thu Apr 12 04:02:28 2001 +++ linux/include/asm-s390/dasd.h Wed Jan 1 00:07:23 1997 @@ -319,7 +319,7 @@ unsigned long dasd_io_times[32]; /* histogram of requests's times */ unsigned long dasd_io_timps[32]; /* histogram of requests's times per sector */ unsigned long dasd_io_time1[32]; /* histogram of time from build to start */ - unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time2ps[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time3[32]; /* histogram of time from irq to end */ } dasd_profile_info_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-s390x/dasd.h linux/include/asm-s390x/dasd.h --- /opt/kernel/linux-2.4.7/include/asm-s390x/dasd.h Thu Apr 12 04:02:29 2001 +++ linux/include/asm-s390x/dasd.h Wed Jan 1 00:07:23 1997 @@ -319,7 +319,7 @@ unsigned long dasd_io_times[32]; /* histogram of requests's times */ unsigned long dasd_io_timps[32]; /* histogram of requests's times per sector */ unsigned long dasd_io_time1[32]; /* histogram of time from build to start */ - 
unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time2ps[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time3[32]; /* histogram of time from irq to end */ } dasd_profile_info_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.7/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Wed Jan 1 00:07:23 1997 @@ -0,0 +1,199 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +/* + * transition to 64-bit sector_t, possibly making it an option... + */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned int valid_counter; +} bio_hash_t; + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + unsigned long nr_lookups; + unsigned long nr_hits; + unsigned long nr_inserts; + unsigned long max_bucket_size; + unsigned long bucket_size[MAX_PROFILE_BUCKETS + 1]; + + unsigned long q_nr_back_lookups; + unsigned long q_nr_back_hits; + unsigned long q_nr_back_merges; + unsigned long q_nr_front_lookups; + unsigned long q_nr_front_hits; + unsigned long q_nr_front_merges; +}; + +/* + * hash table must be a power of two + */ +typedef struct bio_hash_table_s { + bio_hash_t **table; + unsigned long mask; + unsigned int valid_counter; + + struct bio_hash_stats st; +} bio_hash_table_t; + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + kdev_t bi_dev; + sector_t bi_sector; + bio_hash_t bi_hash; + struct bio *bi_next; /* request queue link */ + struct bio_vec bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + atomic_t bi_cnt; /* free when it hits zero */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; + struct request *bi_req; /* linked to this request */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags + */ +#define BIO_UPTODATE 1 /* ok after I/O completion */ +#define BIO_READ 2 /* read request */ +#define BIO_WRITE 4 /* write request */ +#define 
BIO_RW_AHEAD 8 /* read/write ahead */ +#define BIO_BARRIER 16 /* barrier I/O */ +#define BIO_RW_BLOCK 32 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 64 /* out-out-bounds error */ +#define BIO_POOL 128 /* from bio pool, not slab cache */ + +/* + * if you change any of the above, make sure this is still correct!! + */ +#define BIO_RW_MASK (BIO_READ + BIO_WRITE + BIO_RW_AHEAD + BIO_BARRIER) + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_to_bus(bio) (page_to_bus(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_flags & BIO_BARRIER) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) (kmap(bio_page((bio))) + bio_offset((bio))) +#define bio_kunmap(bio) kunmap(bio_page((bio))) +#define bio_kmap_irq(bio) (kmap_atomic(bio_page((bio)), KM_BIO_IRQ) + bio_offset((bio))) +#define bio_kunmap_irq(ptr) kunmap_atomic((void *) (((unsigned long) (ptr)) & PAGE_MASK), KM_BIO_IRQ) + +#define BIO_CONTIG(bio, nxt) (bio_to_bus((bio)) + bio_size((bio)) == bio_to_bus((nxt))) + +typedef void (bi_end_io_t) (struct bio *); + +#define bio_endio(bio, ok) do { \ + if (ok) \ + (bio)->bi_flags |= BIO_UPTODATE;\ + else \ + (bio)->bi_flags &= ~BIO_UPTODATE; \ + (bio)->bi_end_io((bio)); \ + } while (0) + +#define bio_io_error(bio) bio_endio((bio), 0) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) 
+ * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +struct request_queue; + +extern struct bio *bio_alloc(int); +extern inline void bio_put(struct bio *); +extern struct bio_vec *biovec_alloc(int); +extern void biovec_free(struct bio_vec *); + +extern int bio_hash_init(bio_hash_table_t *, int); +extern void bio_hash_cleanup(bio_hash_table_t *); +extern inline void bio_hash_remove(struct request_queue *, struct bio *); +extern inline void bio_hash_add(struct request_queue *, struct bio *); +extern inline struct bio *bio_hash_find(struct request_queue *, sector_t); +extern inline struct bio *__bio_hash_find(bio_hash_table_t *, sector_t); +extern inline int bio_hash_add_unique(struct request_queue *, struct bio *); +extern inline int __bio_hash_add_unique(bio_hash_table_t *, struct bio *); + +/* + * increment valid_counter, and make sure that wraps go to 1 and not 0 + */ +#define __bio_hash_inval(htable) do { \ + if (!++(htable)->valid_counter) \ + (htable)->valid_counter = 1; \ + } while (0) + +#endif /* __LINUX_BIO_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.7/include/linux/blk.h Fri Jul 20 21:53:47 2001 +++ linux/include/linux/blk.h Tue Jul 24 15:30:40 2001 @@ -7,13 +7,6 @@ #include /* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* * Initialization functions. */ extern int isp16_init(void); @@ -85,13 +78,14 @@ * code duplication in drivers. */ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(request_queue_t *, struct request *, int uptodate); +extern inline int __end_that_request_first(struct request *, int uptodate); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -336,12 +330,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -365,16 +363,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -383,18 +379,26 @@ #if ! 
SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { - struct request *req = CURRENT; - - if (end_that_request_first(req, uptodate, DEVICE_NAME)) - return; - +static inline void __end_request(struct request *req, int uptodate) +{ + if (!__end_that_request_first(req, uptodate)) { #ifndef DEVICE_NO_RANDOM - add_blkdev_randomness(MAJOR(req->rq_dev)); + add_blkdev_randomness(MAJOR(req->rq_dev)); #endif - DEVICE_OFF(req->rq_dev); - blkdev_dequeue_request(req); - end_that_request_last(req); + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); + } +} + +static inline void end_request(int uptodate) +{ + request_queue_t *q = CURRENT->q; + unsigned long flags; + + spin_lock_irqsave(&q->queue_lock, flags); + __end_request(CURRENT, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); } #endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.7/include/linux/blkdev.h Fri Jul 20 21:53:09 2001 +++ linux/include/linux/blkdev.h Tue Jul 24 15:29:49 2001 @@ -12,57 +12,46 @@ struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests when that is implemented. In - * paging, 'bh' is NULL, and the completion is used to wait - * for the IO to be ready. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* look for ->queue? you must _not_ + * access it directly, use + * blkdev_dequeue_request! */ int elevator_sequence; - struct list_head table; volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 - kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned short current_nr_sectors; + unsigned short hard_cur_sectors; void * special; char * buffer; struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -89,7 +78,7 @@ merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - 
plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -97,33 +86,112 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit (phys addr) */ - struct tq_struct plug_tq; + struct page *bounce_limit; + int bounce_gfp; /* - * Boolean that indicates whether this queue is plugged or not. + * This is used to remove the plug when tq_disk runs. */ - char plugged; + struct tq_struct plug_tq; /* - * Boolean that indicates whether current_request is active or - * not. + * various queue flags, see QUEUE_* below */ - char head_active; + unsigned long queue_flags; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* * Tasks wait here for free request */ - wait_queue_head_t wait_for_request; + wait_queue_head_t wait_for_request[2]; + + /* + * queue settings + */ + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + + /* + * queue state + */ + enum blk_queue_state queue_state; + + /* + * bio hash table + */ + bio_hash_table_t queue_hash; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_IDE_MAPPED 2 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_HEADACTIVE 1 /* has active head (going away) */ + +#define blk_queue_flag(q, flag) test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_set_flag(q, flag) set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_clear_flag(q, flag) clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) + +#define blk_queue_plugged(q) blk_queue_flag(q, PLUGGED) +#define blk_queue_headlive(q) blk_queue_flag(q, HEADACTIVE) + +#define blk_mark_plugged(q) blk_set_flag(q, PLUGGED) +#define blk_mark_headactive(q) blk_set_flag(q, HEADACTIVE) + +#define blk_set_unplugged(q) test_and_clear_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) +#define blk_set_plugged(q) test_and_set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_queue_empty(q) list_empty(&(q)->queue_head) + +#ifdef CONFIG_HIGHMEM +extern struct bio *create_bounce(struct bio *, int); +extern inline struct bio *blk_queue_bounce(request_queue_t *q, struct bio *bio) +{ + if (bio_page(bio) <= q->bounce_limit) + return bio; + + return create_bounce(bio, q->bounce_gfp); +} +#else +#define blk_queue_bounce(q, bio) (bio) +#endif + +extern unsigned long blk_max_low_pfn; + +static inline struct request *elv_next_request(request_queue_t *q) +{ + return q->elevator.elevator_next_req_fn(q); +} + +static inline void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +static inline struct request *elv_next_request_fn(request_queue_t *q) +{ + return list_entry(q->queue_head.next, struct request, queuelist); +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn * PAGE_SIZE) +#define BLK_BOUNCE_4G (0xffffffff) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -148,61 +216,68 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int 
wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); -extern request_queue_t *blk_get_queue(kdev_t dev); -extern inline request_queue_t *__blk_get_queue(kdev_t dev); +extern void generic_make_request(struct bio *bio); +extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern inline void blk_wake_queue(request_queue_t *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int blk_init_queue(request_queue_t *, request_fn_proc *); extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void generic_unplug_device(void *); -extern int * blk_size[MAX_BLKDEV]; +extern sector_t * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - extern atomic_t queued_sectors; #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) - /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); +static inline void blk_clear(int major) +{ + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + static inline int get_hardsect_size(kdev_t dev) { - extern int *hardsect_size[]; - if (hardsect_size[MAJOR(dev)] != NULL) - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - else - return 512; + request_queue_t *q = blk_get_queue(dev); + + return q ? 
q->hardsect_size : 512; } #define blk_finished_io(nsects) \ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/cdrom.h linux/include/linux/cdrom.h --- /opt/kernel/linux-2.4.7/include/linux/cdrom.h Fri Jul 20 21:53:03 2001 +++ linux/include/linux/cdrom.h Wed Jan 1 00:07:23 1997 @@ -577,6 +577,8 @@ struct dvd_manufact manufact; } dvd_struct; +#define CDROM_MAX_CDROMS 256 + /* * DVD authentication ioctl */ @@ -733,6 +735,7 @@ int number; /* generic driver updates this */ /* specifications */ kdev_t dev; /* device number */ + int nr; /* cdrom entry */ int mask; /* mask of capability: disables them */ int speed; /* maximum speed for reading data */ int capacity; /* number of discs in jukebox */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- /opt/kernel/linux-2.4.7/include/linux/devfs_fs_kernel.h Fri Jul 20 21:52:57 2001 +++ linux/include/linux/devfs_fs_kernel.h Wed Jan 1 00:07:23 1997 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/elevator.h linux/include/linux/elevator.h --- /opt/kernel/linux-2.4.7/include/linux/elevator.h Fri Feb 16 01:58:34 2001 +++ linux/include/linux/elevator.h Wed Jan 1 00:07:23 1997 @@ -5,13 +5,19 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,14 +27,24 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + unsigned int queue_ID; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); @@ -45,7 +61,7 @@ extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t); /* * Return values from elevator merger @@ -99,6 +115,8 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* 
elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ }) #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/fs.h linux/include/linux/fs.h --- /opt/kernel/linux-2.4.7/include/linux/fs.h Fri Jul 20 21:52:18 2001 +++ linux/include/linux/fs.h Wed Jan 1 00:07:23 1997 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -235,32 +236,31 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completio +n */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + + struct bio *b_bio; /* allocated on I/O to/from buffer */ }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); @@ -1133,10 +1133,24 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! 
+ * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); +} + +extern inline int bio_rw(struct bio *bio) +{ + if (bio->bi_flags & BIO_READ) + return READ; + else if (bio->bi_flags & BIO_WRITE) + return WRITE; + + BUG(); + return -1; /* ahem */ } extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); @@ -1297,10 +1311,11 @@ extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_moveto(struct file *new, struct file *old); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); extern void submit_bh(int, struct buffer_head *); +extern void submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1318,7 +1333,7 @@ extern struct buffer_head * bread(kdev_t, int, int); extern void wakeup_bdflush(int wait); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); @@ -1332,7 +1347,7 @@ unsigned long *); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/genhd.h linux/include/linux/genhd.h --- /opt/kernel/linux-2.4.7/include/linux/genhd.h Fri Jul 20 21:53:03 2001 +++ linux/include/linux/genhd.h Tue Jul 24 15:29:46 2001 @@ -74,6 +74,47 @@ devfs_handle_t *de_arr; /* one per physical disc */ char *flags; /* one per physical disc */ }; + +extern struct gendisk *blk_gendisk[MAX_BLKDEV]; /* in ll_rw_blk.c */ +extern struct gendisk *gendisk_head; /* in partitions/check.c */ + +static inline void +add_gendisk(struct gendisk *g) { + if (!blk_gendisk[g->major]) { + g->next = gendisk_head; + gendisk_head = g; + blk_gendisk[g->major] = g; + } +} + +static inline void +del_gendisk(struct gendisk *g) { + struct gendisk ** gp; + + blk_gendisk[g->major] = NULL; + for (gp = &gendisk_head; *gp; gp = &((*gp)->next)) + if (*gp == g) + break; + if (*gp) + *gp = (*gp)->next; +} + +static inline struct gendisk * +get_gendisk(kdev_t dev) { + return blk_gendisk[MAJOR(dev)]; +} + +static inline unsigned long +get_start_sect(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].start_sect : 0; +} + +static inline unsigned long +get_nr_sects(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].nr_sects : 0; +} #endif /* __KERNEL__ */ #ifdef CONFIG_SOLARIS_X86_PARTITION @@ -237,35 +278,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. 
- */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/highmem.h linux/include/linux/highmem.h --- /opt/kernel/linux-2.4.7/include/linux/highmem.h Fri Jul 20 21:52:18 2001 +++ linux/include/linux/highmem.h Tue Jul 24 15:29:51 2001 @@ -2,6 +2,7 @@ #define _LINUX_HIGHMEM_H #include +#include #include #ifdef CONFIG_HIGHMEM @@ -13,7 +14,7 @@ /* declarations for linux/mm/highmem.c */ FASTCALL(unsigned int nr_free_highpages(void)); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); +extern struct bio *create_bounce(struct bio * bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.7/include/linux/ide.h Fri Jul 20 21:53:47 2001 +++ linux/include/linux/ide.h Tue Jul 24 15:30:40 2001 @@ -87,6 +87,11 @@ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ /* + * state flags + */ +#define DMA_PIO_RETRY 1 /* retrying in PIO */ + +/* * Ensure that various configuration flags have compatible settings */ #ifdef REALLY_SLOW_IO @@ -152,6 +157,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. 
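Connecting the highmem pieces: a driver that cannot reach all of physical memory declares its DMA limit when it sets the queue up, and blk_queue_bounce() (in blkdev.h earlier in this patch) then substitutes a bounced bio for pages above that limit. A minimal sketch under those assumptions, with invented function names:

/* at queue init: this hardware can only DMA below 4GB */
static void my_init_queue(request_queue_t *q)
{
	blk_queue_bounce_limit(q, BLK_BOUNCE_4G);
}

/* before building the scatter-gather list for a bio */
static struct bio *my_prep_bio(request_queue_t *q, struct bio *bio)
{
	/* returns the original bio, or a bounce copy for high pages */
	return blk_queue_bounce(q, bio);
}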
+ */
+#define PRD_BYTES 8
+#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES))
+
+/*
  * Some more useful definitions
  */
 #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */
@@ -287,6 +307,8 @@
 byte keep_settings; /* restore settings after drive reset */
 byte using_dma; /* disk is using dma for read/write */
 byte waiting_for_dma; /* dma currently in progress */
+ byte retry_pio; /* retrying in pio mode */
+ byte state; /* retry state */
 byte unmask; /* flag: okay to unmask other irqs */
 byte slow; /* flag: slow data port */
 byte bswap; /* flag: byte swap data */
@@ -436,7 +458,7 @@
 ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */
 unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */
 dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */
- struct scatterlist *sg_table; /* Scatter-gather list used to build the above */
+ struct sg_list *sg_table; /* Scatter-gather list used to build the above */
 int sg_nents; /* Current number of entries in it */
 int sg_dma_direction; /* dma transfer direction */
 struct hwif_s *mate; /* other hwif from same PCI chip */
@@ -457,6 +479,7 @@
 unsigned reset : 1; /* reset after probe */
 unsigned autodma : 1; /* automatically try to enable DMA at boot */
 unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */
+ unsigned highmem : 1; /* can do full 32-bit dma */
 byte channel; /* for dual-port chips: 0=primary, 1=secondary */
 #ifdef CONFIG_BLK_DEV_IDEPCI
 struct pci_dev *pci_dev; /* for pci chipsets */
@@ -489,10 +512,12 @@
  */
 typedef int (ide_expiry_t)(ide_drive_t *);
+#define IDE_BUSY 0
+#define IDE_SLEEP 1
+
 typedef struct hwgroup_s {
 ide_handler_t *handler;/* irq handler, if active */
- volatile int busy; /* BOOL: protects all fields below */
- int sleeping; /* BOOL: wake us up on timer expiry */
+ unsigned long flags; /* BUSY, SLEEPING */
 ide_drive_t *drive; /* current drive */
 ide_hwif_t *hwif; /* ptr to current hwif in linked-list */
 struct request *rq; /* current request */
@@ -733,6 +758,11 @@
 unsigned long current_capacity (ide_drive_t *drive);
 /*
+ * Revalidate (read partition tables)
+ */
+void ide_revalidate_drive (ide_drive_t *drive);
+
+/*
  * Start a reset operation for an IDE interface.
  * The caller should return immediately after invoking this.
  */
@@ -754,6 +784,21 @@
 } ide_action_t;
 /*
+ * temporarily mapping a (possible) highmem bio for PIO transfer
+ */
+#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9)
+
+extern inline void *ide_map_buffer(struct request *rq)
+{
+ return bio_kmap_irq(rq->bio) + ide_rq_offset(rq);
+}
+
+extern inline void ide_unmap_buffer(char *buffer)
+{
+ bio_kunmap_irq(buffer);
+}
+
+/*
  * This function issues a special IDE device request
  * onto the request queue.
  *
@@ -900,5 +945,8 @@
 #endif
 void hwif_unregister (ide_hwif_t *hwif);
+
+#define DRIVE_LOCK(drive) ((drive)->queue.queue_lock)
+extern spinlock_t ide_lock;
 #endif /* _IDE_H */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/iobuf.h linux/include/linux/iobuf.h
--- /opt/kernel/linux-2.4.7/include/linux/iobuf.h Fri Jul 20 21:52:24 2001
+++ linux/include/linux/iobuf.h Wed Jan 1 00:07:23 1997
@@ -26,7 +26,6 @@
 #define KIO_MAX_ATOMIC_IO 512 /* in kb */
 #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
-#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
 /* The main kiobuf struct used for all our IO! */
@@ -48,8 +47,6 @@
 /* Always embed enough struct pages for atomic IO */
 struct page * map_array[KIO_STATIC_PAGES];
- struct buffer_head * bh[KIO_MAX_SECTORS];
- unsigned long blocks[KIO_MAX_SECTORS];
 /* Dynamic state for IO completion: */
 atomic_t io_count; /* IOs still in progress */
@@ -81,6 +78,9 @@
 /* fs/buffer.c */
 int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
- kdev_t dev, unsigned long b[], int size);
+ kdev_t dev, sector_t [], int size);
+
+/* fs/bio.c */
+void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block);
 #endif /* __LINUX_IOBUF_H */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/list.h linux/include/linux/list.h
--- /opt/kernel/linux-2.4.7/include/linux/list.h Sat Feb 17 01:06:17 2001
+++ linux/include/linux/list.h Wed Jan 1 00:07:23 1997
@@ -3,6 +3,8 @@
 #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+#include
+
 /*
 * Simple doubly linked list implementation.
 *
@@ -147,7 +149,8 @@
 * @head: the head for your list.
 */
 #define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+  pos = pos->next, prefetch(pos->next))
 #endif /* __KERNEL__ || _LVM_H_INCLUDE */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/loop.h linux/include/linux/loop.h
--- /opt/kernel/linux-2.4.7/include/linux/loop.h Wed Mar 7 04:35:36 2001
+++ linux/include/linux/loop.h Wed Jan 1 00:07:23 1997
@@ -49,8 +49,8 @@
 int old_gfp_mask;
 spinlock_t lo_lock;
- struct buffer_head *lo_bh;
- struct buffer_head *lo_bhtail;
+ struct bio *lo_bio;
+ struct bio *lo_biotail;
 int lo_state;
 struct semaphore lo_sem;
 struct semaphore lo_ctl_mutex;
@@ -77,6 +77,7 @@
 */
 #define LO_FLAGS_DO_BMAP 1
 #define LO_FLAGS_READ_ONLY 2
+#define LO_FLAGS_BH_REMAP 4
 /*
 * Note that this structure gets the wrong offsets when directly used
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/nbd.h linux/include/linux/nbd.h
--- /opt/kernel/linux-2.4.7/include/linux/nbd.h Tue May 1 23:20:25 2001
+++ linux/include/linux/nbd.h Tue Jul 24 15:34:38 2001
@@ -37,24 +37,25 @@
 static void nbd_end_request(struct request *req)
 {
- struct buffer_head *bh;
+ struct bio *bio;
 unsigned nsect;
 unsigned long flags;
 int uptodate = (req->errors == 0) ? 1 : 0;
+ request_queue_t *q = req->q;
 #ifdef PARANOIA
 requests_out++;
 #endif
- spin_lock_irqsave(&io_request_lock, flags);
- while((bh = req->bh) != NULL) {
-  nsect = bh->b_size >> 9;
+ spin_lock_irqsave(&q->queue_lock, flags);
+ while((bio = req->bio) != NULL) {
+  nsect = bio_sectors(bio);
 blk_finished_io(nsect);
-  req->bh = bh->b_reqnext;
-  bh->b_reqnext = NULL;
-  bh->b_end_io(bh, uptodate);
+  req->bio = bio->bi_next;
+  bio->bi_next = NULL;
+  bio_endio(bio, uptodate);
 }
 blkdev_release_request(req);
- spin_unlock_irqrestore(&io_request_lock, flags);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
 }
 #define MAX_NBD 128
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/pci.h linux/include/linux/pci.h
--- /opt/kernel/linux-2.4.7/include/linux/pci.h Fri Jul 20 21:52:38 2001
+++ linux/include/linux/pci.h Tue Jul 24 15:30:29 2001
@@ -314,6 +314,8 @@
 #define PCI_DMA_FROMDEVICE 2
 #define PCI_DMA_NONE 3
+#define PCI_MAX_DMA32 (0xffffffff)
+
 #define DEVICE_COUNT_COMPATIBLE 4
 #define DEVICE_COUNT_IRQ 2
 #define DEVICE_COUNT_DMA 2
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/prefetch.h linux/include/linux/prefetch.h
--- /opt/kernel/linux-2.4.7/include/linux/prefetch.h Thu Jan 1 01:00:00 1970
+++ linux/include/linux/prefetch.h Wed Jan 1 00:07:23 1997
@@ -0,0 +1,60 @@
+/*
+ * Generic cache management functions. Everything is arch-specific,
+ * but this header exists to make sure the defines/functions can be
+ * used in a generic way.
+ *
+ * 2000-11-13 Arjan van de Ven
+ *
+ */
+
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#include
+#include
+
+/*
+ prefetch(x) attempts to pre-emptively get the memory pointed to
+ by address "x" into the CPU L1 cache.
+ prefetch(x) should not cause any kind of exception, prefetch(0) is
+ specifically ok.
+
+ prefetch() should be defined by the architecture, if not, the
+ #define below provides a no-op define.
+
+ There are 3 prefetch() macros:
+
+ prefetch(x) - prefetches the cacheline at "x" for read
+ prefetchw(x) - prefetches the cacheline at "x" for write
+ spin_lock_prefetch(x) - prefetches the spinlock *x for taking
+
+ there is also PREFETCH_STRIDE which is the architecture-preferred
+ "lookahead" size for prefetching streamed operations.
+
+*/
+
+/*
+ * These cannot be do{}while(0) macros. See the mental gymnastics in
+ * the loop macro.
+ */
+
+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif
+
+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif
+
+#endif
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/raid/md.h linux/include/linux/raid/md.h
--- /opt/kernel/linux-2.4.7/include/linux/raid/md.h Fri Jul 20 21:53:47 2001
+++ linux/include/linux/raid/md.h Tue Jul 24 15:30:47 2001
@@ -77,7 +77,6 @@
 extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
 extern void md_recover_arrays (void);
 extern int md_check_ordering (mddev_t *mddev);
-extern struct gendisk * find_gendisk (kdev_t dev);
 extern int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x);
 extern int md_error (mddev_t *mddev, kdev_t rdev);
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/slab.h linux/include/linux/slab.h
--- /opt/kernel/linux-2.4.7/include/linux/slab.h Fri Jul 20 21:52:18 2001
+++ linux/include/linux/slab.h Wed Jan 1 00:07:23 1997
@@ -75,6 +75,8 @@
 extern kmem_cache_t *bh_cachep;
 extern kmem_cache_t *fs_cachep;
 extern kmem_cache_t *sigact_cachep;
+extern kmem_cache_t *bio_cachep;
+extern kmem_cache_t *biovec_cachep;
 #endif /* __KERNEL__ */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/kernel/ksyms.c linux/kernel/ksyms.c
--- /opt/kernel/linux-2.4.7/kernel/ksyms.c Fri Jul 20 21:41:02 2001
+++ linux/kernel/ksyms.c Wed Jan 1 00:07:23 1997
@@ -121,6 +121,8 @@
 EXPORT_SYMBOL(kunmap_high);
 EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(create_bounce);
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
 #endif
 /* filesystem internal functions */
@@ -282,7 +284,6 @@
 EXPORT_SYMBOL(block_read);
 EXPORT_SYMBOL(block_write);
 EXPORT_SYMBOL(blksize_size);
-EXPORT_SYMBOL(hardsect_size);
 EXPORT_SYMBOL(blk_size);
 EXPORT_SYMBOL(blk_dev);
 EXPORT_SYMBOL(is_read_only);
@@ -300,9 +301,10 @@
 EXPORT_SYMBOL(tq_disk);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(refile_buffer);
-EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_readahead);
 EXPORT_SYMBOL(file_moveto);
+EXPORT_SYMBOL(wipe_partitions);
+EXPORT_SYMBOL(blk_gendisk);
 /* tty routines */
 EXPORT_SYMBOL(tty_hangup);
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.4.7/mm/highmem.c Sat Jun 30 01:17:34 2001
+++ linux/mm/highmem.c Wed Jan 1 00:07:23 1997
@@ -22,6 +22,8 @@
 #include
 #include
+#include
+
 /*
 * Virtual_count is not a pure "count".
 * 0 means that it is not mapped, and has not been mapped
@@ -173,18 +175,19 @@
 static LIST_HEAD(emergency_bhs);
 /*
- * Simple bounce buffer support for highmem pages.
- * This will be moved to the block layer in 2.5.
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
  */
-
-static inline void copy_from_high_bh (struct buffer_head *to,
-  struct buffer_head *from)
+static inline void copy_from_high_bio(struct bio *to, struct bio *from)
 {
- struct page *p_from;
- char *vfrom;
+ char *vfrom, *vto;
 unsigned long flags;
- p_from = from->b_page;
+ if (bio_offset(to))
+  BUG();
+ if (bio_size(to) != bio_size(from))
+  BUG();
 /*
 * Since this can be executed from IRQ context, reentrance
 *
@@ -192,38 +195,60 @@
 */
 __save_flags(flags);
 __cli();
- vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
- memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
- kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
+ vfrom = __kmap_atomic(bio_page(from), KM_BOUNCE_WRITE);
+ vto = __kmap_atomic(bio_page(to), KM_BOUNCE_WRITE);
+ memcpy(vto, vfrom + bio_offset(from), bio_size(to));
+ __kunmap_atomic(vto, KM_BOUNCE_WRITE);
+ __kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
 __restore_flags(flags);
 }
-static inline void copy_to_high_bh_irq (struct buffer_head *to,
-  struct buffer_head *from)
+static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
- struct page *p_to;
- char *vto;
+ char *vto, *vfrom;
 unsigned long flags;
- p_to = to->b_page;
+ if (bio_offset(from))
+  BUG();
+ if (bio_size(to) != bio_size(from))
+  BUG();
+
 __save_flags(flags);
 __cli();
- vto = kmap_atomic(p_to, KM_BOUNCE_READ);
- memcpy(vto + bh_offset(to), from->b_data, to->b_size);
- kunmap_atomic(vto, KM_BOUNCE_READ);
+ vto = __kmap_atomic(bio_page(to), KM_BOUNCE_READ);
+ vfrom = __kmap_atomic(bio_page(from), KM_BOUNCE_READ);
+ memcpy(vto + bio_offset(to), vfrom, bio_size(to));
+ __kunmap_atomic(vfrom, KM_BOUNCE_READ);
+ __kunmap_atomic(vto, KM_BOUNCE_READ);
 __restore_flags(flags);
 }
-static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+static __init int init_emergency_pool(void)
 {
- struct page *page;
- struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
- unsigned long flags;
+ spin_lock_irq(&emergency_lock);
+ while (nr_emergency_pages < POOL_SIZE) {
+  struct page * page = alloc_page(GFP_ATOMIC);
+  if (!page) {
+   printk("couldn't refill highmem emergency pages");
+   break;
+  }
+  list_add(&page->list, &emergency_pages);
+  nr_emergency_pages++;
+ }
+ spin_unlock_irq(&emergency_lock);
+ printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages);
+ return 0;
+}
- bh_orig->b_end_io(bh_orig, uptodate);
+__initcall(init_emergency_pool);
- page = bh->b_page;
+static inline void bounce_end_io (struct bio *bio)
+{
+ struct bio *bio_orig = bio->bi_private;
+ struct page *page = bio_page(bio);
+ unsigned long flags;
+ bio_endio(bio_orig, bio->bi_flags & BIO_UPTODATE);
 spin_lock_irqsave(&emergency_lock, flags);
 if (nr_emergency_pages >= POOL_SIZE)
  __free_page(page);
@@ -235,74 +260,32 @@
 list_add(&page->list, &emergency_pages);
 nr_emergency_pages++;
 }
-
- if (nr_emergency_bhs >= POOL_SIZE) {
-#ifdef HIGHMEM_DEBUG
-  /* Don't clobber the constructed slab cache */
-  init_waitqueue_head(&bh->b_wait);
-#endif
-  kmem_cache_free(bh_cachep, bh);
- } else {
-  /*
-   * Ditto in the bh case, here we abuse b_inode_buffers:
-   */
-  list_add(&bh->b_inode_buffers, &emergency_bhs);
-  nr_emergency_bhs++;
- }
 spin_unlock_irqrestore(&emergency_lock, flags);
+ bio_put(bio);
 }
-static __init int init_emergency_pool(void)
+static void bounce_end_io_write (struct bio *bio)
 {
- spin_lock_irq(&emergency_lock);
- while (nr_emergency_pages < POOL_SIZE) {
-  struct page * page = alloc_page(GFP_ATOMIC);
-  if (!page) {
-   printk("couldn't refill highmem emergency pages");
-   break;
-  }
-  list_add(&page->list, &emergency_pages);
-  nr_emergency_pages++;
- }
- while (nr_emergency_bhs < POOL_SIZE) {
-  struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
-  if (!bh) {
-   printk("couldn't refill highmem emergency bhs");
-   break;
-  }
-  list_add(&bh->b_inode_buffers, &emergency_bhs);
-  nr_emergency_bhs++;
- }
- spin_unlock_irq(&emergency_lock);
- printk("allocated %d pages and %d bhs reserved for the highmem bounces\n",
-  nr_emergency_pages, nr_emergency_bhs);
-
- return 0;
+ bounce_end_io(bio);
 }
-__initcall(init_emergency_pool);
-
-static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_read (struct bio *bio)
 {
- bounce_end_io(bh, uptodate);
-}
+ struct bio *bio_orig = bio->bi_private;
-static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
-{
- struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+ if (bio->bi_flags & BIO_UPTODATE)
+  copy_to_high_bio_irq(bio_orig, bio);
- if (uptodate)
-  copy_to_high_bh_irq(bh_orig, bh);
- bounce_end_io(bh, uptodate);
+ bounce_end_io(bio);
 }
-struct page *alloc_bounce_page (void)
+struct page *alloc_bounce_page(int gfp_mask)
 {
 struct list_head *tmp;
 struct page *page;
 repeat_alloc:
- page = alloc_page(GFP_NOIO);
+ page = alloc_page(gfp_mask);
 if (page)
  return page;
 /*
@@ -334,91 +317,35 @@
 goto repeat_alloc;
 }
-struct buffer_head *alloc_bounce_bh (void)
+struct bio *create_bounce(struct bio *bio_orig, int gfp_mask)
 {
- struct list_head *tmp;
- struct buffer_head *bh;
+ struct page *page;
+ struct bio *bio;
-repeat_alloc:
- bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
- if (bh)
-  return bh;
- /*
-  * No luck. First, kick the VM so it doesnt idle around while
-  * we are using up our emergency rations.
-  */
- wakeup_bdflush(0);
+ bio = bio_alloc(GFP_NOIO);
 /*
-  * Try to allocate from the emergency pool.
+  * wasteful for 1kB fs, but machines with lots of ram are less likely
+  * to have 1kB fs for anything that needs to go fast. so all things
+  * considered, it should be ok.
  */
- tmp = &emergency_bhs;
- spin_lock_irq(&emergency_lock);
- if (!list_empty(tmp)) {
-  bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers);
-  list_del(tmp->next);
-  nr_emergency_bhs--;
- }
- spin_unlock_irq(&emergency_lock);
- if (bh)
-  return bh;
+ page = alloc_bounce_page(gfp_mask);
- /* we need to wait I/O completion */
- run_task_queue(&tq_disk);
+ bio->bi_dev = bio_orig->bi_dev;
+ bio->bi_sector = bio_orig->bi_sector;
+ bio->bi_flags |= bio_orig->bi_flags & BIO_RW_MASK;
- current->policy |= SCHED_YIELD;
- __set_current_state(TASK_RUNNING);
- schedule();
- goto repeat_alloc;
-}
+ bio->bi_io_vec.bv_page = page;
+ bio->bi_io_vec.bv_len = bio_size(bio_orig);
+ bio->bi_io_vec.bv_offset = 0;
-struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
-{
- struct page *page;
- struct buffer_head *bh;
+ bio->bi_private = bio_orig;
- if (!PageHighMem(bh_orig->b_page))
-  return bh_orig;
-
- bh = alloc_bounce_bh();
- /*
-  * This is wasteful for 1k buffers, but this is a stopgap measure
-  * and we are being ineffective anyway. This approach simplifies
-  * things immensly. On boxes with more than 4GB RAM this should
-  * not be an issue anyway.
-  */
- page = alloc_bounce_page();
-
- set_bh_page(bh, page, 0);
-
- bh->b_next = NULL;
- bh->b_blocknr = bh_orig->b_blocknr;
- bh->b_size = bh_orig->b_size;
- bh->b_list = -1;
- bh->b_dev = bh_orig->b_dev;
- bh->b_count = bh_orig->b_count;
- bh->b_rdev = bh_orig->b_rdev;
- bh->b_state = bh_orig->b_state;
-#ifdef HIGHMEM_DEBUG
- bh->b_flushtime = jiffies;
- bh->b_next_free = NULL;
- bh->b_prev_free = NULL;
- /* bh->b_this_page */
- bh->b_reqnext = NULL;
- bh->b_pprev = NULL;
-#endif
- /* bh->b_page */
- if (rw == WRITE) {
-  bh->b_end_io = bounce_end_io_write;
-  copy_from_high_bh(bh, bh_orig);
+ if (bio->bi_flags & BIO_WRITE) {
+  bio->bi_end_io = bounce_end_io_write;
+  copy_from_high_bio(bio, bio_orig);
 } else
-  bh->b_end_io = bounce_end_io_read;
- bh->b_private = (void *)bh_orig;
- bh->b_rsector = bh_orig->b_rsector;
-#ifdef HIGHMEM_DEBUG
- memset(&bh->b_wait, -1, sizeof(bh->b_wait));
-#endif
+  bio->bi_end_io = bounce_end_io_read;
- return bh;
+ return bio;
 }
-
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/mm/page_io.c linux/mm/page_io.c
--- /opt/kernel/linux-2.4.7/mm/page_io.c Wed Apr 25 23:46:21 2001
+++ linux/mm/page_io.c Wed Jan 1 00:07:23 1997
@@ -36,7 +36,7 @@
 static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
 {
 unsigned long offset;
- int zones[PAGE_SIZE/512];
+ sector_t zones[PAGE_SIZE/512];
 int zones_used;
 kdev_t dev = 0;
 int block_size;