diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/CREDITS linux/CREDITS --- /opt/kernel/linux-2.4.7/CREDITS Wed Jul 4 23:41:33 2001 +++ linux/CREDITS Wed Jan 1 00:07:23 1997 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/Documentation/Configure.help linux/Documentation/Configure.help --- /opt/kernel/linux-2.4.7/Documentation/Configure.help Fri Jul 20 02:48:15 2001 +++ linux/Documentation/Configure.help Wed Jan 1 00:07:23 1997 @@ -5520,17 +5520,6 @@ Documentation/scsi.txt. The module will be called sg.o. If unsure, say N. -Debug new queueing code for SCSI -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See http://www.andante.org/scsi_queue.html for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/acorn/block/mfmhd.c linux/drivers/acorn/block/mfmhd.c --- /opt/kernel/linux-2.4.7/drivers/acorn/block/mfmhd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/acorn/block/mfmhd.c Tue Jul 24 15:04:44 2001 @@ -932,26 +932,25 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; -#ifdef DEBUG - /*if ((dev>>6)==1) */ console_printf("mfm_request: raw vals: dev=%d (block=512 bytes) block=%d nblocks=%d\n", dev, block, nsect); -#endif - if (dev >= (mfm_drives << 6) || - block >= mfm[dev].nr_sects || ((block+nsect) > mfm[dev].nr_sects)) { - if (dev >= (mfm_drives << 6)) - printk("mfm: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); + + if (dev >= (mfm_drives << 6) || (dev & 0x3f) || + block >= mfm[dev].nr_sects || + (block+nsect > mfm[dev].nr_sects)) { + if (dev >= (mfm_drives << 6) || (dev & 0x3f)) + printk("mfm: bad minor number: device=%s\n", + kdevname(CURRENT->rq_dev)); else - printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", (dev >> 6)+'a', - block, nsect, mfm[dev].nr_sects); + printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", + (dev >> 6)+'a', block, nsect, + mfm[dev].nr_sects); printk("mfm: continue 1\n"); end_request(0); Busy = 0; continue; } - block += mfm[dev].start_sect; - - /* DAG: Linux doesn't cope with this - even though it has an array telling - it the hardware block size - silly */ + /* DAG: Linux doesn't cope with this - even though it has + an array telling it the hardware block size - silly */ block <<= 1; /* Now in 256 byte sectors */ nsect <<= 1; /* Ditto */ @@ -1180,22 +1179,21 @@ static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) { - struct hd_geometry *geo = (struct hd_geometry *) arg; kdev_t dev; - int device, major, minor, err; + int device; if (!inode || !(dev = inode->i_rdev)) return -EINVAL; - major = MAJOR(dev); 
- minor = MINOR(dev); - - device = DEVICE_NR(MINOR(inode->i_rdev)), err; + device = DEVICE_NR(MINOR(inode->i_rdev)); if (device >= mfm_drives) return -EINVAL; switch (cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *) arg; + if (!arg) return -EINVAL; if (put_user (mfm_info[device].heads, &geo->heads)) @@ -1204,31 +1202,21 @@ return -EFAULT; if (put_user (mfm_info[device].cylinders, &geo->cylinders)) return -EFAULT; - if (put_user (mfm[minor].start_sect, &geo->start)) + if (put_user (get_start_sect(inode->i_rdev), &geo->start)) return -EFAULT; return 0; - - case BLKGETSIZE: - return put_user (mfm[minor].nr_sects, (long *)arg); - - case BLKFRASET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - max_readahead[major][minor] = arg; - return 0; - - case BLKFRAGET: - return put_user(max_readahead[major][minor], (long *) arg); - - case BLKSECTGET: - return put_user(max_sectors[major][minor], (long *) arg); + } case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; return mfm_reread_partitions(dev); + case BLKGETSIZE: + case BLKSECTGET: case BLKFLSBUF: + case BLKFRASET: + case BLKFRAGET: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1294,8 +1282,10 @@ if ((heads < 1) || (mfm_info[drive].cylinders > 1024)) { printk("mfm%c: Insane disc shape! Setting to 512/4/32\n",'a' + (dev >> 6)); - /* These values are fairly arbitary, but are there so that if your - * lucky you can pick apart your disc to find out what is going on - + /* + * These values are fairly arbitary, but are there so + * that if you're lucky you can pick apart your disc + * to find out what is going on - * I reckon these figures won't hurt MOST drives */ mfm_info[drive].sectors = 32; @@ -1306,7 +1296,8 @@ mfm_specify (); mfm_geometry (drive); mfm[drive << 6].start_sect = 0; - mfm[drive << 6].nr_sects = mfm_info[drive].cylinders * mfm_info[drive].heads * mfm_info[drive].sectors / 2; + mfm[drive << 6].nr_sects = mfm_info[drive].cylinders + * mfm_info[drive].heads * mfm_info[drive].sectors / 2; } } @@ -1347,7 +1338,8 @@ mfm_drives == 1 ? "" : "s"); mfm_gendisk.nr_real = mfm_drives; - if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, "MFM harddisk", NULL)) + if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, + "MFM harddisk", NULL)) printk("mfm: unable to get IRQ%d\n", mfm_irq); if (mfm_irqenable) @@ -1450,10 +1442,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB?) 
read ahread */ -#ifndef MODULE - mfm_gendisk.next = gendisk_head; - gendisk_head = &mfm_gendisk; -#endif + add_gendisk(&mfm_gendisk); Busy = 0; lastspecifieddrive = -1; @@ -1469,8 +1458,9 @@ */ static int mfm_reread_partitions(kdev_t dev) { - unsigned int start, i, maxp, target = DEVICE_NR(MINOR(dev)); + unsigned int target = DEVICE_NR(MINOR(dev)); unsigned long flags; + int res; save_flags_cli(flags); if (mfm_info[target].busy || mfm_info[target].access_count > 1) { @@ -1480,24 +1470,19 @@ mfm_info[target].busy = 1; restore_flags (flags); - maxp = mfm_gendisk.max_p; - start = target << mfm_gendisk.minor_shift; - - for (i = maxp - 1; i >= 0; i--) { - int minor = start + i; - invalidate_device (MKDEV(MAJOR_NR, minor), 1); - mfm_gendisk.part[minor].start_sect = 0; - mfm_gendisk.part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; /* Divide by 2, since sectors are 2 times smaller than usual ;-) */ - grok_partitions(&mfm_gendisk, target, 1<<6, mfm_info[target].heads * + grok_partitions(dev, mfm_info[target].heads * mfm_info[target].cylinders * mfm_info[target].sectors / 2); +leave: mfm_info[target].busy = 0; wake_up (&mfm_wait_open); - return 0; + return res; } #ifdef MODULE @@ -1512,6 +1497,7 @@ outw (0, mfm_irqenable); /* Required to enable IRQs from MFM podule */ free_irq(mfm_irq, NULL); unregister_blkdev(MAJOR_NR, "mfm"); + del_gendisk(&mfm_gendisk); if (ecs) ecard_release(ecs); if (mfm_addr) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/kernel/linux-2.4.7/drivers/block/DAC960.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/DAC960.c Tue Jul 24 15:04:44 2001 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +302,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -484,14 +486,14 @@ static void DAC960_ExecuteCommand(DAC960_Command_T *Command) { DAC960_Controller_T *Controller = Command->Controller; - DECLARE_MUTEX_LOCKED(Semaphore); + DECLARE_COMPLETION(Wait); unsigned long ProcessorFlags; - Command->Semaphore = &Semaphore; + Command->Waiting = &Wait; DAC960_AcquireControllerLock(Controller, &ProcessorFlags); DAC960_QueueCommand(Command); DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); if (in_interrupt()) return; - down(&Semaphore); + wait_for_completion(&Wait); } @@ -1316,7 +1318,7 @@ *Controller) { DAC960_V1_DCDB_T DCDBs[DAC960_V1_MaxChannels], *DCDB; - Semaphore_T Semaphores[DAC960_V1_MaxChannels], *Semaphore; + Completion_T Wait[DAC960_V1_MaxChannels], *wait; unsigned long ProcessorFlags; int Channel, TargetID; for (TargetID = 0; TargetID < Controller->Targets; TargetID++) @@ -1327,12 +1329,12 @@ DAC960_SCSI_Inquiry_T *InquiryStandardData = &Controller->V1.InquiryStandardData[Channel][TargetID]; InquiryStandardData->PeripheralDeviceType = 0x1F; - Semaphore = &Semaphores[Channel]; - init_MUTEX_LOCKED(Semaphore); + wait = &Wait[Channel]; + init_completion(wait); DCDB = &DCDBs[Channel]; DAC960_V1_ClearCommand(Command); Command->CommandType = DAC960_ImmediateCommand; - Command->Semaphore = Semaphore; + Command->Waiting = wait; Command->V1.CommandMailbox.Type3.CommandOpcode = 
DAC960_V1_DCDB; Command->V1.CommandMailbox.Type3.BusAddress = Virtual_to_Bus32(DCDB); DCDB->Channel = Channel; @@ -1363,11 +1365,11 @@ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - Semaphore = &Semaphores[Channel]; - down(Semaphore); + wait = &Wait[Channel]; + wait_for_completion(wait); if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) continue; - Command->Semaphore = Semaphore; + Command->Waiting = wait; DCDB = &DCDBs[Channel]; DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); DCDB->BusAddress = Virtual_to_Bus32(InquiryUnitSerialNumber); @@ -1381,7 +1383,7 @@ DAC960_AcquireControllerLock(Controller, &ProcessorFlags); DAC960_QueueCommand(Command); DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); - down(Semaphore); + wait_for_completion(wait); } } return true; @@ -1804,76 +1806,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. -*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. -*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. 
*/ @@ -1881,7 +1813,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) { int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - GenericDiskInfo_T *GenericDiskInfo; RequestQueue_T *RequestQueue; int MinorNumber; /* @@ -1900,25 +1831,20 @@ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); blk_init_queue(RequestQueue, DAC960_RequestFunction); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* Initialize the Disk Partitions array, Partition Sizes array, Block Sizes array, and Max Sectors per Request array. */ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) - { Controller->BlockSizes[MinorNumber] = BLOCK_SIZE; - Controller->MaxSectorsPerRequest[MinorNumber] = - Controller->MaxBlocksPerCommand; - } + Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -1934,15 +1860,10 @@ Controller->GenericDiskInfo.next = NULL; Controller->GenericDiskInfo.fops = &DAC960_BlockDeviceOperations; /* - Install the Generic Disk Information structure at the end of the list. + Install the Generic Disk Information structure. */ - if ((GenericDiskInfo = gendisk_head) != NULL) - { - while (GenericDiskInfo->next != NULL) - GenericDiskInfo = GenericDiskInfo->next; - GenericDiskInfo->next = &Controller->GenericDiskInfo; - } - else gendisk_head = &Controller->GenericDiskInfo; + add_gendisk(&Controller->GenericDiskInfo); + /* Indicate the Block Device Registration completed successfully, */ @@ -1967,27 +1888,16 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MajorNumber)); /* + Remove the Generic Disk Information structure from the list. + */ + del_gendisk(&Controller->GenericDiskInfo); + /* Remove the Disk Partitions array, Partition Sizes array, Block Sizes array, Max Sectors per Request array, and Max Segments per Request array. */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; - /* - Remove the Generic Disk Information structure from the list. 
- */ - if (gendisk_head != &Controller->GenericDiskInfo) - { - GenericDiskInfo_T *GenericDiskInfo = gendisk_head; - while (GenericDiskInfo != NULL && - GenericDiskInfo->next != &Controller->GenericDiskInfo) - GenericDiskInfo = GenericDiskInfo->next; - if (GenericDiskInfo != NULL) - GenericDiskInfo->next = GenericDiskInfo->next->next; - } - else gendisk_head = Controller->GenericDiskInfo.next; + blk_clear(MajorNumber); } @@ -2625,23 +2535,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2715,23 +2626,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2759,7 +2671,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2768,14 +2680,12 @@ if (Request->cmd == READ) Command->CommandType = DAC960_ReadCommand; else Command->CommandType = DAC960_WriteCommand; - Command->Semaphore = Request->sem; + Command->Waiting = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; 
blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -2828,8 +2738,10 @@ static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + BufferHeader->bi_flags |= BIO_UPTODATE; + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -2883,13 +2795,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -2916,25 +2828,25 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } /* Wake up requestor for swap file paging requests. */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } add_blkdev_randomness(DAC960_MAJOR + Controller->ControllerNumber); } else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -2948,10 +2860,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -2964,26 +2876,23 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. - */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } } } else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -3000,14 +2909,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3589,8 +3498,8 @@ } if (CommandType == DAC960_ImmediateCommand) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; return; } if (CommandType == DAC960_QueuedCommand) @@ -3666,13 +3575,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3926,37 +3835,34 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. 
- */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } add_blkdev_randomness(DAC960_MAJOR + Controller->ControllerNumber); } else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -3974,26 +3880,23 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } - /* - Wake up requestor for swap file paging requests. - */ - if (Command->Semaphore != NULL) + if (Command->Waiting) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; } } } else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -4008,16 +3911,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -4539,8 +4442,8 @@ } if (CommandType == DAC960_ImmediateCommand) { - up(Command->Semaphore); - Command->Semaphore = NULL; + complete(Command->Waiting); + Command->Waiting = NULL; return; } if (CommandType == DAC960_QueuedCommand) @@ -5045,7 +4948,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5094,16 +4998,10 @@ LogicalDeviceInfo->ConfigurableDeviceSizeIn512ByteBlocksOrMB / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (long *) Argument); case BLKRAGET: /* Get Read-Ahead. */ if ((long *) Argument == NULL) return -EINVAL; @@ -5125,46 +5023,17 @@ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + if (Controller->FirmwareType == DAC960_V1_Controller) - grok_partitions(&Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, - Controller->V1.LogicalDriveInformation - [LogicalDriveNumber] - .LogicalDriveSize); + grok_partitions(Inode->i_rdev, + Controller->V1.LogicalDriveInformation + [LogicalDriveNumber] + .LogicalDriveSize); else - grok_partitions( - &Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, + grok_partitions(Inode->i_rdev, Controller->V2.LogicalDeviceInformation[LogicalDriveNumber] ->ConfigurableDeviceSizeIn512ByteBlocksOrMB); return 0; @@ -5287,11 +5156,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- /opt/kernel/linux-2.4.7/drivers/block/DAC960.h Wed Feb 21 06:26:22 2001 +++ linux/drivers/block/DAC960.h Tue Jul 24 15:36:20 2001 @@ -2136,7 +2136,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct gendisk GenericDiskInfo_T; @@ -2153,7 +2153,7 @@ typedef struct pt_regs Registers_T; typedef struct request IO_Request_T; typedef request_queue_t RequestQueue_T; -typedef struct semaphore Semaphore_T; +typedef struct completion Completion_T; typedef struct super_block SuperBlock_T; typedef struct timer_list Timer_T; typedef wait_queue_head_t WaitQueue_T; @@ -2220,7 +2220,7 @@ DAC960_CommandType_T CommandType; struct DAC960_Controller *Controller; struct DAC960_Command *Next; - Semaphore_T *Semaphore; + Completion_T *Waiting; unsigned int LogicalDriveNumber; unsigned int BlockNumber; unsigned int BlockCount; @@ -2414,7 +2414,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2448,7 +2447,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2460,13 +2459,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
*/ static inline @@ -2478,7 +2477,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2497,7 +2496,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2510,7 +2509,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/acsi.c linux/drivers/block/acsi.c --- /opt/kernel/linux-2.4.7/drivers/block/acsi.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/acsi.c Tue Jul 24 15:04:44 2001 @@ -1014,7 +1014,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1126,7 +1125,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1137,10 +1136,7 @@ put_user( 0, &((Scsi_Idlun *) arg)->host_unique_id ); return 0; - case BLKGETSIZE: /* Return device size */ - return put_user(acsi_part[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -1795,8 +1791,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - acsi_gendisk.next = gendisk_head; - gendisk_head = &acsi_gendisk; + add_gendisk(&acsi_gendisk); #ifdef CONFIG_ATARI_SLM err = slm_init(); @@ -1820,8 +1815,6 @@ void cleanup_module(void) { - struct gendisk ** gdp; - del_timer( &acsi_timer ); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); atari_stram_free( acsi_buffer ); @@ -1829,13 +1822,7 @@ if (devfs_unregister_blkdev( MAJOR_NR, "ad" ) != 0) printk( KERN_ERR "acsi: cleanup_module failed\n"); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &acsi_gendisk) - break; - if (!*gdp) - printk( KERN_ERR "acsi: entry in disk chain missing!\n" ); - else - *gdp = (*gdp)->next; + del_gendisk(&acsi_gendisk); } #endif @@ -1866,7 +1853,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1881,16 +1868,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1905,12 +1883,13 @@ ENABLE_IRQ(); stdma_release(); - - grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- /opt/kernel/linux-2.4.7/drivers/block/amiflop.c Sat Apr 28 
20:27:53 2001 +++ linux/drivers/block/amiflop.c Wed Jan 1 00:07:23 1997 @@ -1890,10 +1890,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- /opt/kernel/linux-2.4.7/drivers/block/blkpg.c Sun May 20 20:34:05 2001 +++ linux/drivers/block/blkpg.c Tue Jul 24 15:07:27 2001 @@ -54,17 +54,6 @@ * Note that several drives may have the same major. */ -/* a linear search, superfluous when dev is a pointer */ -static struct gendisk *get_gendisk(kdev_t dev) { - struct gendisk *g; - int m = MAJOR(dev); - - for (g = gendisk_head; g; g = g->next) - if (g->major == m) - break; - return g; -} - /* * Add a partition. * @@ -208,6 +197,9 @@ { int intval; + if (!dev) + return -EINVAL; + switch (cmd) { case BLKROSET: if (!capable(CAP_SYS_ADMIN)) @@ -216,6 +208,7 @@ return -EFAULT; set_device_ro(dev, intval); return 0; + case BLKROGET: intval = (is_read_only(dev) != 0); return put_user(intval, (int *)(arg)); @@ -223,20 +216,47 @@ case BLKRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if(!dev || arg > 0xff) + if(arg > 0xff) return -EINVAL; read_ahead[MAJOR(dev)] = arg; return 0; + case BLKRAGET: if (!arg) return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + { + int *mr; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + mr[MINOR(dev)] = arg; + return 0; + } + + case BLKFRAGET: + { + int *mr; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(mr[MINOR(dev)], (long *) arg); + } + + case BLKSECTGET: + { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -ENODEV; + return put_user(q->max_sectors, (unsigned short *) arg); + } + case BLKFLSBUF: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev) - return -EINVAL; fsync_dev(dev); invalidate_buffers(dev); return 0; @@ -273,6 +293,25 @@ case BLKELVSET: return blkelvset_ioctl(&blk_get_queue(dev)->elevator, (blkelv_ioctl_arg_t *) arg); + case BLKHASHPROF: { + request_queue_t *q = blk_get_queue(dev); + + if (!q) + return -EINVAL; + if (copy_to_user((struct bio_hash_stats *) arg, &q->queue_hash.st, sizeof(struct bio_hash_stats))) + return -EFAULT; + return 0; + } + + case BLKHASHCLEAR: { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + spin_lock_irq(&q->queue_lock); + memset(&q->queue_hash.st, 0, sizeof(struct bio_hash_stats)); + spin_unlock_irq(&q->queue_lock); + return 0; + } default: return -EINVAL; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.7/drivers/block/cciss.c Mon Jul 2 22:56:40 2001 +++ linux/drivers/block/cciss.c Tue Jul 24 15:04:44 2001 @@ -83,7 +83,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -145,7 +145,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: 
%d\n\n", @@ -156,8 +155,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -235,7 +233,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -306,13 +304,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,25 +390,24 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (long*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: case BLKRASET: case BLKRAGET: case BLKPG: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -453,16 +449,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -478,7 +465,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -509,7 +496,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -525,7 +512,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); 
return(0); @@ -652,11 +639,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -704,42 +691,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; + res = wipe_partitions(dev); + if (res) + goto leave; - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -770,15 +747,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -787,7 +764,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1083,11 +1059,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk("cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1100,17 +1076,16 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while(bio) { + xbh = bio->bi_next; + 
bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, status); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1129,7 +1104,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1208,79 +1183,33 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* * Get a request and submit it to the controller. - * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. */ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg, sect; - char *lastdataend; - struct buffer_head *bh; + unsigned long lastdataend; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + int i, dir; - // Loop till the queue is empty if or it is plugged - while (1) - { - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (blk_queue_plugged(q)) + goto startio; - creq = blkdev_entry_next_request(queue_head); +queue: + if (list_empty(queue_head)) + goto startio; + + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1289,18 +1218,15 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); - start_io(h); - return; + complete_buffers(creq->bio, 0); + goto startio; } if (( c = cmd_alloc(h, 1)) == NULL) - { - start_io(h); - return; - } + goto startio; + c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1315,43 +1241,43 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? 
CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ seg = 0; - lastdataend = NULL; + lastdataend = 0; sect = 0; - while(bh) - { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + while(bio) { + sect += bio_sectors(bio); + if (bio_to_bus(bio) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { + tmp_sg[seg-1].len += bio_size(bio); + lastdataend += bio_size(bio); + } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].len = bio_size(bio); + tmp_sg[seg].offset = bio_offset(bio); + tmp_sg[seg].page = bio_page(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].len, tmp_sg[i].offset, dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1375,10 +1301,8 @@ c->Request.CDB[8]= sect & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - blkdev_dequeue_request(creq); - /* * ehh, we can't really end the request here since it's not * even started yet. for now it shouldn't hurt though @@ -1392,7 +1316,10 @@ h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto queue; +startio: + start_io(h); } static void do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs) @@ -1411,7 +1338,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1444,11 +1371,16 @@ } } } + /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1941,18 +1873,14 @@ q->queuedata = hba[i]; blk_init_queue(q, do_cciss_request); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -1963,8 +1891,7 @@ hba[i]->gendisk.nr_real = hba[i]->num_luns; /* Get on the disk list */ - hba[i]->gendisk.next = gendisk_head; - gendisk_head = &(hba[i]->gendisk); + add_gendisk(&(hba[i]->gendisk)); cciss_geninit(i); for(j=0; jdriver_data == NULL) { @@ -2003,23 +1929,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ - if (gendisk_head == &(hba[i]->gendisk)) - { - gendisk_head = hba[i]->gendisk.next; - } else - { - for(g=gendisk_head; g ; g=g->next) - { - if(g->next == &(hba[i]->gendisk)) - { - g->next = hba[i]->gendisk.next; - } - } - } - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + del_gendisk(&(hba[i]->gendisk)); + + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2027,32 +1941,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. 
+ */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.7/drivers/block/cciss.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss.h Tue Jul 24 15:36:42 2001 @@ -17,7 +17,8 @@ struct my_sg { int len; - char *start_addr; + int offset; + struct page *page; }; struct ctlr_info; @@ -85,9 +86,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +247,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.7/drivers/block/cciss_cmd.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss_cmd.h Wed Jan 1 00:07:23 1997 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value @@ -228,7 +228,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.7/drivers/block/cpqarray.c Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.c Tue Jul 24 15:36:53 2001 @@ -102,7 +102,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -147,7 +146,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -178,12 +177,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -314,7 +312,6 @@ void cleanup_module(void) { int i; - struct gendisk *g; remove_proc_entry("array", proc_root_driver); @@ -331,66 +328,15 @@ hba[i]->cmd_pool_dhandle); kfree(hba[i]->cmd_pool_bits); - if (gendisk_head == &ida_gendisk[i]) { - gendisk_head = ida_gendisk[i].next; - } else { - for(g=gendisk_head; g; g=g->next) { - if (g->next == &ida_gendisk[i]) { - g->next = ida_gendisk[i].next; - break; - } - } - } + del_gendisk(&ida_gendisk[i]); } kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 
1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -437,20 +383,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -508,7 +443,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -529,14 +463,11 @@ q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -546,8 +477,7 @@ ida_gendisk[i].nr_real = 0; /* Get on the disk list */ - ida_gendisk[i].next = gendisk_head; - gendisk_head = &ida_gendisk[i]; + add_gendisk(&ida_gendisk[i]); init_timer(&hba[i]->timer); hba[i]->timer.expires = jiffies + IDA_TIMER; @@ -919,22 +849,27 @@ ctlr_info_t *h = q->queuedata; cmdlist_t *c; int seg, sect; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; struct my_sg tmp_sg[SG_MAX]; - int i; + unsigned long lastdataend; + int i, dir; + + if (blk_queue_plugged(q)) { + start_io(h); + return; + } -// Loop till the queue is empty if or it is plugged +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); @@ -943,7 +878,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -954,47 +889,50 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; 
c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->req.hdr.blk = creq->sector; + c->bio = bio; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); + if (bio == NULL) + panic("bio == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; + seg = lastdataend = 0; sect = 0; - while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_bus(bio) == lastdataend) { + tmp_sg[seg-1].size += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].size = bio_size(bio); + tmp_sg[seg].page = bio_page(bio); + tmp_sg[seg].offset = bio_offset(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, tmp_sg[i].page, + tmp_sg[i].size, + tmp_sg[i].offset, dir); } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1056,17 +994,17 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, ok); - bh = xbh; + bio = xbh; } } /* @@ -1099,11 +1037,11 @@ /* unmap the DMA mapping for all the scatter gather elements */ for(i=0; ireq.hdr.sg_cnt; i++) { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, cmd->req.sg[i].size, (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } - complete_buffers(cmd->bh, ok); + complete_buffers(cmd->bio, ok); } /* @@ -1128,7 +1066,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
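/*
 * Illustrative sketch only (not part of the patch): the bio-based segment
 * collection that do_ida_request above performs -- walk the request's bio
 * chain, coalesce bus-contiguous pieces into a my_sg array, then hand each
 * segment to pci_map_page() using the same call convention as the hunk
 * above.  build_sg() and the nseg limit are hypothetical names; kernel
 * context (struct my_sg as defined in cpqarray.h) is assumed.
 */
static int build_sg(struct pci_dev *pdev, struct request *creq,
		    struct my_sg *sg, int nseg, __u32 *addr)
{
	struct bio *bio = creq->bio;
	unsigned long lastdataend = 0;
	int seg = 0, i, dir;

	while (bio) {
		if (bio_to_bus(bio) == lastdataend) {
			/* physically contiguous with the previous piece */
			sg[seg - 1].size += bio_size(bio);
			lastdataend += bio_size(bio);
		} else {
			if (seg == nseg)
				return -1;	/* controller sg list full */
			sg[seg].size = bio_size(bio);
			sg[seg].page = bio_page(bio);
			sg[seg].offset = bio_offset(bio);
			lastdataend = bio_to_bus(bio) + bio_size(bio);
			seg++;
		}
		bio = bio->bi_next;
	}

	/* map each collected segment for DMA, direction from the request */
	dir = (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
	for (i = 0; i < seg; i++)
		addr[i] = (__u32) pci_map_page(pdev, sg[i].page, sg[i].size,
					       sg[i].offset, dir);
	return seg;
}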
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1162,8 +1100,12 @@ /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1209,14 +1151,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(ida[(ctlr<i_rdev)].nr_sects, (long*)arg); - return 0; case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1252,6 +1190,7 @@ return(0); } + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1352,11 +1291,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1566,15 +1505,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1583,7 +1522,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1611,17 +1549,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1629,25 +1565,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize so 
we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.7/drivers/block/cpqarray.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.h Tue Jul 24 15:34:39 2001 @@ -58,7 +58,8 @@ struct my_sg { int size; - char *start_addr; + int offset; + struct page *page; }; struct ctlr_info; @@ -121,6 +122,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.7/drivers/block/elevator.c Fri Jul 20 05:59:41 2001 +++ linux/drivers/block/elevator.c Wed Jan 1 00:07:23 1997 @@ -18,8 +18,13 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - #include #include #include @@ -28,38 +33,38 @@ #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + next = rq->queuelist.next; if (next == head) return 0; /* * if the device is different (usually on a different partition), - * just check if bh is after rq + * just check if bio is after rq */ - next_rq = blkdev_entry_to_request(next); + next_rq = list_entry(next, struct request, queuelist); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_dev >= rq->rq_dev && bio->bi_dev <= next_rq->rq_dev; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. 
if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,66 +73,101 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } - int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct request *__rq; + struct bio *bio_hash; + int rw = bio_rw(bio); + /* + * first try a back merge, then front, then give up and scan. this + * will of course fail for different size bios on the same queue, + * however that isn't an issue + */ + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + q->queue_hash.st.q_nr_back_lookups++; + if (bio_hash) { + q->queue_hash.st.q_nr_back_hits++; + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector + __rq->nr_sectors == bio->bi_sector && + !__rq->waiting && !__rq->special && !bio_hash->bi_next) { + q->queue_hash.st.q_nr_back_merges++; + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_BACK_MERGE; + } +#if 0 + bio_put(bio_hash); +#endif + } + + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + q->queue_hash.st.q_nr_front_lookups++; + if (bio_hash) { + q->queue_hash.st.q_nr_front_hits++; + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector - count == bio->bi_sector && + !__rq->waiting && !__rq->special && !bio_hash->bi_next) { + q->queue_hash.st.q_nr_front_merges++; + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_FRONT_MERGE; + } +#if 0 + bio_put(bio_hash); +#endif + } + + /* + * no merge possible, scan for insertion + */ while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry(entry, struct request, queuelist); /* - * simply "aging" of requests in queue + * get next entry into L1 cache */ - if (__rq->elevator_sequence-- <= 0) - break; + prefetch(entry->prev); if (__rq->waiting) continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) - break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = 
&req->queue, *head = &q->queue_head; + struct list_head *entry, *head = &q->queue_head; /* * second pass scan of requests that got passed over, if any */ + entry = &req->queuelist; while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + struct request *tmp =list_entry(entry,struct request,queuelist); + prefetch(entry->next); tmp->elevator_sequence -= count; } } @@ -142,39 +182,41 @@ * See if we can find a request that this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; - - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; - - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + struct bio *bio_hash; + struct request *__rq = NULL; + int rw, count, ret; + + count = bio_sectors(bio); + rw = bio_rw(bio); + ret = ELEVATOR_NO_MERGE; + + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + if (bio_hash) { + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector + __rq->nr_sectors == bio->bi_sector && + !__rq->waiting && !__rq->special) { + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + if (bio_hash) { + __rq = bio_hash->bi_req; + if (__rq->rq_dev == bio->bi_dev && rw == __rq->cmd && + __rq->sector - count == bio->bi_sector && + !__rq->waiting && !__rq->special) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + *req = bio->bi_req = __rq; + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} @@ -196,16 +238,14 @@ return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +int blkelvset_ioctl(elevator_t *elevator, const blkelv_ioctl_arg_t *arg) { blkelv_ioctl_arg_t input; if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) return -EFAULT; - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) + if (input.read_latency < 0 || input.write_latency < 0) return -EINVAL; elevator->read_latency = input.read_latency; @@ -213,10 +253,15 @@ return 0; } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_init(request_queue_t *q, elevator_t *elevator, elevator_t type) { static unsigned int queue_ID; *elevator = type; elevator->queue_ID = queue_ID++; + + if (elevator->elevator_init_fn) + return elevator->elevator_init_fn(q, elevator); + + return 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.7/drivers/block/floppy.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/block/floppy.c Tue Jul 24 15:37:39 2001 @@ -468,7 +468,7 @@ */ static struct floppy_struct user_params[N_DRIVE]; -static int floppy_sizes[256]; +static sector_t floppy_sizes[256]; static int 
floppy_blocksizes[256]; /* @@ -570,7 +570,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2282,7 +2282,6 @@ static void request_done(int uptodate) { int block; - unsigned long flags; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2301,7 +2300,6 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2309,7 +2307,6 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2334,9 +2331,7 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); } } @@ -2382,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2429,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2437,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2458,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2475,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2484,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2496,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2505,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2519,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2541,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2641,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2649,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2681,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2689,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2709,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2722,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2736,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2751,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
@@ -2770,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2789,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2841,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2859,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2913,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.7/drivers/block/ida_cmd.h Mon Dec 11 21:50:39 2000 +++ linux/drivers/block/ida_cmd.h Tue Jul 24 15:34:38 2001 @@ -96,7 +96,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.7/drivers/block/ll_rw_blk.c Fri Jul 20 05:51:23 2001 +++ linux/drivers/block/ll_rw_blk.c Tue Jul 24 14:26:33 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -51,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... 
- */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -83,7 +71,7 @@ * * if (!blk_size[MAJOR]) then no minor size checking is done. */ -int * blk_size[MAX_BLKDEV]; +sector_t *blk_size[MAX_BLKDEV]; /* * blksize_size contains the size of all block-devices: @@ -95,18 +83,9 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. + * blk_gendisk contains pointers to the gendisk structures */ -int * hardsect_size[MAX_BLKDEV]; +struct gendisk *blk_gendisk[MAX_BLKDEV]; /* * The following tunes the read-ahead algorithm in mm/filemap.c @@ -114,11 +93,6 @@ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * queued sectors for all devices, used to make sure we don't fill all * of memory with locked buffers */ @@ -130,15 +104,20 @@ static int high_queued_sectors, low_queued_sectors; static int batch_requests, queue_nr_requests; static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); +unsigned long blk_max_low_pfn; -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} - -inline request_queue_t *__blk_get_queue(kdev_t dev) +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -148,69 +127,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - -static int __blk_cleanup_queue(struct list_head *head) -{ - struct request *rq; - int i = 0; - - if (list_empty(head)) - return 0; - - do { - rq = list_entry(head->next, struct request, table); - list_del(&rq->table); - kmem_cache_free(request_cachep, rq); - i++; - } while (!list_empty(head)); - - return i; -} - -/** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released - * - * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... 
- **/ -void blk_cleanup_queue(request_queue_t * q) -{ - int count = queue_nr_requests; - - count -= __blk_cleanup_queue(&q->request_freelist[READ]); - count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); - count -= __blk_cleanup_queue(&q->pending_freelist[READ]); - count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); - - memset(q, 0, sizeof(*q)); -} - /** * blk_queue_headactive - indicate whether head of request queue may be active * @q: The queue which this applies to. @@ -234,10 +150,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -246,7 +161,7 @@ * @mfn: the alternate make_request function * * Description: - * The normal way for &struct buffer_heads to be passed to a device + * The normal way for &struct bios to be passed to a device * driver is for them to be collected into requests on a request * queue, and then to allow the device driver to select requests * off that queue when it is ready. This works well for many block @@ -258,19 +173,103 @@ * * Caveat: * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. **/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + q->max_segments = MAX_SEGMENTS; q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); + + q->queue_state = Queue_up; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @bus_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page, ie + * one the driver can still call bio_page() and get a valid address on. + **/ +void blk_queue_bounce_limit(request_queue_t *q, unsigned long long dma_addr) +{ + q->bounce_limit = mem_map + (dma_addr >> PAGE_SHIFT); + + /* + * set page alloc gfp mask for bounce pages + */ + q->bounce_gfp = GFP_NOIO; + + /* + * until the zoning design is decided on, always go low when + * getting a bounce page + */ +#if 0 + if (dma_addr > BLK_BOUNCE_HIGH) + q->bounce_gfp |= __GFP_DMA32; +#endif +} + +/** + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. 
+ **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request + **/ +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) { - if (req->nr_segments < max_segments) { + q->max_segments = max_segments; +} + +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes + * + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. + **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +{ + q->hardsect_size = size; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -278,36 +277,65 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (BIO_CONTIG(req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (BIO_CONTIG(bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (BIO_CONTIG(req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left on there to be serviced, it will be left in a state where + * it is still unplugged but not be recalled by the block layer. + * not be replugged, and thus request_fn will be run. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. 
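/*
 * Illustrative sketch only (not part of the patch): how a low level driver
 * might use the queue limit helpers introduced above when bringing up its
 * queue.  MY_MAJOR, my_request_fn and the particular limits are hypothetical;
 * the cciss and cpqarray conversions earlier in this patch follow the same
 * pattern with BLK_BOUNCE_4G and their own segment counts.
 */
static void my_request_fn(request_queue_t *q);

static int __init my_driver_init_queue(void)
{
	request_queue_t *q = BLK_DEFAULT_QUEUE(MY_MAJOR);
	int ret;

	/* blk_init_queue() can now fail, so check the return value */
	ret = blk_init_queue(q, my_request_fn);
	if (ret)
		return ret;

	blk_queue_headactive(q, 0);
	/* controller can only DMA to the low 4GB, bounce anything above */
	blk_queue_bounce_limit(q, BLK_BOUNCE_4G);
	/* at most 128 sectors and 32 scatter-gather segments per request */
	blk_queue_max_sectors(q, 128);
	blk_queue_max_segments(q, 32);
	/* plain 512 byte hardware sectors */
	blk_queue_hardsect_size(q, 512);
	return 0;
}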
+ **/ +void inline blk_wake_queue(request_queue_t *q) +{ +#if 1 + if (!blk_set_plugged(q)) + queue_task(&q->plug_tq, &tq_disk); +#else + q->request_fn(q); +#endif +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -316,16 +344,12 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!list_empty(&q->queue_head)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + blk_wake_queue(q); } /* @@ -333,24 +357,91 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } + if (blk_set_unplugged(q) && !list_empty(&q->queue_head)) + q->request_fn(q); } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static void blk_init_free_list(request_queue_t *q) +static int __blk_cleanup_rqlist(struct list_head *head) +{ + struct request *rq; + int i = 0; + + if (list_empty(head)) + return 0; + + do { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } while (!list_empty(head)); + + return i; +} + +static int __blk_cleanup_queue(request_queue_t *q) +{ + int count; + + count = __blk_cleanup_rqlist(&q->request_freelist[READ]); + count += __blk_cleanup_rqlist(&q->request_freelist[WRITE]); + count += __blk_cleanup_rqlist(&q->pending_freelist[READ]); + count += __blk_cleanup_rqlist(&q->pending_freelist[WRITE]); + + return count; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... 
+ **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(q); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + bio_hash_cleanup(&q->queue_hash); + + memset(q, 0, sizeof(*q)); +} + +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -366,21 +457,27 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->table, &q->request_freelist[i & 1]); + if (i < queue_nr_requests >> 1) + list_add(&rq->queuelist, &q->request_freelist[READ]); + else + list_add(&rq->queuelist, &q->request_freelist[WRITE]); } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); + return 0; +nomem: + __blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -403,8 +500,8 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * The request on the head of the queue is by default assumed to be * potentially active, and it is not considered for re-ordering or merging @@ -415,33 +512,49 @@ * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { + int ret = -ENOMEM; + INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + + if (blk_init_free_list(q)) + goto out_err; + + if (bio_hash_init(&q->queue_hash, queue_nr_requests >> 2)) + goto cleanup_queue; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS))) + goto cleanup_hash; + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. 
+ * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_set_unplugged(q); + blk_mark_headactive(q); + return 0; +cleanup_hash: + bio_hash_cleanup(&q->queue_hash); +cleanup_queue: + blk_cleanup_queue(q); +out_err: + return ret; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, table); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -450,7 +563,7 @@ if (!list_empty(&q->request_freelist[rw])) { rq = blkdev_free_rq(&q->request_freelist[rw]); - list_del(&rq->table); + list_del(&rq->queuelist); rq->rq_status = RQ_ACTIVE; rq->special = NULL; rq->q = q; @@ -467,34 +580,24 @@ register struct request *rq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&q->wait_for_request, &wait); + spin_lock_prefetch(&q->queue_lock); + + add_wait_queue_exclusive(&q->wait_for_request[rw], &wait); for (;;) { __set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (rq) break; generic_unplug_device(q); schedule(); } - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -543,7 +646,7 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. 
* * By this point, req->cmd is always either READ/WRITE, never READA, @@ -552,18 +655,19 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) BUG(); - } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + e->elevator_add_req_fn(q, req, insert_here); } inline void blk_refill_freelist(request_queue_t *q, int rw) @@ -576,7 +680,7 @@ } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ inline void blkdev_release_request(struct request *req) { @@ -601,12 +705,12 @@ /* * Add to pending free list and batch wakeups */ - list_add(&req->table, &q->pending_freelist[rw]); + list_add(&req->queuelist, &q->pending_freelist[rw]); if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); - wake_up_nr(&q->wait_for_request, wake_up); + wake_up_nr(&q->wait_for_request[rw], wake_up); } } } @@ -614,10 +718,7 @@ /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static void attempt_merge(request_queue_t *q, struct request *req) { struct request *next; @@ -626,8 +727,8 @@ return; if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special) return; /* * If we are not allowed to merge these requests, then @@ -635,135 +736,135 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + q->elevator.elevator_merge_req_fn(req, next); + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + blkdev_dequeue_request(next); + blkdev_release_request(next); + } +} - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) +{ + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + struct list_head *prev = rq->queuelist.prev; + + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. + **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) { - struct list_head * prev; + unsigned long flags; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static int __make_request(request_queue_t *q, struct bio *bio) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, count; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; + sector_t sector; - count = bh->b_size >> 9; - sector = bh->b_rsector; + sector = bio->bi_sector; + count = bio_sectors(bio); + rw = bio_rw(bio); - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } - - /* We'd better have a real physical mapping! 
- Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + latency = elevator_request_latency(elevator, rw); -/* look for a free request. */ /* - * Try to coalesce the new request with old requests + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ - max_sectors = get_max_sectors(bh->b_rdev); + bio = blk_queue_bounce(q, bio); again: + spin_lock_prefetch(&q->queue_lock); req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency + * and invalidate the entire existing merge hash */ - spin_lock_irq(&io_request_lock); + if ((bio->bi_flags & BIO_BARRIER) && !freereq) { + latency = 0; + __bio_hash_inval(&q->queue_hash); + } insert_here = head->prev; if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) + } else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); switch (el_ret) { - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
+ */ + req->buffer = bio_data(bio); + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -776,7 +877,7 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: @@ -794,107 +895,140 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + spin_unlock_irq(&q->queue_lock); + if (bio->bi_flags & BIO_RW_AHEAD) { + bio->bi_flags |= BIO_RW_BLOCK; goto end_io; + } freereq = __get_request_wait(q, rw); goto again; } + bio->bi_req = req; + /* fill up the request-info, and add it to the queue */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; blk_started_io(count); add_request(q, req, insert_here); out: if (freereq) blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + if (__bio_hash_add_unique(&q->queue_hash, bio)) + printk("ll_rw_blk: %lu for %s already there\n", bio->bi_sector, kdevname(bio->bi_dev)); + spin_unlock_irq(&q->queue_lock); return 0; end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + major = MAJOR(bio->bi_dev); + if ((g = blk_gendisk[major])) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. + * generic_make_request: hand a buffer to it's device driver for I/O + * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. * * generic_make_request() does not return any status. 
The * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io + * completion, is delivered asynchronously through the bio->bi_end_io * function described (one day) elsewhere. * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields + * The caller of generic_make_request must make sure that bi_io_vec + * is set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_sector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request(struct bio *bio) { - int major = MAJOR(bh->b_rdev); - int minorsize = 0; + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; + int rw = bio_rw(bio); + sector_t minorsize = 0; - if (!bh->b_end_io) - BUG(); + /* + * don't lock any more buffers if we are above the high + * water mark. instead start I/O on the queued stuff. + */ + if (atomic_read(&queued_sectors) >= high_queued_sectors) { + if (bio->bi_flags & BIO_RW_AHEAD) { + bio->bi_flags |= BIO_RW_BLOCK; + goto end_io; + } + run_task_queue(&tq_disk); + wait_event(blk_buffers_wait, + atomic_read(&queued_sectors) < low_queued_sectors); + } - /* Test device size, when known. */ + /* Test device or partition size, when known. */ if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; + minorsize = blk_size[major][minor]; if (minorsize) { unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device.
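The calling convention spelled out in the kernel-doc above is what submit_bh() (further down in this patch) implements for buffer_head users. For a caller that is not buffer_head based, a minimal single-page read might be sketched as follows; my_bio_done(), my_read_page() and the idea of waiting on a completion are illustrative assumptions, not part of the patch:

	static void my_bio_done(struct bio *bio)
	{
		/* success/failure is reported via BIO_UPTODATE in bi_flags */
		complete((struct completion *) bio->bi_private);
		bio_put(bio);
	}

	static void my_read_page(kdev_t dev, sector_t sector, struct page *page)
	{
		DECLARE_COMPLETION(done);
		struct bio *bio = bio_alloc(GFP_NOIO);

		bio->bi_dev = dev;		/* may be remapped by lower layers */
		bio->bi_sector = sector;	/* 512-byte units */
		bio->bi_end_io = my_bio_done;
		bio->bi_private = &done;
		bio->bi_io_vec.bv_page = page;
		bio->bi_io_vec.bv_len = PAGE_SIZE;
		bio->bi_io_vec.bv_offset = 0;

		submit_bio(READ, bio);
		wait_for_completion(&done);
	}
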
*/ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), rw, + (sector + count)>>1, + (u64) blk_size[major][minor]); + } + bio->bi_flags |= BIO_EOF; + goto end_io; } } @@ -902,63 +1036,124 @@ * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + + /* + * just a reminder, will be changed of course + */ + if (q->queue_state != Queue_up) + printk("ll_rw_blk: request for downed queue\n"); + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. it's + * pretty simple, because no bio will ever contain more than one bio_vec + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bh->b_end_io(bh, bio->bi_flags & BIO_UPTODATE); + bio_put(bio); +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ +void submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + /* + * do some validity checks... 
+ */ + if (!bio->bi_end_io) + BUG(); + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); + BUG(); + } + + if (rw & WRITE) { + kstat.pgpgout += count; + bio->bi_flags |= BIO_WRITE; + } else { + kstat.pgpgin += count; + bio->bi_flags |= BIO_READ; + if (rw == READA) + bio->bi_flags |= BIO_RW_AHEAD; + } + + generic_make_request(bio); +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ void submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; if (!test_bit(BH_Lock, &bh->b_state)) BUG(); + if (!buffer_mapped(bh)) + BUG(); + if (!bh->b_end_io) + BUG(); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + submit_bio(rw, bio); } /** @@ -990,8 +1185,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * a multiple of the current approved size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -1014,7 +1210,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1032,16 +1228,6 @@ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - /* - * don't lock any more buffers if we are above the high - * water mark. instead start I/O on the queued stuff. - */ - if (atomic_read(&queued_sectors) >= high_queued_sectors) { - run_task_queue(&tq_disk); - wait_event(blk_buffers_wait, - atomic_read(&queued_sectors) < low_queued_sectors); - } - /* Only one thread can actually submit the I/O. 
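One subtlety in the ll_rw_block() check above: the modulus test was replaced with a mask, which is only equivalent because valid block sizes are powers of two. A standalone illustration, not taken from the patch:

	/*
	 * for a power-of-two blocksize, (size % blocksize) and
	 * (size & (blocksize - 1)) yield the same remainder, so either
	 * expression detects a buffer that is not a multiple of blocksize
	 */
	static inline int blocksize_misaligned(unsigned int size,
					       unsigned int blocksize)
	{
		return (size & (blocksize - 1)) != 0;
	}
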
*/ if (test_and_set_bit(BH_Lock, &bh->b_state)) continue; @@ -1086,11 +1272,47 @@ #endif +inline int __end_that_request_first(struct request *req, int uptodate) +{ + struct bio *bio; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); + + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); + blk_finished_io(nsect); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio->bi_req = NULL; + bio_endio(bio, uptodate); + if ((bio = req->bio) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bio_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bio_data(bio); + return 1; + } + } + return 0; +} + /** * end_that_request_first - end I/O on one buffer. + * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1105,43 +1327,21 @@ * blk_finished_io() appropriately. **/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(request_queue_t *q, struct request *rq, int uptodate) { - struct buffer_head * bh; - int nsect; + unsigned long flags; + int ret; - req->errors = 0; - if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + spin_lock_irqsave(&q->queue_lock, flags); + ret = __end_that_request_first(rq, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("end_request: buffer-list destroyed\n"); - } - req->buffer = bh->b_data; - return 1; - } - } - return 0; + return ret; } void end_that_request_last(struct request *req) { - if (req->waiting != NULL) + if (req->waiting) complete(req->waiting); blkdev_release_request(req); @@ -1166,7 +1366,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); atomic_set(&queued_sectors, 0); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1205,123 +1404,37 @@ low_queued_sectors / 2, queue_nr_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void 
paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? */ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(__end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); EXPORT_SYMBOL(queued_sectors); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_hardsect_size); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.7/drivers/block/loop.c Sat Jun 30 01:16:56 2001 +++ linux/drivers/block/loop.c Wed Jan 1 00:07:23 1997 @@ -75,8 +75,8 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; -static int *loop_sizes; +static struct loop_device *loop_dev, **loop_lookup; +static sector_t *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -86,10 +86,12 @@ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); + if (raw_buf != loop_buf) { + if (cmd == READ) + memcpy(loop_buf, raw_buf, size); + else + memcpy(raw_buf, loop_buf, size); + } return 0; } @@ -117,6 +119,7 @@ static int none_status(struct loop_device *lo, struct loop_info *info) { + lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -164,8 +167,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -178,8 +180,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = 
bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -251,18 +253,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -298,42 +299,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio->bi_flags & BIO_WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { - if (bh) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + /* + * check bi_end_io, may just be a remapped bio + */ + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -342,65 +347,56 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (bio->bi_flags & (BIO_UPTODATE | BIO_WRITE)) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, bio->bi_flags & BIO_UPTODATE); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; + struct bio *bio; - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; + /* + * for xfer_funcs that can operate on the same bh, do that + */ + if (lo->lo_flags & LO_FLAGS_BH_REMAP) { + bio = rbh; + goto out_bh; + } - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); - - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -408,66 +404,61 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + bio->bi_io_vec.bv_page = alloc_page(GFP_NOIO); + if (bio->bi_io_vec.bv_page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; + +out_bh: + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_flags |= rbh->bi_flags & BIO_RW_MASK; + spin_lock_irq(&lo->lo_lock); + bio->bi_dev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; atomic_inc(&lo->lo_pending); spin_unlock_irq(&lo->lo_lock); - if (rw == WRITE) { + if (rbh->bi_flags & BIO_WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + } else if (!(rbh->bi_flags & BIO_READ)) { + printk(KERN_ERR "loop: unknown command (%lx)\n", rbh->bi_flags); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rbh); /* * file 
backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -475,16 +466,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - bh->b_private = rbh; - IV = loop_get_iv(lo, bh->b_rsector); - if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + IV = loop_get_iv(lo, rbh->bi_sector); + if (rbh->bi_flags & BIO_WRITE) { + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -492,14 +481,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -507,19 +496,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret); + loop_put_buffer(bio); } } @@ -532,7 +519,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -566,12 +553,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -600,7 +587,7 @@ error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -620,7 +607,6 @@ * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. */ - error = -EINVAL; if (!aops->readpage) goto out_putf; @@ -649,6 +635,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_NOIO; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(lo_device)]) @@ -658,7 +645,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -852,7 +839,7 @@ err = -EINVAL; break; } - err = put_user(loop_sizes[lo->lo_number] << 1, (long *) arg); + err = put_user(loop_sizes[lo->lo_number] << 1, (sector_t *)arg); break; default: err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; @@ -983,13 +970,17 @@ if (!loop_dev) return -ENOMEM; - loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + + loop_sizes = kmalloc(max_loop * sizeof(sector_t), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1003,8 +994,9 @@ spin_lock_init(&lo->lo_lock); } - memset(loop_sizes, 0, max_loop * sizeof(int)); + memset(loop_sizes, 0, max_loop * sizeof(sector_t)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1013,9 +1005,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1028,6 +1020,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.7/drivers/block/nbd.c Sat Jun 30 01:15:41 2001 +++ linux/drivers/block/nbd.c Wed Jan 1 00:07:23 1997 @@ -56,7 +56,7 @@ static int nbd_blksizes[MAX_NBD]; static int nbd_blksize_bits[MAX_NBD]; -static int nbd_sizes[MAX_NBD]; +static sector_t nbd_sizes[MAX_NBD]; static u64 nbd_bytesizes[MAX_NBD]; static struct nbd_device nbd_dev[MAX_NBD]; @@ -166,14 +166,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -206,14 +206,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -251,7 +251,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -286,7 +286,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -334,22 +334,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? 
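nbd_send_req() and nbd_read_stat() above transmit a request by walking its bio chain one segment at a time. The same traversal works for other bookkeeping on the new request layout; a small sketch with a made-up helper name:

	static unsigned int rq_byte_count(struct request *req)
	{
		struct bio *bio;
		unsigned int bytes = 0;

		/* a request now carries a singly linked chain of bios */
		for (bio = req->bio; bio; bio = bio->bi_next)
			bytes += bio_size(bio);

		return bytes;
	}
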
*/ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- /opt/kernel/linux-2.4.7/drivers/block/paride/pd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/paride/pd.c Tue Jul 24 15:04:44 2001 @@ -329,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -455,8 +454,7 @@ pd_gendisk.major = major; pd_gendisk.major_name = name; - pd_gendisk.next = gendisk_head; - gendisk_head = &pd_gendisk; + add_gendisk(&pd_gendisk); for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return -ENODEV; - switch (cmd) { + switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, (char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(pd_hd[dev].nr_sects,(long *) arg); - return (0); - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -546,9 +539,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -586,36 +579,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for 
(p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<next)) - if (*gdp == &pd_gendisk) break; - if (*gdp) *gdp = (*gdp)->next; + devfs_unregister_blkdev(MAJOR_NR, name); + del_gendisk(&pd_gendisk); - for (unit=0;unitcmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -963,7 +947,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- /opt/kernel/linux-2.4.7/drivers/block/paride/pf.c Sun Feb 4 19:05:29 2001 +++ linux/drivers/block/paride/pf.c Tue Jul 24 15:04:44 2001 @@ -463,7 +463,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); @@ -483,10 +483,7 @@ return 0; case BLKGETSIZE: if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(PF.capacity,(long *) arg); - return (0); + return put_user(PF.capacity,(long *) arg); case BLKROSET: case BLKROGET: case BLKRASET: diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- /opt/kernel/linux-2.4.7/drivers/block/ps2esdi.c Wed Jul 11 01:18:51 2001 +++ linux/drivers/block/ps2esdi.c Tue Jul 24 15:04:44 2001 @@ -65,6 +65,7 @@ #define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static void reset_ctrl(void); @@ -115,9 +116,8 @@ static int no_int_yet; static int access_count[MAX_HD]; static char ps2esdi_valid[MAX_HD]; -static int ps2esdi_sizes[MAX_HD << 6]; +static sector_t ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -183,9 +183,9 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - /* some minor housekeeping - setup the global gendisk structure */ - ps2esdi_gendisk.next = gendisk_head; - gendisk_head = &ps2esdi_gendisk; + /* setup the global gendisk structure */ + add_gendisk(&ps2esdi_gendisk); + ps2esdi_geninit(); return 0; } /* ps2esdi_init */ @@ -221,18 +221,18 @@ } void -cleanup_module(void) -{ - if(ps2esdi_slot) - { +cleanup_module(void) { + if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); } release_region(io_base, 4); free_dma(dma_arb_level); - free_irq(PS2ESDI_IRQ, NULL) + free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -415,16 +415,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. 
*/ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -482,7 +479,7 @@ if (virt_to_bus(CURRENT->buffer + CURRENT->current_nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for above 16Mb dmas */ else if ((CURRENT_DEV < ps2esdi_drives) && (CURRENT->sector + CURRENT->current_nr_sectors <= @@ -495,13 +492,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -511,7 +504,7 @@ break; default: printk("%s: Unknown command\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); break; } /* handle different commands */ } @@ -519,7 +512,7 @@ else { printk("Grrr. error. ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects); - end_request(FAIL); + __end_request(CURRENT, FAIL); } } /* main strategy routine */ @@ -584,7 +577,7 @@ if (ps2esdi_out_cmd_blk(cmd_blk)) { printk("%s: Controller failed\n", DEVICE_NAME); if ((++CURRENT->errors) >= MAX_RETRIES) - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for failure to put out the command block */ else { @@ -958,10 +951,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); - end_request(ending); + spin_lock_irqsave(PS2ESDI_LOCK, flags); + __end_request(CURRENT, ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1100,20 +1093,10 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); - } - break; - - case BLKGETSIZE: - if (arg) { - if ((err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)))) - return (err); - put_user(ps2esdi[MINOR(inode->i_rdev)].nr_sects, (long *) arg); - - return (0); + return 0; } break; @@ -1122,6 +1105,7 @@ return -EACCES; return (ps2esdi_reread_partitions(inode->i_rdev)); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1138,8 +1122,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1147,21 +1130,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 
1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.7/drivers/block/rd.c Mon Jul 16 01:15:44 2001 +++ linux/drivers/block/rd.c Wed Jan 1 00:07:23 1997 @@ -98,7 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static sector_t rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct inode *rd_inode[NUM_RAMDISKS]; /* Protected device inodes */ @@ -194,22 +194,21 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int minor; unsigned long offset, len; struct buffer_head *rbh; char *bdata; + int rw = bio_rw(sbh); - - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -221,25 +220,26 @@ goto fail; } - rbh = getblk(sbh->b_rdev, sbh->b_rsector/(sbh->b_size>>9), sbh->b_size); + rbh = getblk(sbh->bi_dev,sbh->bi_sector/bio_sectors(sbh),bio_size(sbh)); /* I think that it is safe to assume that rbh is not in HighMem, though * sbh might be - NeilBrown */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) - memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) - memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); + bdata = kmap(bio_page(sbh)); + + if (rw == READ) + memcpy(bdata, rbh->b_data, rbh->b_size); + else + memcpy(rbh->b_data, bdata, rbh->b_size); + + kunmap(bio_page(sbh)); + mark_buffer_protected(rbh); brelse(rbh); - sbh->b_end_io(sbh,1); + bio_endio(sbh, 1); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -370,7 +370,8 @@ for (i = 0 ; i < NUM_RAMDISKS; i++) { if (rd_inode[i]) { - /* withdraw invalidate_buffers() and prune_icache() immunity */ + /* withdraw invalidate_buffers() and prune_icache() + immunity */ atomic_dec(&rd_inode[i]->i_bdev->bd_openers); /* remove stale pointer to module address space */ rd_inode[i]->i_bdev->bd_op = NULL; @@ -381,9 +382,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -428,7 +427,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &fd_fops, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/block/xd.c linux/drivers/block/xd.c --- 
/opt/kernel/linux-2.4.7/drivers/block/xd.c Fri May 25 00:14:08 2001 +++ linux/drivers/block/xd.c Tue Jul 24 15:04:44 2001 @@ -173,8 +173,7 @@ devfs_handle = devfs_mk_dir (NULL, xd_gendisk.major_name, NULL); blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - xd_gendisk.next = gendisk_head; - gendisk_head = &xd_gendisk; + add_gendisk(&xd_gendisk); xd_geninit(); return 0; @@ -258,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -296,7 +294,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -333,18 +331,16 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: - if (!arg) return -EINVAL; - return put_user(xd_struct[MINOR(inode->i_rdev)].nr_sects,(long *) arg); case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -357,6 +353,7 @@ return -EACCES; return xd_reread_partitions(inode->i_rdev); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -387,11 +384,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -399,20 +394,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1112,18 +1103,9 @@ static void xd_done (void) { - struct gendisk ** gdp; - - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &xd_gendisk) - break; - if (*gdp) - *gdp = (*gdp)->next; + del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cdrom.c linux/drivers/cdrom/cdrom.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cdrom.c Wed Jul 11 23:55:41 2001 +++ linux/drivers/cdrom/cdrom.c Wed Jan 1 00:07:23 1997 @@ -279,6 +279,8 @@ static int lockdoor = 1; /* will we ever get to use this... sigh. 
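Both ps2esdi and xd above now revalidate partitions through wipe_partitions() followed by grok_partitions() with the total capacity in sectors. The shared idiom, sketched for a hypothetical driver (mydrv_capacity() is a placeholder for however the driver computes its size):

	static int mydrv_revalidate(kdev_t dev)
	{
		/* clear the old partition table, then re-read it */
		int res = wipe_partitions(dev);

		if (!res)
			grok_partitions(dev, mydrv_capacity(dev));

		return res;
	}
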
*/ static int check_media_type; +/* protects various structures */ +static spinlock_t cdrom_lock = SPIN_LOCK_UNLOCKED; MODULE_PARM(debug, "i"); MODULE_PARM(autoclose, "i"); MODULE_PARM(autoeject, "i"); @@ -420,8 +422,10 @@ &cdrom_fops, NULL); } cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); + spin_lock(&cdrom_lock); cdi->next = topCdromPtr; topCdromPtr = cdi; + spin_unlock(&cdrom_lock); return 0; } #undef ENSURE @@ -429,13 +433,15 @@ int unregister_cdrom(struct cdrom_device_info *unreg) { struct cdrom_device_info *cdi, *prev; - int major = MAJOR(unreg->dev); + int major = MAJOR(unreg->dev), ret; cdinfo(CD_OPEN, "entering unregister_cdrom\n"); + ret = -1; if (major < 0 || major >= MAX_BLKDEV) - return -1; + goto out; + spin_lock(&cdrom_lock); prev = NULL; cdi = topCdromPtr; while (cdi != NULL && cdi->dev != unreg->dev) { @@ -443,27 +449,34 @@ cdi = cdi->next; } + ret = -2; if (cdi == NULL) - return -2; + goto out_unlock; + ret = 0; if (prev) prev->next = cdi->next; else topCdromPtr = cdi->next; cdi->ops->n_minors--; - devfs_unregister (cdi->de); - devfs_dealloc_unique_number (&cdrom_numspace, cdi->number); + devfs_unregister(cdi->de); + devfs_dealloc_unique_number(&cdrom_numspace, cdi->number); cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); - return 0; +out_unlock: + spin_unlock(&cdrom_lock); +out: + return ret; } struct cdrom_device_info *cdrom_find_device(kdev_t dev) { struct cdrom_device_info *cdi; + spin_lock(&cdrom_lock); cdi = topCdromPtr; while (cdi != NULL && cdi->dev != dev) cdi = cdi->next; + spin_unlock(&cdrom_lock); return cdi; } @@ -1926,7 +1939,11 @@ ret = cdi->ops->generic_packet(cdi, cgc); __copy_to_user(usense, cgc->sense, sizeof(*usense)); - if (!ret && cgc->data_direction == CGC_DATA_READ) + + /* + * copy data back regardless of package status + */ + if (cgc->data_direction == CGC_DATA_READ) __copy_to_user(ubuf, cgc->buffer, cgc->buflen); kfree(cgc->buffer); return ret; @@ -2502,6 +2519,7 @@ { struct cdrom_device_info *cdi; + spin_lock(&cdrom_lock); for (cdi = topCdromPtr; cdi != NULL; cdi = cdi->next) { if (autoclose && CDROM_CAN(CDC_CLOSE_TRAY)) cdi->options |= CDO_AUTO_CLOSE; @@ -2520,6 +2538,7 @@ else cdi->options &= ~CDO_CHECK_TYPE; } + spin_unlock(&cdrom_lock); } static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cdu31a.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cdu31a.c Wed Jan 1 00:07:23 1997 @@ -1675,7 +1675,7 @@ restore_flags(flags); if (!QUEUE_EMPTY && CURRENT->rq_status != RQ_INACTIVE) { - end_request(0); + __end_request(CURRENT, 0); } restore_flags(flags); #if DEBUG @@ -1694,7 +1694,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* yes lets release the lock and then much with the queue etc. I won't + * bother auditing this driver, it's decrepit and full of races anyway. + * /jens */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1853,7 +1856,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
*/ abort_read(); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/cm206.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cm206.c Wed Jan 1 00:07:23 1997 @@ -823,10 +823,10 @@ return; if (CURRENT->cmd != READ) { debug(("Non-read command %d on cdrom\n", CURRENT->cmd)); - end_request(0); + __end_request(CURRENT, 0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error=0; for (i=0; inr_sectors; i++) { int e1, e2; @@ -849,8 +849,8 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); - end_request(!error); + spin_lock_irq(&q->queue_lock); + __end_request(CURRENT, !error); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- /opt/kernel/linux-2.4.7/drivers/cdrom/sbpcd.c Tue Jul 17 00:13:32 2001 +++ linux/drivers/cdrom/sbpcd.c Wed Jan 1 00:07:23 1997 @@ -4882,7 +4882,7 @@ #undef DEBUG_GTL static inline void sbpcd_end_request(struct request *req, int uptodate) { list_add(&req->queue, &req->q->queue_head); - end_request(uptodate); + __end_request(req, uptodate); } /*==========================================================================*/ /* @@ -4924,7 +4924,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4964,7 +4964,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5005,7 +5005,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5021,7 +5021,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.7/drivers/char/raw.c Thu Jun 28 02:10:55 2001 +++ linux/drivers/char/raw.c Wed Jan 1 00:07:23 1997 @@ -134,10 +134,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -282,16 +280,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -316,7 +312,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -334,18 +329,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single 
kiobuf as we go to perform each chunk of - * IO. - */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -357,10 +344,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -370,6 +354,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/i2o/i2o_block.c linux/drivers/i2o/i2o_block.c --- /opt/kernel/linux-2.4.7/drivers/i2o/i2o_block.c Fri Jul 20 05:48:39 2001 +++ linux/drivers/i2o/i2o_block.c Tue Jul 24 15:04:44 2001 @@ -111,15 +111,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; -static int i2ob_sizes[MAX_I2OB<<4]; +static sector_t i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -249,9 +250,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = 0; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -280,30 +281,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_bus(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_bus(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_bus(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -319,30 +320,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_bus(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + __raw_writel(bio_to_bus(bio), mptr+4); mptr += 8; - size = 
bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_bus(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -406,7 +407,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (__end_that_request_first( req, !req->errors)) + ; /* * It is now ok to complete the request. @@ -414,61 +416,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -526,10 +473,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -550,12 +497,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -571,10 +518,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -620,7 +567,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -665,7 +612,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * and out @@ -673,7 +620,7 @@ return; } - 
spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -688,8 +635,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... */ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -701,7 +648,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -780,8 +727,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -815,11 +761,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -872,13 +818,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -889,12 +836,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. */ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1123,32 +1070,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? 
-EFAULT : 0; } case BLKRRPART: @@ -1156,6 +1094,7 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1345,8 +1284,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1360,23 +1297,25 @@ for(i=unit;i<=unit+15;i++) { + request_queue_t *q = i2ob_dev[unit].req_queue; + if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_sectors(q, 8); i2ob_dev[i].depth = 4; } else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } else { /* MAX_SECTORS was used but 255 is a dumb number for striped RAID */ - i2ob_max_sectors[i]=256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); } } @@ -1421,7 +1360,7 @@ } printk(".\n"); printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", - d->dev_name, i2ob_max_sectors[unit]); + d->dev_name, i2ob_dev[unit].req_queue->max_sectors); /* * If this is the first I2O block device found on this IOP, @@ -1441,7 +1380,7 @@ */ dev->req_queue = &i2ob_queues[c->unit]->req_queue; - grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9)); + grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9)); /* * Register for the events we're interested in and that the @@ -1484,9 +1423,6 @@ blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1497,11 +1433,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1699,7 +1635,7 @@ int i = 0; int flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1721,7 +1657,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1734,12 +1670,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Sync the device...this will force all outstanding I/Os @@ -1903,9 +1838,7 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = 
i2ob_get_queue; blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); @@ -1921,7 +1854,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1977,9 +1909,8 @@ /* * Adding i2ob_gendisk into the gendisk list. - */ - i2ob_gendisk.next = gendisk_head; - gendisk_head = &i2ob_gendisk; + */ + add_gendisk(&i2ob_gendisk); return 0; } @@ -1992,7 +1923,6 @@ void cleanup_module(void) { - struct gendisk *gdp; int i; if(evt_running) { @@ -2049,20 +1979,6 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - /* - * Why isnt register/unregister gendisk in the kernel ??? - */ - - if (gendisk_head == &i2ob_gendisk) { - gendisk_head = i2ob_gendisk.next; - } - else { - for (gdp = gendisk_head; gdp; gdp = gdp->next) - if (gdp->next == &i2ob_gendisk) - { - gdp->next = i2ob_gendisk.next; - break; - } - } + del_gendisk(&i2ob_gendisk); } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/i2o/i2o_core.c linux/drivers/i2o/i2o_core.c --- /opt/kernel/linux-2.4.7/drivers/i2o/i2o_core.c Wed May 2 01:10:37 2001 +++ linux/drivers/i2o/i2o_core.c Tue Jul 24 15:12:30 2001 @@ -124,6 +124,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -134,7 +135,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- /opt/kernel/linux-2.4.7/drivers/ide/aec62xx.c Tue Jun 20 16:52:36 2000 +++ linux/drivers/ide/aec62xx.c Wed Jan 1 00:07:23 1997 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- /opt/kernel/linux-2.4.7/drivers/ide/cmd64x.c Fri Jul 28 01:40:57 2000 +++ linux/drivers/ide/cmd64x.c Wed Jan 1 00:07:23 1997 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- /opt/kernel/linux-2.4.7/drivers/ide/cs5530.c Wed Jan 3 01:58:45 2001 +++ linux/drivers/ide/cs5530.c Wed Jan 1 00:07:23 1997 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- /opt/kernel/linux-2.4.7/drivers/ide/cy82c693.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/cy82c693.c Wed Jan 1 00:07:23 1997 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.7/drivers/ide/hd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/ide/hd.c Tue Jul 24 15:04:44 2001 @@ -107,7 +107,6 @@ static int 
hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) @@ -634,20 +632,16 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -734,11 +728,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -841,9 +833,9 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - hd_gendisk.next = gendisk_head; - gendisk_head = &hd_gendisk; + add_gendisk(&hd_gendisk); init_timer(&device_timer); device_timer.function = hd_times_out; hd_geninit(); @@ -870,9 +862,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -887,25 +877,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- /opt/kernel/linux-2.4.7/drivers/ide/hpt34x.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/hpt34x.c Wed Jan 1 00:07:23 1997 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- /opt/kernel/linux-2.4.7/drivers/ide/hpt366.c Thu Jun 28 02:10:55 2001 +++ linux/drivers/ide/hpt366.c Wed Jan 1 00:07:23 1997 @@ -720,6 +720,7 @@ 
hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.c Fri Jul 20 06:04:55 2001 +++ linux/drivers/ide/ide-cd.c Wed Jan 1 00:07:23 1997 @@ -926,7 +926,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. */ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +959,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -978,8 +977,7 @@ /* If we've filled the present buffer but there's another chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && - rq->nr_sectors > 0) + if (rq->current_nr_sectors == 0 && rq->nr_sectors) cdrom_end_request (1, drive); /* If the buffers are full, cache the rest of the data in our @@ -1059,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1098,9 +1096,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1197,13 +1195,15 @@ start it over entirely, or even put it back on the request queue. */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); + rq->hard_nr_sectors = rq->nr_sectors; + rq->hard_sector = rq->sector; } /* @@ -1217,20 +1217,22 @@ /* If the request is relative to a partition, fix it up to refer to the absolute address. */ - if ((minor & PARTN_MASK) != 0) { + if (minor & PARTN_MASK) { rq->sector = block; minor &= ~PARTN_MASK; - rq->rq_dev = MKDEV (MAJOR(rq->rq_dev), minor); + rq->rq_dev = MKDEV(MAJOR(rq->rq_dev), minor); } /* We may be retrying this request after an error. Fix up any weirdness which might be present in the request packet. */ - restore_request (rq); + restore_request(rq); /* Satisfy whatever we can of this request from our cached sector. 
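The ide-cd conversions above (and the read/write interrupt handlers that follow) all rely on the same mapping from the old buffer_head fields to the bio accessors this patch introduces: rq->bh/b_reqnext become rq->bio/bi_next, b_data becomes bio_data(), and b_size becomes bio_size()/bio_sectors(). A driver-agnostic sketch of walking a request with those accessors (demo_walk_request and demo_consume are hypothetical; the accessors themselves are the ones used throughout this diff):

	static void demo_consume(char *data, unsigned int bytes);	/* hypothetical */

	static void demo_walk_request(struct request *rq)
	{
		struct bio *bio;

		for (bio = rq->bio; bio; bio = bio->bi_next) {
			/*
			 * bio_data() replaces bh->b_data, bio_size() replaces
			 * bh->b_size, bio_sectors() is the size in 512-byte units
			 */
			demo_consume((char *) bio_data(bio), bio_size(bio));
		}
	}
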
*/ if (cdrom_read_from_buffer(drive)) return ide_stopped; + blk_attempt_remerge(&drive->queue, rq); + /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1478,7 +1480,7 @@ static ide_startstop_t cdrom_write_intr(ide_drive_t *drive) { - int stat, ireason, len, sectors_to_transfer; + int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; int i, dma_error = 0, dma = info->dma; ide_startstop_t startstop; @@ -1499,6 +1501,9 @@ return startstop; } + /* + * using dma, transfer is complete now + */ if (dma) { if (dma_error) return ide_error(drive, "dma error", stat); @@ -1520,12 +1525,13 @@ /* If we're not done writing, complain. * Otherwise, complete the command normally. */ + uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", - drive->name, rq->current_nr_sectors); - cdrom_end_request(0, drive); - } else - cdrom_end_request(1, drive); + printk("%s: write_intr: data underrun (%u blocks)\n", + drive->name, rq->current_nr_sectors); + uptodate = 0; + } + cdrom_end_request(uptodate, drive); return ide_stopped; } @@ -1534,26 +1540,42 @@ if (cdrom_write_check_ireason(drive, len, ireason)) return ide_stopped; - /* The number of sectors we need to read from the drive. */ sectors_to_transfer = len / SECTOR_SIZE; - /* Now loop while we still have data to read from the drive. DMA - * transfers will already have been complete + /* + * now loop and write out the data */ while (sectors_to_transfer > 0) { - /* If we've filled the present buffer but there's another - chained buffer after it, move on. */ - if (rq->current_nr_sectors == 0 && rq->nr_sectors > 0) - cdrom_end_request(1, drive); + int this_transfer; + + if (!rq->current_nr_sectors) { + printk("ide-cd: write_intr: oops\n"); + break; + } + + /* + * Figure out how many sectors we can transfer + */ + this_transfer = MIN(sectors_to_transfer,rq->current_nr_sectors); - atapi_output_bytes(drive, rq->buffer, rq->current_nr_sectors); - rq->nr_sectors -= rq->current_nr_sectors; - rq->current_nr_sectors = 0; - rq->sector += rq->current_nr_sectors; - sectors_to_transfer -= rq->current_nr_sectors; + while (this_transfer > 0) { + atapi_output_bytes(drive, rq->buffer, SECTOR_SIZE); + rq->buffer += SECTOR_SIZE; + --rq->nr_sectors; + --rq->current_nr_sectors; + ++rq->sector; + --this_transfer; + --sectors_to_transfer; + } + + /* + * current buffer complete, move on + */ + if (rq->current_nr_sectors == 0 && rq->nr_sectors) + cdrom_end_request (1, drive); } - /* arm handler */ + /* re-arm handler */ ide_set_handler(drive, &cdrom_write_intr, 5 * WAIT_CMD, NULL); return ide_started; } @@ -1584,10 +1606,26 @@ return cdrom_transfer_packet_command(drive, &pc, cdrom_write_intr); } -static ide_startstop_t cdrom_start_write(ide_drive_t *drive) +static ide_startstop_t cdrom_start_write(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; + /* + * writes *must* be 2kB frame aligned + */ + if ((rq->nr_sectors & 3) || (rq->sector & 3)) { + cdrom_end_request(0, drive); + return ide_stopped; + } + + /* + * for dvd-ram and such media, it's a really big deal to get + * big writes all the time. so scour the queue and attempt to + * remerge requests, often the plugging will not have had time + * to do this properly + */ + blk_attempt_remerge(&drive->queue, rq); + info->nsectors_buffered = 0; /* use dma, if possible. 
we don't need to check more, since we @@ -1630,7 +1668,7 @@ if (rq->cmd == READ) action = cdrom_start_read(drive, block); else - action = cdrom_start_write(drive); + action = cdrom_start_write(drive, rq); } info->last_block = block; return action; @@ -1833,6 +1871,7 @@ pc.buffer = buf; pc.buflen = buflen; + pc.quiet = 1; pc.c[0] = GPCMD_READ_TOC_PMA_ATIP; pc.c[6] = trackno; pc.c[7] = (buflen >> 8); @@ -2113,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2622,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2827,7 +2867,12 @@ drive->part[0].nr_sects = toc->capacity * SECTORS_PER_FRAME; HWIF(drive)->gd->sizes[minor] = toc->capacity * BLOCKS_PER_FRAME; + /* + * reset block size, ide_revalidate_disk incorrectly sets it to + * 1024 even for CDROM's + */ blk_size[HWIF(drive)->major] = HWIF(drive)->gd->sizes; + set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE); } static diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- /opt/kernel/linux-2.4.7/drivers/ide/ide-cd.h Fri Jul 20 21:54:49 2001 +++ linux/drivers/ide/ide-cd.h Wed Jan 1 00:07:23 1997 @@ -37,11 +37,12 @@ /************************************************************************/ -#define SECTOR_SIZE 512 #define SECTOR_BITS 9 -#define SECTORS_PER_FRAME (CD_FRAMESIZE / SECTOR_SIZE) +#define SECTOR_SIZE (1 << SECTOR_BITS) +#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS) #define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) -#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE / SECTOR_SIZE) +#define SECTORS_BUFFER (SECTOR_BUFFER_SIZE >> SECTOR_BITS) +#define SECTORS_MAX (131072 >> SECTOR_BITS) #define BLOCKS_PER_FRAME (CD_FRAMESIZE / BLOCK_SIZE) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-disk.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/ide/ide-disk.c Wed Jan 1 00:07:23 1997 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -140,6 +141,7 @@ int i; unsigned int msect, nsect; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +152,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +162,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, 
rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +204,15 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + char *to = ide_map_buffer(rq); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +242,27 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? */ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +271,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer); } while (mcount); return 0; @@ -367,6 +371,8 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + char *buffer; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG); @@ -444,15 +450,17 @@ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + buffer = ide_map_buffer(rq); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer); } return ide_started; } @@ -481,7 +489,8 @@ { if (drive->removable && !drive->usage) { invalidate_buffers(inode->i_rdev); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -494,9 +503,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? 
BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); return 0; } @@ -690,7 +697,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-dma.c Mon Jan 15 22:08:15 2001 +++ linux/drivers/ide/ide-dma.c Wed Jan 1 00:07:23 1997 @@ -168,25 +168,6 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. - */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) @@ -213,34 +194,42 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; - struct scatterlist *sg = hwif->sg_table; + struct sg_list *sg = hwif->sg_table; + struct bio *bio = rq->bio; + unsigned long lastdataend; int nents = 0; if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - if (nents >= PRD_ENTRIES) - return 0; - - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + bio = rq->bio; + lastdataend = 0; + do { + /* + * continue segment from before? 
+ */ + if (bio_to_bus(bio) == lastdataend) { + sg[nents - 1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { + /* + * start new segment + */ + if (nents >= PRD_ENTRIES) + BUG(); + + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].page = bio_page(bio); + sg[nents].length = bio_size(bio); + sg[nents].offset = bio_offset(bio); + lastdataend = bio_to_bus(bio) + bio_size(bio); + nents++; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); + } while ((bio = bio->bi_next) != NULL); - return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); + return pci_map_sgl(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } /* @@ -258,7 +247,7 @@ #endif unsigned int count = 0; int i; - struct scatterlist *sg; + struct sg_list *sg; HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); @@ -266,7 +255,7 @@ return 0; sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + while (i) { u32 cur_addr; u32 cur_len; @@ -280,26 +269,20 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - return 0; /* revert to PIO for this request */ - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; - } + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; @@ -318,10 +301,10 @@ void ide_destroy_dmatable (ide_drive_t *drive) { struct pci_dev *dev = HWIF(drive)->pci_dev; - struct scatterlist *sg = HWIF(drive)->sg_table; + struct sg_list *sg = HWIF(drive)->sg_table; int nents = HWIF(drive)->sg_nents; - pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); + pci_unmap_sgl(dev, sg, nents, HWIF(drive)->sg_dma_direction); } /* @@ -450,6 +433,22 @@ return 0; } +#ifdef CONFIG_HIGHMEM +static inline void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + unsigned long addr = BLK_BOUNCE_HIGH; + + if (on && drive->media == ide_disk && HWIF(drive)->highmem) { + printk("%s: enabling highmem I/O\n", drive->name); + addr = BLK_BOUNCE_4G; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} +#else +#define ide_toggle_bounce(drive, on) +#endif + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
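ide_toggle_bounce() above is the only policy knob here: the per-hwif ->highmem flag plus blk_queue_bounce_limit() decide whether highmem pages are DMA'd directly or bounced. For a controller outside IDE the same call would be made at queue setup time; a minimal sketch assuming the blk_queue_bounce_limit()/BLK_BOUNCE_* interface from this patch (the mydev_* names are hypothetical):

	static void mydev_request(request_queue_t *q);		/* hypothetical request_fn */
	static request_queue_t mydev_queue;

	static void mydev_init_queue(void)
	{
		blk_init_queue(&mydev_queue, mydev_request);

		/*
		 * The controller can DMA anywhere below 4GB, so only pages above
		 * that boundary need to be bounced by the block layer.  A PIO-only
		 * device would pass BLK_BOUNCE_HIGH instead, bouncing (or mapping)
		 * every highmem page.
		 */
		blk_queue_bounce_limit(&mydev_queue, BLK_BOUNCE_4G);
	}
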
* @@ -471,15 +470,17 @@ ide_hwif_t *hwif = HWIF(drive); unsigned long dma_base = hwif->dma_base; byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-floppy.c Thu Jun 14 23:16:58 2001 +++ linux/drivers/ide/ide-floppy.c Wed Jan 1 00:07:23 1997 @@ -1380,9 +1380,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1555,10 +1552,7 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } + blk_queue_max_sectors(&drive->queue, 64); (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-probe.c Sun Mar 18 18:25:02 2001 +++ linux/drivers/ide/ide-probe.c Wed Jan 1 00:10:32 1997 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); blk_init_queue(q, do_ide_request); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -700,6 +712,13 @@ hwif->next = hwgroup->hwif->next; hwgroup->hwif->next = hwif; + if (!hwgroup->hwif) { + hwgroup->hwif = HWIF(hwgroup->drive); +#ifdef DEBUG + printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); +#endif + } + restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ for (index = 0; index < MAX_DRIVES; ++index) { ide_drive_t *drive = &hwif->drives[index]; if (!drive->present) @@ -710,13 +729,6 @@ hwgroup->drive->next = drive; ide_init_queue(drive); } - if (!hwgroup->hwif) { - hwgroup->hwif = HWIF(hwgroup->drive); -#ifdef DEBUG - printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); -#endif - } - restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ #if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__) printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, @@ -747,9 
+759,9 @@ */ static void init_gendisk (ide_hwif_t *hwif) { - struct gendisk *gd, **gdp; + struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -800,8 +804,8 @@ if (gd->flags) memset (gd->flags, 0, sizeof *gd->flags * units); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; - hwif->gd = *gdp = gd; /* link onto tail of list */ + hwif->gd = gd; + add_gendisk(gd); for (unit = 0; unit < units; ++unit) { if (hwif->drives[unit].present) { @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide-proc.c Thu Oct 26 23:11:39 2000 +++ linux/drivers/ide/ide-proc.c Wed Jan 1 00:07:23 1997 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.7/drivers/ide/ide.c Fri Jul 20 06:02:28 2001 +++ linux/drivers/ide/ide.c Tue Jul 24 15:04:44 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -167,6 +169,7 @@ static int idebus_parameter; /* holds the "idebus=" parameter */ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ @@ -512,17 +515,25 @@ { 
struct request *rq; unsigned long flags; + ide_drive_t *drive = hwgroup->drive; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = hwgroup->rq; - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + /* + * decide whether to reenable DMA -- 3 is a random magic for now, + * if we DMA timeout more than 3 times, just stay in PIO + */ + if (drive->state == DMA_PIO_RETRY && drive->retry_pio < 3) + hwgroup->hwif->dmaproc(ide_dma_on, drive); + + if (!__end_that_request_first(rq, uptodate)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -538,7 +549,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -547,7 +558,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -794,9 +805,9 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -819,11 +830,11 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1137,8 +1148,8 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; + struct request *rq = HWGROUP(drive)->rq; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1156,16 +1167,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1177,7 +1183,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); return startstop; } @@ -1188,7 +1195,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", 
drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1229,7 +1237,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1258,7 +1266,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(&DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1270,26 +1278,21 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". * * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { @@ -1301,8 +1304,7 @@ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1325,13 +1327,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? 
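The hwgroup serialisation above boils down to the standard atomic bit-ownership idiom: whoever wins test_and_set_bit(IDE_BUSY, ...) drives the queue, everyone else just queues their request and leaves, and the bit stays set until a completion path clears it. A stand-alone sketch of that idiom with generic names rather than the hwgroup code itself (demo_* are hypothetical; bitops as in <asm/bitops.h>):

	#include <asm/bitops.h>

	#define DEMO_BUSY	0		/* bit number inside demo_flags */

	static unsigned long demo_flags;

	static int demo_more_work(void);	/* hypothetical */
	static void demo_do_one(void);		/* hypothetical */

	static void demo_kick(void)
	{
		/* only the thread that flips the bit becomes the active one */
		if (test_and_set_bit(DEMO_BUSY, &demo_flags))
			return;			/* someone else already owns the engine */

		while (demo_more_work())
			demo_do_one();

		/*
		 * the real hwgroup keeps IDE_BUSY set while waiting for an IRQ
		 * and only clears it when a request sequence really finishes
		 */
		clear_bit(DEMO_BUSY, &demo_flags);
	}
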
*/ ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1345,9 +1347,14 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + + /* + * just continuing an interrupted request maybe + */ + hwgroup->rq = elv_next_request(&drive->queue); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1358,14 +1365,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&DRIVE_LOCK(drive)); ide__sti(); /* allow other IRQs while we start this request */ startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1388,6 +1395,50 @@ } /* + * un-busy the hwgroup etc, and clear any pending DMA status. we want to + * retry the current request in pio mode instead of risking tossing it + * all away + */ +void ide_dma_timeout_retry(ide_drive_t *drive) +{ + ide_hwif_t *hwif = HWIF(drive); + struct request *rq; + + /* + * end current dma transaction + */ + (void) hwif->dmaproc(ide_dma_end, drive); + + /* + * complain a little, later we might remove some of this verbosity + */ + printk("%s: timeout waiting for DMA\n", drive->name); + (void) hwif->dmaproc(ide_dma_timeout, drive); + + /* + * disable dma for now, but remember that we did so because of + * a timeout -- we'll reenable after we finish this next request + * (or rather the first chunk of it) in pio. + */ + drive->retry_pio++; + drive->state = DMA_PIO_RETRY; + (void) hwif->dmaproc(ide_dma_off_quietly, drive); + + /* + * un-busy drive etc (hwgroup is un-busy'ed on return) and + * make sure request is sane + */ + rq = HWGROUP(drive)->rq; + HWGROUP(drive)->rq = NULL; + + rq->errors = 0; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + //rq->buffer = rq->bh->b_data; +} + + +/* * ide_timer_expiry() is our timeout function for all drive operations. * But note that it can also be invoked as a result of a "sleep" operation * triggered by the mod_timer() call in ide_do_request. @@ -1400,7 +1451,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1410,10 +1465,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. 
*/ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1422,17 +1475,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1442,7 +1494,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1460,22 +1512,23 @@ startstop = handler(drive); } else { if (drive->waiting_for_dma) { - (void) hwgroup->hwif->dmaproc(ide_dma_end, drive); - printk("%s: timeout waiting for DMA\n", drive->name); - (void) hwgroup->hwif->dmaproc(ide_dma_timeout, drive); - } - startstop = ide_error(drive, "irq timeout", GET_STAT()); + startstop = ide_stopped; + ide_dma_timeout_retry(drive); + } else + startstop = ide_error(drive, "irq timeout", GET_STAT()); } set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } + spin_unlock_irqrestore(&ide_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(hwgroup->drive), flags); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(hwgroup->drive), flags); } /* @@ -1538,11 +1591,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } @@ -1576,7 +1629,7 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } drive = hwgroup->drive; @@ -1584,7 +1637,7 @@ /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } if (!drive_is_ready(drive)) { @@ -1594,21 +1647,20 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. 
*/ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); /* * Note that handler() may have set things up for another @@ -1621,13 +1673,13 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1637,9 +1689,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1648,11 +1697,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1712,7 +1757,7 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS); if (action == ide_wait) rq->waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; @@ -1722,9 +1767,9 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + list_add(&rq->queuelist, queue_head); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1733,6 +1778,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table.
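(Aside, not part of the patch itself: the ide.c hunks above replace the old integer hwgroup->busy, which was guarded by the global io_request_lock, with an IDE_BUSY bit in hwgroup->flags that is claimed with test_and_set_bit() and dropped with clear_bit(). The stand-alone C sketch below only illustrates that claim/drain/release pattern; it uses C11 atomics in place of the kernel bitops, and the hwgroup layout and the pending counter are invented for the example.)

/* Illustrative sketch only -- not from the patch.  Userspace analogue of the
 * IDE_BUSY ownership handoff; C11 atomics stand in for test_and_set_bit()
 * and clear_bit(), and the hwgroup fields here are made up for the example. */
#include <stdatomic.h>
#include <stdio.h>

#define IDE_BUSY 0x1UL

struct hwgroup {
	atomic_ulong flags;	/* stands in for hwgroup->flags */
	int pending;		/* stands in for the request queue */
};

static void do_request(struct hwgroup *hw)
{
	/* The first caller to set IDE_BUSY owns the hwgroup; later callers
	 * just leave their work queued and return at once, like the
	 * while (!test_and_set_bit(...)) entry check above. */
	if (atomic_fetch_or(&hw->flags, IDE_BUSY) & IDE_BUSY)
		return;

	while (hw->pending > 0) {		/* drain queued work */
		printf("servicing one request\n");
		hw->pending--;
	}

	/* No more work: drop ownership, like clear_bit(IDE_BUSY, &flags). */
	atomic_fetch_and(&hw->flags, ~IDE_BUSY);
}

int main(void)
{
	struct hwgroup hw = { .pending = 3 };

	do_request(&hw);	/* claims IDE_BUSY and drains the queue */
	do_request(&hw);	/* nothing left to do */
	return 0;
}

In the driver itself the bit intentionally stays set while the hwgroup is sleeping, which is why ide_timer_expiry() above only clears IDE_BUSY after test_and_clear_bit(IDE_SLEEP, &hwgroup->flags) succeeds.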
@@ -1745,40 +1800,33 @@ { ide_drive_t *drive; ide_hwgroup_t *hwgroup; - unsigned int p, major, minor; - long flags; + unsigned long flags; + int res; if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); return -EBUSY; - }; + } drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); - for (p = 0; p < (1<<PARTN_BITS); ++p) { - if (drive->part[p].nr_sects > 0) { - kdev_t devp = MKDEV(major, minor+p); - invalidate_device(devp, 1); - set_blocksize(devp, 1024); - } - drive->part[p].start_sect = 0; - drive->part[p].nr_sects = 0; - }; + res = wipe_partitions(i_rdev); + if (res) + goto leave; if (DRIVER(drive)->revalidate) DRIVER(drive)->revalidate(drive); + leave: drive->busy = 0; wake_up(&drive->wqueue); MOD_DEC_USE_COUNT; - return 0; + return res; } static void revalidate_drives (void) @@ -1943,7 +1991,7 @@ void ide_unregister (unsigned int index) { - struct gendisk *gd, **gdp; + struct gendisk *gd; ide_drive_t *drive, *d; ide_hwif_t *hwif, *g; ide_hwgroup_t *hwgroup; @@ -2058,18 +2106,13 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; - blksize_size[hwif->major] = NULL; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == hwif->gd) - break; - if (*gdp == NULL) - printk("gd not in disk chain!\n"); - else { - gd = *gdp; *gdp = gd->next; + blk_clear(hwif->major); + gd = hwif->gd; + if (gd) { + del_gendisk(gd); kfree(gd->sizes); kfree(gd->part); if (gd->de_arr) @@ -2077,6 +2120,7 @@ if (gd->flags) kfree (gd->flags); kfree(gd); + hwif->gd = NULL; } old_hwif = *hwif; init_hwif_data (index); /* restore hwif data to pristine status */ @@ -2292,7 +2336,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2305,7 +2349,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } return val; } @@ -2315,11 +2359,11 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); - while (hwgroup->busy) { + while (test_bit(IDE_BUSY, &hwgroup->flags)) { unsigned long lflags; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); __save_flags(lflags); /* local CPU only */ __sti(); /* local CPU only; needed for jiffies */ if (0 < (signed long)(jiffies - timeout)) { @@ -2328,7 +2372,7 @@ return -EBUSY; } __restore_flags(lflags); /* local CPU only */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); } return 0; } @@ -2369,7 +2413,7 @@ *p = val; break; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); return 0; } @@ -2509,24 +2553,14 @@ { struct hd_geometry *loc = (struct hd_geometry *) arg; unsigned short bios_cyl = drive->bios_cyl; /* truncate */ - if (!loc || (drive->media != ide_disk && drive->media !=
ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; - return 0; - } - - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) + return -EINVAL; + if (put_user(drive->bios_head, &loc->heads) || + put_user(drive->bios_sect, &loc->sectors) || + put_user(bios_cyl, &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) + return -EFAULT; return 0; } @@ -2542,9 +2576,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2662,6 +2693,7 @@ } return 0; + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2669,6 +2701,8 @@ case BLKPG: case BLKELVGET: case BLKELVSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); default: diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/osb4.c linux/drivers/ide/osb4.c --- /opt/kernel/linux-2.4.7/drivers/ide/osb4.c Wed May 2 01:05:00 2001 +++ linux/drivers/ide/osb4.c Wed Jan 1 00:07:23 1997 @@ -450,6 +450,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &osb4_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- /opt/kernel/linux-2.4.7/drivers/ide/pdc202xx.c Wed May 2 01:05:00 2001 +++ linux/drivers/ide/pdc202xx.c Wed Jan 1 00:07:23 1997 @@ -855,6 +855,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/piix.c linux/drivers/ide/piix.c --- /opt/kernel/linux-2.4.7/drivers/ide/piix.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/piix.c Wed Jan 1 00:07:23 1997 @@ -512,6 +512,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- /opt/kernel/linux-2.4.7/drivers/ide/sis5513.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/sis5513.c Wed Jan 1 00:07:23 1997 @@ -631,6 +631,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* CONFIG_BLK_DEV_IDEDMA */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- 
/opt/kernel/linux-2.4.7/drivers/ide/slc90e66.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/slc90e66.c Wed Jan 1 00:07:23 1997 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- /opt/kernel/linux-2.4.7/drivers/ide/via82cxxx.c Sat Feb 3 20:27:43 2001 +++ linux/drivers/ide/via82cxxx.c Wed Jan 1 00:07:23 1997 @@ -504,6 +504,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef CONFIG_IDEDMA_AUTO hwif->autodma = 1; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/md/lvm.c linux/drivers/md/lvm.c --- /opt/kernel/linux-2.4.7/drivers/md/lvm.c Thu Jul 12 01:35:37 2001 +++ linux/drivers/md/lvm.c Wed Jan 1 00:07:23 1997 @@ -394,8 +394,6 @@ */ int lvm_init(void) { - struct gendisk *gendisk_ptr = NULL; - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); return -EIO; @@ -415,27 +413,19 @@ lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + lvm_proc_vg_subdir = + create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + if (pde != NULL) + pde->read_proc = &lvm_proc_get_global_info; } lvm_init_vars(); lvm_geninit(&lvm_gendisk); /* insert our gendisk at the corresponding major */ - if (gendisk_head != NULL) { - gendisk_ptr = gendisk_head; - while (gendisk_ptr->next != NULL && - gendisk_ptr->major > lvm_gendisk.major) { - gendisk_ptr = gendisk_ptr->next; - } - lvm_gendisk.next = gendisk_ptr->next; - gendisk_ptr->next = &lvm_gendisk; - } else { - gendisk_head = &lvm_gendisk; - lvm_gendisk.next = NULL; - } + add_gendisk(&lvm_gendisk); #ifdef LVM_HD_NAME /* reference from drivers/block/genhd.c */ @@ -447,7 +437,7 @@ /* optional read root VGDA */ /* - if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); + if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ printk(KERN_INFO @@ -469,8 +459,6 @@ */ static void lvm_cleanup(void) { - struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; - devfs_unregister (lvm_devfs_handle); if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { @@ -480,21 +468,8 @@ printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); } - - gendisk_ptr = gendisk_ptr_prev = gendisk_head; - while (gendisk_ptr != NULL) { - if (gendisk_ptr == &lvm_gendisk) - break; - gendisk_ptr_prev = gendisk_ptr; - gendisk_ptr = gendisk_ptr->next; - } - /* delete our gendisk from chain */ - if (gendisk_ptr == &lvm_gendisk) - gendisk_ptr_prev->next = gendisk_ptr->next; - - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + del_gendisk(&lvm_gendisk); + blk_clear(MAJOR_NR); remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); @@ -506,7 +481,6 @@ #endif printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); - return; } /* lvm_cleanup() */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/md/md.c linux/drivers/md/md.c --- /opt/kernel/linux-2.4.7/drivers/md/md.c Mon Jul 2 23:16:24 2001 +++ linux/drivers/md/md.c Tue Jul 24 15:02:55 2001 @@ -222,18 +222,6 @@ return mddev; } -struct 
gendisk * find_gendisk (kdev_t dev) -{ - struct gendisk *tmp = gendisk_head; - - while (tmp != NULL) { - if (tmp->major == MAJOR(dev)) - return (tmp); - tmp = tmp->next; - } - return (NULL); -} - mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { mdk_rdev_t * rdev; @@ -281,7 +269,7 @@ /* * ok, add this new device name to the list */ - hd = find_gendisk (dev); + hd = get_gendisk (dev); dname->name = NULL; if (hd) dname->name = disk_name (hd, MINOR(dev), dname->namebuf); @@ -582,7 +570,7 @@ static kdev_t dev_unit(kdev_t dev) { unsigned int mask; - struct gendisk *hd = find_gendisk(dev); + struct gendisk *hd = get_gendisk(dev); if (!hd) return 0; @@ -2663,7 +2651,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3537,13 +3525,13 @@ read_ahead[MAJOR_NR] = INT_MAX; - md_gendisk.next = gendisk_head; - gendisk_head = &md_gendisk; + add_gendisk(&md_gendisk); md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -3876,23 +3864,12 @@ #ifdef CONFIG_PROC_FS remove_proc_entry("mdstat", NULL); #endif - - gendisk_ptr = &gendisk_head; - while (*gendisk_ptr) { - if (*gendisk_ptr == &md_gendisk) { - *gendisk_ptr = md_gendisk.next; - break; - } - gendisk_ptr = & (*gendisk_ptr)->next; - } + + del_gendisk(&md_gendisk); blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); free_device_names(); - } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- /opt/kernel/linux-2.4.7/drivers/mtd/ftl.c Tue Jun 12 19:30:27 2001 +++ linux/drivers/mtd/ftl.c Tue Jul 24 15:04:12 2001 @@ -1171,7 +1171,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = verify_area(VERIFY_WRITE, (long *)arg, sizeof(long)); @@ -1211,42 +1211,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int ftl_reread_partitions(kdev_t dev) { + int minor = MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, &ftl_blk_fops, le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); -#ifdef PCMCIA_DEBUG - for (i = 0; i < MAX_PART; i++) { - if (ftl_hd[whole+i].nr_sects > 0) - printk(KERN_INFO " %d: start %ld size %ld\n", i, - ftl_hd[whole+i].start_sect, - ftl_hd[whole+i].nr_sects); - } -#endif - return 0; + return res; } 
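(Aside, not part of the patch itself: the LVM, MD and FTL hunks above all stop walking gendisk_head by hand and call add_gendisk(), del_gendisk() and get_gendisk() instead, while the per-partition clearing loops become wipe_partitions() plus blk_clear(). Those helpers are defined elsewhere in the patch and do not appear in this excerpt, so the sketch below is only a guess at the kind of list bookkeeping they centralize, with a cut-down struct gendisk and no locking.)

/* Illustrative sketch only -- a minimal stand-in for the gendisk list helpers
 * the drivers above now call.  The real add_gendisk(), del_gendisk() and
 * get_gendisk() from the patch are not shown in this excerpt; the struct
 * layout here is reduced to what the example needs. */
#include <stdio.h>
#include <stddef.h>

struct gendisk {
	int major;
	struct gendisk *next;
};

static struct gendisk *gendisk_head;

static void add_gendisk(struct gendisk *gp)
{
	gp->next = gendisk_head;		/* push onto the global list */
	gendisk_head = gp;
}

static void del_gendisk(struct gendisk *gp)
{
	struct gendisk **p;

	for (p = &gendisk_head; *p; p = &(*p)->next) {
		if (*p == gp) {			/* unlink the matching entry */
			*p = gp->next;
			gp->next = NULL;
			return;
		}
	}
}

static struct gendisk *get_gendisk(int major)
{
	struct gendisk *gp;

	for (gp = gendisk_head; gp; gp = gp->next)
		if (gp->major == major)
			return gp;
	return NULL;
}

int main(void)
{
	struct gendisk md = { .major = 9 }, lvm = { .major = 58 };

	add_gendisk(&md);
	add_gendisk(&lvm);
	printf("major 9 registered: %s\n", get_gendisk(9) ? "yes" : "no");
	del_gendisk(&md);
	printf("major 9 registered: %s\n", get_gendisk(9) ? "yes" : "no");
	return 0;
}

The practical effect of the conversion is that block drivers no longer touch the global disk list or the per-major arrays directly, so the core bookkeeping can change (as it does here for the new queue locking) without editing every driver again.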
/*====================================================================== @@ -1428,8 +1413,7 @@ blksize_size[FTL_MAJOR] = ftl_blocksizes; ftl_gendisk.major = FTL_MAJOR; blk_init_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR), &do_ftl_request); - ftl_gendisk.next = gendisk_head; - gendisk_head = &ftl_gendisk; + add_gendisk(&ftl_gendisk, FTL_MAJOR); register_mtd_user(&ftl_notifier); @@ -1438,19 +1422,13 @@ mod_exit_t cleanup_ftl(void) { - struct gendisk *gd, **gdp; - unregister_mtd_user(&ftl_notifier); unregister_blkdev(FTL_MAJOR, "ftl"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR)); - blksize_size[FTL_MAJOR] = NULL; + blk_clear(FTL_MAJOR); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &ftl_gendisk) { - gd = *gdp; *gdp = gd->next; - break; - } + del_gendisk(&ftl_gendisk); } module_init(init_ftl); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c --- /opt/kernel/linux-2.4.7/drivers/mtd/mtdblock.c Sat Apr 28 20:27:54 2001 +++ linux/drivers/mtd/mtdblock.c Wed Jan 1 00:07:23 1997 @@ -28,7 +28,7 @@ #if LINUX_VERSION_CODE < 0x20300 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync) #else -#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged) +#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE)) #endif #ifdef CONFIG_DEVFS_FS @@ -56,7 +56,7 @@ static spinlock_t mtdblks_lock; -static int mtd_sizes[MAX_MTD_DEVICES]; +static sector_t mtd_sizes[MAX_MTD_DEVICES]; static int mtd_blksizes[MAX_MTD_DEVICES]; @@ -392,7 +392,7 @@ /* * This is a special request_fn because it is executed in a process context - * to be able to sleep independently of the caller. The io_request_lock + * to be able to sleep independently of the caller. The queue_lock * is held upon entry and exit. * The head of our request queue is considered active so there is no need * to dequeue requests before we are done. @@ -406,7 +406,7 @@ for (;;) { INIT_REQUEST; req = CURRENT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); mtdblk = mtdblks[MINOR(req->rq_dev)]; res = 0; @@ -448,7 +448,7 @@ } end_req: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -485,16 +485,16 @@ while (!leaving) { add_wait_queue(&thr_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); if (QUEUE_EMPTY || QUEUE_PLUGGED) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); schedule(); remove_wait_queue(&thr_wq, &wait); } else { remove_wait_queue(&thr_wq, &wait); set_current_state(TASK_RUNNING); handle_mtdblock_request(); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c --- /opt/kernel/linux-2.4.7/drivers/mtd/nftlcore.c Tue Jun 12 19:30:27 2001 +++ linux/drivers/mtd/nftlcore.c Tue Jul 24 15:04:44 2001 @@ -53,17 +53,12 @@ * encountered, except ... */ -static int nftl_sizes[256]; +static sector_t nftl_sizes[256]; static int nftl_blocksizes[256]; /* .. for the Linux partition table handling.
*/ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<<NFTL_PARTN_BITS, nftl->nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<<NFTL_PARTN_BITS), + nftl->nr_sects); #endif } @@ -774,7 +770,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -787,14 +783,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -811,23 +802,11 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1< 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<<NFTL_PARTN_BITS, nftl->nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ @@ -845,7 +824,7 @@ void nftl_request(RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -857,10 +836,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ?
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -870,8 +850,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -892,8 +872,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -939,7 +917,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -1045,22 +1023,19 @@ #endif if (register_blkdev(MAJOR_NR, "nftl", &nftl_fops)){ - printk("unable to register NFTL block device on major %d\n", MAJOR_NR); + printk("unable to register NFTL block device on major %d\n", + MAJOR_NR); return -EBUSY; } else { -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = nftl_request; -#else blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), &nftl_request); -#endif + /* set block size to 1kB each */ for (i = 0; i < 256; i++) { nftl_blocksizes[i] = 1024; } blksize_size[MAJOR_NR] = nftl_blocksizes; - nftl_gendisk.next = gendisk_head; - gendisk_head = &nftl_gendisk; + add_gendisk(&nftl_gendisk); } register_mtd_user(&nftl_notifier); @@ -1070,24 +1045,12 @@ static void __exit cleanup_nftl(void) { - struct gendisk *gd, **gdp; - unregister_mtd_user(&nftl_notifier); unregister_blkdev(MAJOR_NR, "nftl"); -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = 0; -#else blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); -#endif - /* remove ourself from generic harddisk list - FIXME: why can't I found this partition on /proc/partition */ - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &nftl_gendisk) { - gd = *gdp; *gdp = gd->next; - break; - } + del_gendisk(&nftl_gendisk); } module_init(init_nftl); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- /opt/kernel/linux-2.4.7/drivers/s390/block/dasd.c Tue May 15 10:29:34 2001 +++ linux/drivers/s390/block/dasd.c Tue Jul 24 15:04:44 2001 @@ -596,10 +596,9 @@ INIT_BLK_DEV(major,do_dasd_request,dasd_get_queue,NULL); - major_info->gendisk.major = major; - major_info->gendisk.next = gendisk_head; major_info->gendisk.sizes = blk_size[major]; - gendisk_head = &major_info->gendisk; + major_info->gendisk.major = major; + add_gendisk(&major_info->gendisk); return major; out_max_sectors: kfree(max_sectors[major]); @@ -621,7 +620,6 @@ { int rc = 0; int major; - struct gendisk *dd, *prev = NULL; unsigned long flags; if (major_info == NULL) { @@ -629,33 +627,18 @@ } major = major_info->gendisk.major; INIT_BLK_DEV(major,NULL,NULL,NULL); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; - - /* do the gendisk stuff */ - for (dd = gendisk_head; dd; dd = dd->next) { - if (dd == &major_info->gendisk) { - if (prev) - prev->next = dd->next; - else - gendisk_head = dd->next; - break; - } - prev = dd; - } - if (dd == NULL) { - return -ENOENT; - } + del_gendisk(&major_info->gendisk); + kfree (major_info->gendisk.de_arr); kfree (major_info->gendisk.flags); + kfree (major_info->gendisk.part); kfree (major_info->dasd_device); kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); kfree (max_sectors[major]); - kfree 
(major_info->gendisk.part); + + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1298,14 +1281,10 @@ chanq_max_size > 0 || (req->nr_sectors >= chanq_min_size)) { ccw_req_t *cqr; - /* relocate request according to partition table */ - req->sector += device->major_info->gendisk.part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { DASD_MESSAGE (KERN_WARNING, device, "CCW creation failed on request %p\n", req); - /* revert relocation of request */ - req->sector -= device->major_info->gendisk.part[MINOR (req->rq_dev)].start_sect; break; /* terminate request queue loop */ } @@ -1357,10 +1336,10 @@ dasd_run_bh (dasd_device_t *device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue.queue_lock, flags); atomic_set(&device->bh_scheduled,0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue.queue_lock, flags); } /* @@ -2093,14 +2072,15 @@ break; } case BIODASDRWTB:{ + long startsect; int offset = 0; int xlt; rc = copy_from_user (&xlt, (void *) data, sizeof (int)) ? -EFAULT : 0; if (rc) break; - offset = major_info->gendisk.part[MINOR (inp->i_rdev)].start_sect >> - device->sizes.s2b_shift; + startsect = get_start_sect(inp->i_rdev); + offset = startsect >> device->sizes.s2b_shift; xlt += offset; rc = copy_to_user ((void *) data, &xlt, sizeof (int)) ? -EFAULT : 0; @@ -2287,15 +2267,14 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev, struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); if (!device->discipline->fill_geometry) return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info-> - gendisk.part[MINOR(kdev)].start_sect; + geo->start = get_start_sect(kdev); /* This is a hack. dasdfmt and ibm.c expect geo.start to contain the block number of the label block when diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- /opt/kernel/linux-2.4.7/drivers/s390/block/xpram.c Thu Apr 12 04:02:28 2001 +++ linux/drivers/s390/block/xpram.c Wed Jan 1 00:07:23 1997 @@ -1208,8 +1208,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior); s390irq_spin_lock_irqsave(tape->devinfo.irq,flags_390irq); atomic_set(&tape->bh_scheduled,0); tapeblock_exec_IO(tape); s390irq_spin_unlock_irqrestore(tape->devinfo.irq,flags_390irq); - spin_unlock_irqrestore (&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- /opt/kernel/linux-2.4.7/drivers/scsi/Config.in Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/Config.in Wed Jan 1 00:07:23 1997 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux.c Sun May 20 21:11:39 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Jan 1 00:07:23 1997 @@ -1103,9 +1103,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1241,7 +1241,9 @@ memset(ahc->platform_data, 0, sizeof(struct ahc_platform_data)); TAILQ_INIT(&ahc->platform_data->completeq); LIST_INIT(&ahc->platform_data->device_runq); - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1495,29 +1497,23 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); ahc_linux_run_device_queue(ahc, dev); - ahc_unlock(ahc, &flags); return (0); } @@ -2344,12 +2340,10 @@ flag == SCB_ABORT ? "n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2598,7 +2592,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2641,14 +2635,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. 
- */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2661,7 +2648,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Sat May 5 00:16:28 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Wed Jan 1 00:07:23 1997 @@ -81,7 +81,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + can_dma_32: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- /opt/kernel/linux-2.4.7/drivers/scsi/aic7xxx/aic7xxx_osm.h Fri Jul 20 06:07:19 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Tue Jul 24 15:32:52 2001 @@ -516,9 +516,6 @@ LIST_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -672,20 +669,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -697,14 +694,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.7/drivers/scsi/hosts.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/hosts.c Wed Jan 1 00:07:23 1997 @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,7 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + retval->can_dma_32 = tpnt->can_dma_32; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- 
/opt/kernel/linux-2.4.7/drivers/scsi/hosts.h Fri Jul 20 21:55:46 2001 +++ linux/drivers/scsi/hosts.h Tue Jul 24 15:33:28 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned can_dma_32:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. */ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned can_dma_32:1; /* * True if this host was loaded as a loadable module */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.7/drivers/scsi/qlogicfc.h Mon Jun 26 21:02:16 2000 +++ linux/drivers/scsi/qlogicfc.h Wed Jan 1 00:07:23 1997 @@ -100,7 +100,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + can_dma_32: 1 \ } #endif /* _QLOGICFC_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi.c Fri Jul 20 06:07:04 2001 +++ linux/drivers/scsi/scsi.c Wed Jan 1 00:07:23 1997 @@ -178,10 +178,13 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + request_queue_t *q = &SDpnt->request_queue; + + blk_init_queue(q, scsi_request_fn); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; } #ifdef MODULE @@ -612,8 +615,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -625,6 +626,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -678,41 +681,41 @@ * passes a meaningful return value. 
*/ if (host->hostt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } } else { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } } else { int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) barrier(); printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -780,7 +783,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -876,7 +879,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -966,7 +969,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1315,11 +1318,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1327,10 +1330,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1821,7 +1824,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1831,7 +1833,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1843,10 +1845,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi.h Fri Jul 20 21:55:46 2001 +++ linux/drivers/scsi/scsi.h Tue Jul 24 15:33:28 2001 @@ -389,9 +389,9 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) #include #define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) + (virt_to_phys(bio_data(X)+bio_size(X)-1)+1==virt_to_phys(bio_data(Y))) #else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) +#define CONTIGUOUS_BUFFERS(X,Y) BIO_CONTIG(X, Y) #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_error.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_error.c Wed Jan 1 00:07:23 1997 @@ -422,8 +422,6 @@ {REQUEST_SENSE, 0, 0, 0, 255, 0}; unsigned char scsi_result0[256], *scsi_result = NULL; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -580,16 +578,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -606,9 +602,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -631,10 +627,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -647,13 +643,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. 
*/ @@ -661,8 +657,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. */ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -773,9 +769,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -805,9 +801,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -838,9 +834,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -884,9 +880,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1227,7 +1223,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1244,19 +1240,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1303,7 +1302,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_lib.c Fri Jul 20 05:48:04 2001 +++ linux/drivers/scsi/scsi_lib.c Tue Jul 24 12:53:58 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. 
* * Returns: Nothing */ @@ -70,7 +70,7 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; @@ -84,15 +84,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +167,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +248,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +260,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +278,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && list_empty(&q->queue_head) && SDpnt->device_busy == 0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +289,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +323,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -361,56 +356,34 @@ int frequeue) { struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; + request_queue_t *q = &SCpnt->device->request_queue; + unsigned long flags; + + ASSERT_LOCK(&q->queue_lock, 0); - ASSERT_LOCK(&io_request_lock, 0); + spin_lock_irqsave(&q->queue_lock, flags); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + sectors -= bio_sectors(req->bio); + if (!__end_that_request_first(req, 1)) + break; + } while (sectors > 0); + + spin_unlock_irqrestore(&q->queue_lock, flags); /* * If there are blocks left over at the end, set up the command * to queue the remainder 
of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -418,17 +391,15 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { + if (req->waiting) complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +407,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,7 +457,9 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -507,9 +477,8 @@ } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -545,6 +514,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -559,7 +529,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -574,7 +544,7 @@ for (i = 0; i < SCpnt->use_sg; i++) { if (sgpnt[i].alt_address) { - if (SCpnt->request.cmd == READ) { + if (req->cmd == READ) { memcpy(sgpnt[i].alt_address, sgpnt[i].address, sgpnt[i].length); @@ -584,10 +554,12 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + char *to = bio_kmap_irq(req->bio); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -608,11 +580,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -699,7 +670,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -734,7 +705,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported.
*/ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -748,7 +719,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -762,7 +733,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -820,7 +791,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -838,7 +809,7 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* @@ -887,9 +858,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -898,14 +869,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. */ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -925,9 +896,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -959,9 +929,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -998,7 +967,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1028,7 +997,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1044,7 +1013,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1065,7 +1034,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_merge.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_merge.c Tue Jul 24 14:26:10 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). 
+ * Support for highmem I/O: Jens Axboe */ /* @@ -71,51 +72,6 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. - */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; @@ -191,8 +147,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +156,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +178,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (CONTIGUOUS_BUFFERS(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +195,22 @@ * kind of screwed and we need to start * another segment. 
*/ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_bus(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +257,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_bus((X))+bio_size((X)))|((long)bio_to_bus((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,7 +272,7 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= max_segments || + if (req->nr_segments >= q->max_segments || req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; @@ -329,8 +281,7 @@ static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two @@ -347,11 +298,10 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. @@ -371,7 +321,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +330,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -399,8 +349,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,12 +361,14 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) return 0; if (use_clustering) { @@ -427,17 +378,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
*/ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_bus(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,16 +399,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -472,11 +420,10 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) return 0; if (use_clustering) { @@ -486,15 +433,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (CONTIGUOUS_BUFFERS(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_bus(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +454,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -522,12 +467,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. 
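The merge hunks above and the MERGEFCT wrappers that follow all enforce the same segment-accounting rule: a bio (or an adjacent request) may only be merged while the request stays within the queue's segment limit, the host's scatter-gather table size, and the host's max_sectors transfer limit. A minimal sketch of that rule, written against the fields used in this patch, is shown here; the function name and the reduced set of checks are illustrative only, since the real __scsi_back_merge_fn() additionally handles clustering and the ISA DMA threshold cases.

/*
 * Illustrative sketch, not part of the patch: the core accounting the
 * merge functions perform before a bio is allowed onto an existing request.
 */
static int sketch_back_merge_ok(request_queue_t *q, struct request *req,
				struct bio *bio, struct Scsi_Host *SHpnt)
{
	/* would the merged request exceed the host's per-command limit? */
	if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors)
		return 0;

	/* is there room for one more scatter-gather segment? */
	if (req->nr_segments >= q->max_segments ||
	    req->nr_segments >= SHpnt->sg_tablesize)
		return 0;

	req->nr_segments++;
	return 1;
}
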
@@ -535,15 +480,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +518,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +538,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -614,13 +555,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || + if (req->nr_segments + next->nr_segments - 1 > q->max_segments || req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { return 0; } @@ -652,8 +592,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_bus(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +601,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && CONTIGUOUS_BUFFERS(req->biotail, next->bio) + && bio_to_bus(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +614,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (CONTIGUOUS_BUFFERS(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -688,7 +627,7 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } @@ -696,7 +635,7 @@ * first segment in next, then the check for hw segments was * done above already, so we can always merge. */ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { return 0; @@ -711,7 +650,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || + if (req->nr_segments + next->nr_segments > q->max_segments || req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } else { @@ -732,12 +671,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. 
+ * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +684,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +735,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -808,30 +745,8 @@ struct scatterlist * sgpnt; int this_count; - /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -847,24 +762,22 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. + * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* * Allocate the actual scatter-gather table itself. * scsi_malloc can only allocate in chunks of 512 bytes */ - SCpnt->sglist_len = (SCpnt->use_sg - * sizeof(struct scatterlist) + 511) & ~511; + SCpnt->sglist_len = (count * sizeof(struct scatterlist) + 511) & ~511; sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); @@ -877,7 +790,7 @@ * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } /* @@ -887,15 +800,14 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + for (count = 0, bio = req->bio; bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { + if (dma_host && bio_to_bus(bioprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (CONTIGUOUS_BUFFERS(bioprev, bio)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -904,32 +816,32 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( bio_to_bus(bio) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } + + set_bio_sg(&sgpnt[count], bio); + if (!dma_host) + SCpnt->request_bufflen += bio_size(bio); + count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; - } - bhprev = bh; + bioprev = bio; } /* @@ -937,13 +849,14 @@ */ if (count != SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. */ @@ -952,7 +865,7 @@ sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA @@ -970,7 +883,12 @@ break; } - sgpnt[i].alt_address = sgpnt[i].address; + /* + * this is not a dma host, so it will never + * be a highmem page + */ + sgpnt[i].alt_address = page_address(sgpnt[i].page) + + sgpnt[i].offset; sgpnt[i].address = (char *) scsi_malloc(sgpnt[i].length); /* @@ -986,7 +904,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, sgpnt[i].alt_address, sgpnt[i].length); } @@ -1031,21 +949,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1058,28 +975,32 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_bus(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + char *buf = bio_kmap_irq(bio); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1166,4 +1087,18 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ +#ifdef CONFIG_HIGHMEM + if (SHpnt->can_dma_32 && (SDpnt->type == TYPE_DISK)) { + blk_queue_bounce_limit(q, BLK_BOUNCE_4G); + printk("SCSI: channel %d, id %d: enabling highmem I/O\n", + SDpnt->channel, SDpnt->id); + } else + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); +#endif + + blk_queue_max_sectors(q, 1024); } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_obsolete.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Wed Jan 1 00:07:23 1997 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. */ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. 
*/ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- /opt/kernel/linux-2.4.7/drivers/scsi/scsi_queue.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Wed Jan 1 00:07:23 1997 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); cmd->host->host_busy--; cmd->device->device_busy--; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); /* * Insert this command at the head of the queue for it's device. diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sd.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/sd.c Tue Jul 24 15:38:27 2001 @@ -61,10 +61,6 @@ #include -/* - * static const char RCSid[] = "$Header:"; - */ - #define SD_MAJOR(i) (!(i) ? 
SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) #define SCSI_DISKS_PER_MAJOR 16 @@ -72,8 +68,7 @@ #define SD_MINOR_NUMBER(i) ((i) & 255) #define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) #define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) -#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) -#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) +#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4)) #define MAX_RETRIES 5 @@ -89,14 +84,12 @@ static Scsi_Disk *rscsi_disks; static int *sd_sizes; static int *sd_blocksizes; -static int *sd_hardsizes; /* Hardware sector size */ static int check_scsidisk_media_change(kdev_t); static int fop_revalidate_scsidisk(kdev_t); static int sd_init_onedisk(int); - static int sd_init(void); static void sd_finish(void); static int sd_attach(Scsi_Device *); @@ -177,9 +170,11 @@ diskinfo[0] = 0x40; diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + diskinfo[2] = + rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - /* override with calculated, extended default, or driver values */ + /* override with calculated, extended default, + or driver values */ if(host->hostt->bios_param != NULL) host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], @@ -189,48 +184,14 @@ dev, &diskinfo[0]); if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) - return -EFAULT; - return 0; - } - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - - if(!loc) - return -EINVAL; - - host = rscsi_disks[DEVICE_NR(dev)].device->host; - - /* default to most commonly used values */ - - diskinfo[0] = 0x40; - diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - - /* override with calculated, extended default, or driver values */ - - if(host->hostt->bios_param != NULL) - host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, - &diskinfo[0]); - else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, &diskinfo[0]); - - if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) return -EFAULT; return 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) - return -EINVAL; - return put_user(sd[SD_PARTITION(inode->i_rdev)].nr_sects, (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -240,6 +201,8 @@ case BLKPG: case BLKELVGET: case BLKELVSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ @@ -248,7 +211,8 @@ return revalidate_scsidisk(dev, 1); default: - return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device, + cmd, (void *) arg); } } @@ -298,7 +262,7 @@ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, block = %d\n", devm, block)); dpnt = &rscsi_disks[dev]; - if (devm >= (sd_template.dev_max << 4) || + if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) || !dpnt || !dpnt->device->online || block + SCpnt->request.nr_sectors > sd[devm].nr_sects) { @@ -306,7 +270,7 @@ SCSI_LOG_HLQUEUE(2, printk("Retry with 
0x%p\n", SCpnt)); return 0; } - block += sd[devm].start_sect; + if (dpnt->device->changed) { /* * quietly refuse to do anything to a changed disc until the changed @@ -576,7 +540,6 @@ static struct gendisk *sd_gendisks = &sd_gendisk; #define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR] -#define LAST_SD_GENDISK sd_gendisks[N_USED_SD_MAJORS - 1] /* * rw_intr is the interrupt routine for the device driver. @@ -616,8 +579,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -640,7 +603,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -959,15 +922,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1052,7 +1011,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1063,10 +1022,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. 
*/ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1076,93 +1042,78 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - } - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; 
sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; - sd_gendisks[i].next = sd_gendisks + i + 1; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); } - LAST_SD_GENDISK.next = NULL; return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1173,19 +1124,13 @@ static void sd_finish() { - struct gendisk *gendisk; int i; for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; + add_gendisk(&(sd_gendisks[i])); } - for (gendisk = gendisk_head; gendisk != NULL; gendisk = gendisk->next) - if (gendisk == sd_gendisks) - break; - if (gendisk == NULL) { - LAST_SD_GENDISK.next = gendisk_head; - gendisk_head = sd_gendisks; - } + for (i = 0; i < sd_template.dev_max; ++i) if (!rscsi_disks[i].capacity && rscsi_disks[i].device) { sd_init_onedisk(i); @@ -1273,9 +1218,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1285,36 +1228,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. 
- */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1324,6 +1249,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1331,18 +1257,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1355,7 +1276,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1366,10 +1286,7 @@ static void __exit exit_sd(void) { - struct gendisk **prev_sdgd_link; - struct gendisk *sdgd; int i; - int removed = 0; scsi_unregister_module(MODULE_SCSI_DEV, &sd_template); @@ -1381,31 +1298,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); - - /* - * Now remove sd_gendisks from the linked list - */ - prev_sdgd_link = &gendisk_head; - while ((sdgd = *prev_sdgd_link) != NULL) { - if (sdgd >= sd_gendisks && sdgd <= &LAST_SD_GENDISK) { - removed++; - *prev_sdgd_link = sdgd->next; - continue; - } - prev_sdgd_link = &sdgd->next; - } - - if (removed != N_USED_SD_MAJORS) - printk("%s %d sd_gendisks in disk chain", - removed > N_USED_SD_MAJORS ? 
"total" : "just", removed); - } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sr.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/sr.c Wed Jan 1 00:07:23 1997 @@ -85,10 +85,9 @@ }; Scsi_CD *scsi_CDs; -static int *sr_sizes; +static sector_t *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -219,8 +218,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -643,6 +642,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -791,21 +791,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. */ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -877,7 +870,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -885,17 +877,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). 
*/ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -906,7 +899,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -928,13 +921,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.c Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/sym53c8xx.c Tue Jul 24 15:30:11 2001 @@ -644,10 +644,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -658,8 +658,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -13667,9 +13667,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13690,9 +13690,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.7/drivers/scsi/sym53c8xx.h Fri Jul 20 21:56:08 2001 +++ linux/drivers/scsi/sym53c8xx.h Wed Jan 1 00:07:23 1997 @@ -96,7 +96,8 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + can_dma_32: 1} #else diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.7/fs/Makefile Tue May 22 18:35:42 2001 +++ linux/fs/Makefile Wed Jan 1 00:07:22 1997 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o dcache.o +export-objs := filesystems.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/bio.c linux/fs/bio.c --- /opt/kernel/linux-2.4.7/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Tue Jul 24 14:19:09 
2001 @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep, *biovec_cachep; +static spinlock_t bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); + +#define BIO_HASH_SCALE 3 + +#define bhash_fn(htable, key) \ + ((((key) >> BIO_HASH_SCALE) & (htable)->mask) + (htable)->table) + +/* + * make this changeable (1 or 2 would be a good default, 32 is insane :-) + */ +#define BIO_POOL_PAGES 1 + +int bio_hash_init(bio_hash_table_t *hash_table, int entries) +{ + int bit_size, hash_bits, hash_size; + + memset(hash_table, 0, sizeof(bio_hash_table_t)); + + bit_size = entries * sizeof(void); + hash_bits = 0; + while (bit_size) { + hash_bits++; + bit_size >>= 1; + } + + hash_size = (1UL << hash_bits) * sizeof(void *); + hash_table->table = vmalloc(hash_size); + if (hash_table->table == NULL) + return -ENOMEM; + + memset(hash_table->table, 0, hash_size); + hash_table->mask = (1UL << hash_bits) - 1; + hash_table->valid_counter = 1; + + return 0; +} + +void bio_hash_cleanup(bio_hash_table_t *table) +{ + vfree(table->table); + memset(table, 0, sizeof(bio_hash_table_t)); +} + +inline void __bio_hash_remove(bio_hash_t *entry) +{ + if (entry->valid_counter) { + bio_hash_t *nxt = entry->next_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; + entry->pprev_hash = NULL; + entry->valid_counter = 0; + } +} + +inline void bio_hash_remove(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + __bio_hash_remove(&bio->bi_hash); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +inline void __bio_hash_add(bio_hash_table_t *htable, struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t **hash = bhash_fn(htable, bio->bi_sector); + bio_hash_t *nxt = *hash; + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = htable->valid_counter; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; + + htable->st.nr_inserts++; +} + +inline void bio_hash_add(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + __bio_hash_add(&q->queue_hash, bio); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +inline struct bio *__bio_hash_find(bio_hash_table_t *htable, sector_t sector) +{ + bio_hash_t **hash = bhash_fn(htable, sector); + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + + htable->st.nr_lookups++; + + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + if (entry->valid_counter == htable->valid_counter) { + bio = 
hash_entry(entry, struct bio, bi_hash); + if (bio->bi_sector == sector) { + if (nr > htable->st.max_bucket_size) + htable->st.max_bucket_size = nr; + if (nr <= MAX_PROFILE_BUCKETS) + htable->st.bucket_size[nr]++; + htable->st.nr_hits++; +#if 0 + bio_get(bio); +#endif + return bio; + } + + nr++; + continue; + } + + /* + * prune out-of-date entries as we go along + */ + __bio_hash_remove(entry); + nr++; + } + + return NULL; +} + +inline struct bio *bio_hash_find(request_queue_t *q, sector_t sector) +{ + struct bio *bio; + unsigned long flags; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + bio = __bio_hash_find(&q->queue_hash, sector); + spin_unlock_irqrestore(&q->queue_lock, flags); + + return bio; +} + +inline int __bio_hash_add_unique(bio_hash_table_t *htable, struct bio *bio) +{ + int ret = 1; + + if (!__bio_hash_find(htable, bio->bi_sector)) { + __bio_hash_add(htable, bio); + ret = 0; + } + + return ret; +} + +inline int bio_hash_add_unique(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + int ret; + + if (spin_is_locked(&q->queue_lock)) + BUG(); + + spin_lock_irqsave(&q->queue_lock, flags); + ret = __bio_hash_add_unique(&q->queue_hash, bio); + spin_unlock_irqrestore(&q->queue_lock, flags); + + return ret; +} + +static inline struct bio *bio_pool_get(void) +{ + unsigned long flags; + struct bio *bio; + + spin_lock_irqsave(&bio_lock, flags); + if ((bio = bio_pool)) { + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio->bi_flags = BIO_POOL; + } + spin_unlock_irqrestore(&bio_lock, flags); + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&bio_lock, flags); + bio->bi_next = bio_pool; + bio_pool = bio; + spin_unlock_irqrestore(&bio_lock, flags); + + /* + * use wake-one + */ + if (waitqueue_active(&bio_pool_wait)) + wake_up_nr(&bio_pool_wait, 1); +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * + * Description: + * bio_alloc will first try it's on internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +#if 0 +struct bio *bio_alloc(int gfp_mask) +{ + DECLARE_WAITQUEUE(wait, current); + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + + /* + * first try slab + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + /* + * hrmpf, not much luck. if __GFP_WAIT is set, wait on + * bio_pool. 
+ */ + if ((gfp_mask & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) { + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + bio = bio_pool_get(); + if (bio) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + } + return bio; +} +#endif + +struct bio *bio_alloc(int gfp_mask) +{ + struct bio *bio; + + /* + * first try slab + */ +restart: + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + wakeup_bdflush(0); + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + run_task_queue(&tq_disk); + + current->policy |= SCHED_YIELD; + __set_current_state(TASK_RUNNING); + schedule(); + goto restart; + +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + return bio; +} + + +/* + * queue lock assumed held! + */ +static inline void __bio_free(struct bio *bio) +{ + __bio_hash_remove(&bio->bi_hash); + + if (bio->bi_flags & BIO_POOL) + bio_pool_put(bio); + else + kmem_cache_free(bio_cachep, bio); +} + +/** + * bio_put - release a reference to a bio + * @bio: bio to be released + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. + **/ +void bio_put(struct bio *bio) +{ + if (!atomic_read(&bio->bi_cnt)) + BUG(); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + if (bio->bi_next) + BUG(); + + __bio_free(bio); + } +} + +struct bio_vec *biovec_alloc(int gfp_mask) +{ + return kmem_cache_alloc(biovec_cachep, gfp_mask); +} + +void biovec_free(struct bio_vec *biov) +{ + kmem_cache_free(biovec_cachep, biov); +} + +#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!(bio->bi_flags & BIO_UPTODATE)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. + */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_put(bio); + return 1; +} +#endif + +static void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = bio->bi_private; + + end_kio_request(kio, bio->bi_flags & BIO_UPTODATE); + bio_put(bio); +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a continous range of data, and must be fully prepared for I/O. 
+ **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long sector) +{ + struct bio *bio; + int i, offset, len, size; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro device %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + offset = kio->offset & ~PAGE_MASK; + + len = kio->length; + for (i = 0; i < kio->nr_pages; i++) { + bio = bio_alloc(GFP_NOIO); + + bio->bi_dev = dev; + bio->bi_sector = sector; + + size = PAGE_SIZE - offset; + if (size > len) + size = len; + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = size; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + len -= size; + sector += (size >> 9); + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init bio_init_pool(void) +{ + struct page *page; + struct bio *bio, *bio_map; + int nr = BIO_POOL_PAGES, i, total; + + total = 0; + do { + page = alloc_page(GFP_KERNEL); + + bio_map = page_address(page); + memset(bio_map, 0, PAGE_SIZE); + for (i = 0; i < PAGE_SIZE / sizeof(struct bio); i++) { + bio = bio_map + i; + bio_pool_put(bio); + total++; + } + } while (--nr); + + return total; +} + +static int __init init_bio(void) +{ + int nr; + + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + biovec_cachep = kmem_cache_create("biovec", sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!biovec_cachep) + panic("bio: can't create biovec_cachep slab cache\n"); + + nr = bio_init_pool(); + printk("BIO: pool of %d setup, %luKb (%d bytes/bio)\n", nr, (BIO_POOL_PAGES * PAGE_SIZE) >> 10, sizeof(struct bio)); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_put); +EXPORT_SYMBOL(ll_rw_kio); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/block_dev.c linux/fs/block_dev.c --- /opt/kernel/linux-2.4.7/fs/block_dev.c Tue Jun 12 04:15:27 2001 +++ linux/fs/block_dev.c Wed Jan 1 00:07:22 1997 @@ -17,7 +17,6 @@ #include -extern int *blk_size[]; extern int *blksize_size[]; #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.7/fs/buffer.c Wed Jul 18 16:55:05 2001 +++ linux/fs/buffer.c Wed Jan 1 00:07:22 1997 @@ -557,7 +557,8 @@ * will force it bad). This shouldn't really happen currently, but * the code is ready. 
*/ -static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size) +static inline struct buffer_head * __get_hash_table(kdev_t dev, sector_t block, + int size) { struct buffer_head *bh = hash(dev, block); @@ -572,7 +573,7 @@ return bh; } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh; @@ -691,7 +692,6 @@ void set_blocksize(kdev_t dev, int size) { - extern int *blksize_size[]; int i, nlist, slept; struct buffer_head * bh, * bh_next; @@ -738,8 +738,10 @@ if (!atomic_read(&bh->b_count)) { if (buffer_dirty(bh)) printk(KERN_WARNING - "set_blocksize: dev %s buffer_dirty %lu size %hu\n", - kdevname(dev), bh->b_blocknr, bh->b_size); + "set_blocksize: dev %s buffer_dirty %Lu size %hu\n", + kdevname(dev), + (unsigned long long) bh->b_blocknr, + bh->b_size); remove_inode_queue(bh); __remove_from_queues(bh); put_last_free(bh); @@ -749,9 +751,10 @@ clear_bit(BH_Uptodate, &bh->b_state); printk(KERN_WARNING "set_blocksize: " - "b_count %d, dev %s, block %lu, from %p\n", + "b_count %d, dev %s, block %Lu, from %p\n", atomic_read(&bh->b_count), bdevname(bh->b_dev), - bh->b_blocknr, __builtin_return_address(0)); + (unsigned long long) bh->b_blocknr, + __builtin_return_address(0)); } write_unlock(&hash_table_lock); if (slept) @@ -997,7 +1000,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { struct buffer_head * bh; int isize; @@ -1977,7 +1980,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -1988,57 +1992,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2050,21 +2003,14 @@ * passed in to completely map the iobufs to disk. 
*/ -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; + struct kiobuf * iobuf; struct page * map; - struct buffer_head *tmp, **bhs = NULL; if (!nr) return 0; @@ -2074,92 +2020,37 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + if (!map) + return -EFAULT; + } } /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - tmp = bhs[bhind++]; - - tmp->b_dev = B_FREE; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - length -= size; - offset += size; - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? */ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } + + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + transferred += iobuf->length; + } + + return transferred; } /* @@ -2174,7 +2065,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. 
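To spell out the unit conversion in the rewritten brw_kiovec(): each b[i] is the starting block of iovec[i] in units of 'size' bytes, and ll_rw_kio() receives it scaled to 512-byte sectors as b[i] * (size >> 9). A hedged caller sketch, raw-I/O style, where the names and the 4KB block size are assumptions:

/*
 * Read one already-prepared kiobuf starting at 4KB block 100 of 'dev'.
 * With size == 4096, size >> 9 == 8, so the transfer starts at sector 800.
 * Returns the number of bytes waited for, or a negative errno if the
 * kiovec is rejected up front (misaligned offset/length, unmapped page).
 */
static int read_kiobuf_example(struct kiobuf *iobuf, kdev_t dev)
{
	sector_t blocks[1] = { 100 };

	return brw_kiovec(READ, 1, &iobuf, dev, blocks, 4096);
}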
*/ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.7/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Wed Jan 1 00:07:23 1997 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/check.c linux/fs/partitions/check.c --- /opt/kernel/linux-2.4.7/fs/partitions/check.c Wed Jul 11 23:55:41 2001 +++ linux/fs/partitions/check.c Tue Jul 24 15:32:25 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -33,12 +35,11 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - struct gendisk *gendisk_head; int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ -static int (*check_part[])(struct gendisk *hd, kdev_t dev, unsigned long first_sect, int first_minor) = { +static int (*check_part[])(struct gendisk *hd, kdev_t dev, + unsigned long first_sect, int first_minor) = { #ifdef CONFIG_ACORN_PARTITION acorn_partition, #endif @@ -250,8 +251,9 @@ char buf[64]; len += sprintf(page + len, - "%4d %4d %10d %s\n", - dsk->major, n, dsk->sizes[n], + "%4d %4d %10Lu %s\n", + dsk->major, n, + (u64) dsk->sizes[n], disk_name(dsk, n, buf)); if (len < offset) offset -= len, len = 0; @@ -409,32 +411,85 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); - if(!dev->sizes) - blk_size[dev->major] = NULL; + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + 
first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; + + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. */ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); - blk_size[dev->major] = dev->sizes; + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + blk_size[g->major] = (sector_t *) g->sizes; + + } +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? */ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/check.h linux/fs/partitions/check.h --- /opt/kernel/linux-2.4.7/fs/partitions/check.h Thu Feb 17 00:42:06 2000 +++ linux/fs/partitions/check.h Wed Jan 1 00:07:23 1997 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. */ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/partitions/ibm.c linux/fs/partitions/ibm.c --- /opt/kernel/linux-2.4.7/fs/partitions/ibm.c Sun May 20 21:11:39 2001 +++ linux/fs/partitions/ibm.c Wed Jan 1 00:07:23 1997 @@ -29,13 +29,11 @@ #include "check.h" #include -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,3,98)) /* We hook in when DASD is a module... 
*/ int (*genhd_dasd_name)(char*,int,int,struct gendisk*) = NULL; int (*genhd_dasd_fillgeo)(int,struct hd_geometry *) = NULL; EXPORT_SYMBOL(genhd_dasd_fillgeo); EXPORT_SYMBOL(genhd_dasd_name); -#endif /* LINUX_IS_24 */ typedef enum { ibm_partition_lnx1 = 0, diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/fs/udf/lowlevel.c linux/fs/udf/lowlevel.c --- /opt/kernel/linux-2.4.7/fs/udf/lowlevel.c Tue Jun 12 04:15:27 2001 +++ linux/fs/udf/lowlevel.c Tue Jul 24 15:04:44 2001 @@ -1,5 +1,5 @@ /* - * lowlevel.c + * fs/udf/lowlevel.c * * PURPOSE * Low Level Device Routines for the UDF filesystem diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- /opt/kernel/linux-2.4.7/include/asm-i386/highmem.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/highmem.h Wed Jan 1 00:07:23 1997 @@ -74,17 +74,15 @@ kunmap_high(page); } -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic(struct page *page, enum km_type type) +static inline void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; + /* + * could be moved outside __cli context, but then caller would + * have to check... + */ if (page < highmem_start_page) return page_address(page); @@ -100,7 +98,26 @@ return (void*) vaddr; } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. 
+ */ +static inline void *kmap_atomic(struct page *page, enum km_type type) +{ + unsigned long flags; + void *vaddr; + + __save_flags(flags); + __cli(); + vaddr = __kmap_atomic(page, type); + __restore_flags(flags); + + return vaddr; +} + +static inline void __kunmap_atomic(void *kvaddr, enum km_type type) { #if HIGHMEM_DEBUG unsigned long vaddr = (unsigned long) kvaddr; @@ -118,6 +135,18 @@ */ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); +#endif +} + +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + unsigned long flags; + + __save_flags(flags); + __cli(); + __kunmap_atomic(kvaddr, type); + __restore_flags(flags); #endif } diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.7/include/asm-i386/kmap_types.h Thu Apr 12 21:11:39 2001 +++ linux/include/asm-i386/kmap_types.h Wed Jan 1 00:07:23 1997 @@ -6,6 +6,7 @@ KM_BOUNCE_WRITE, KM_SKB_DATA, KM_SKB_DATA_SOFTIRQ, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/page.h linux/include/asm-i386/page.h --- /opt/kernel/linux-2.4.7/include/asm-i386/page.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/page.h Wed Jan 1 00:07:23 1997 @@ -116,6 +116,8 @@ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +#define page_to_phys(page) (((page) - mem_map) * PAGE_SIZE) +#define page_to_bus(page) page_to_phys((page)) #endif /* __KERNEL__ */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/pci.h linux/include/asm-i386/pci.h --- /opt/kernel/linux-2.4.7/include/asm-i386/pci.h Fri Jul 20 21:52:38 2001 +++ linux/include/asm-i386/pci.h Tue Jul 24 15:30:29 2001 @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -84,6 +85,27 @@ /* Nothing to do */ } +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical + * to pci_map_single, but takes a struct page instead of a virtual address + */ +extern inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, + size_t size, int offset, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + + return (page - mem_map) * PAGE_SIZE + offset; +} + +extern inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. 
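Note the argument order of the pci_map_page() just added: size comes before offset. As a hedged illustration (map_bio_for_dma is an invented name; the bio accessors are the ones defined later in include/linux/bio.h), a driver could map a bio's single segment for a device-to-memory transfer like this:

static dma_addr_t map_bio_for_dma(struct pci_dev *pdev, struct bio *bio)
{
	/* pair with pci_unmap_page() once the transfer completes */
	return pci_map_page(pdev, bio_page(bio), bio_size(bio),
			    bio_offset(bio), PCI_DMA_FROMDEVICE);
}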
Here the scatter gather list @@ -102,8 +124,20 @@ extern inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { + int i; + if (direction == PCI_DMA_NONE) BUG(); + + for (i = 0; i < nents; i++ ) { + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else if (sg[i].page) + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + else + BUG(); + } + return nents; } @@ -119,6 +153,33 @@ /* Nothing to do */ } +/* + * meant to replace the pci_map_sg api, new drivers should use this + * interface + */ +extern inline int pci_map_sgl(struct pci_dev *hwdev, struct sg_list *sg, + int nents, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nents; i++) + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + + return nents; +} + +extern inline void pci_unmap_sgl(struct pci_dev *hwdev, struct sg_list *sg, + int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + + /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. * @@ -173,10 +234,9 @@ /* These macros should be used after a pci_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. + * returns. */ -#define sg_dma_address(sg) (virt_to_bus((sg)->address)) +#define sg_dma_address(sg) ((sg)->dma_address) #define sg_dma_len(sg) ((sg)->length) /* Return the index of the PCI controller for device. */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- /opt/kernel/linux-2.4.7/include/asm-i386/processor.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/processor.h Wed Jan 1 00:07:23 1997 @@ -477,4 +477,32 @@ __asm__ __volatile__("rep;nop"); } +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + #endif /* __ASM_I386_PROCESSOR_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/scatterlist.h linux/include/asm-i386/scatterlist.h --- /opt/kernel/linux-2.4.7/include/asm-i386/scatterlist.h Mon Dec 30 12:01:10 1996 +++ linux/include/asm-i386/scatterlist.h Wed Jan 1 00:07:23 1997 @@ -1,12 +1,59 @@ #ifndef _I386_SCATTERLIST_H #define _I386_SCATTERLIST_H +/* + * temporary measure, include a page and offset. + */ struct scatterlist { - char * address; /* Location data is to be transferred to */ + struct page * page; /* Location for highmem page, if any */ + char * address; /* Location data is to be transferred to, NULL for + * highmem page */ char * alt_address; /* Location of actual if address is a * dma indirect buffer. NULL otherwise */ + dma_addr_t dma_address; unsigned int length; + unsigned int offset;/* for highmem, page offset */ }; + +/* + * new style scatter gather list -- move to this completely? 
+ */ +struct sg_list { + /* + * input + */ + struct page *page; /* page to do I/O to */ + unsigned int length; /* length of I/O */ + unsigned int offset; /* offset into page */ + + /* + * original page, if bounced + */ + struct page *bounce_page; + + /* + * output + */ + dma_addr_t dma_address; /* mapped address */ +}; + +/* + * compat function... go to sg_list instead for new stuff! + */ +extern inline void set_bio_sg(struct scatterlist *sg, struct bio *bio) +{ + if (PageHighMem(bio_page(bio))) { + sg->page = bio_page(bio); + sg->offset = bio_offset(bio); + sg->address = NULL; + } else { + sg->page = NULL; + sg->offset = 0; + sg->address = bio_data(bio); + } + + sg->length = bio_size(bio); +} #define ISA_DMA_THRESHOLD (0x00ffffff) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- /opt/kernel/linux-2.4.7/include/asm-i386/spinlock.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/spinlock.h Wed Jan 1 00:07:23 1997 @@ -12,7 +12,7 @@ * initialize their spinlocks properly, tsk tsk. * Remember to turn this off in 2.4. -ben */ -#define SPINLOCK_DEBUG 0 +#define SPINLOCK_DEBUG 2 /* * Your basic SMP spinlocks, allowing only a single CPU anywhere diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-i386/uaccess.h linux/include/asm-i386/uaccess.h --- /opt/kernel/linux-2.4.7/include/asm-i386/uaccess.h Fri Jul 20 21:52:18 2001 +++ linux/include/asm-i386/uaccess.h Wed Jan 1 00:07:23 1997 @@ -6,6 +6,7 @@ */ #include #include +#include #include #define VERIFY_READ 0 @@ -526,6 +527,7 @@ static inline unsigned long __constant_copy_to_user(void *to, const void *from, unsigned long n) { + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __constant_copy_user(to,from,n); return n; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- /opt/kernel/linux-2.4.7/include/asm-m68k/machdep.h Tue Nov 28 02:57:34 2000 +++ linux/include/asm-m68k/machdep.h Wed Jan 1 00:07:23 1997 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-s390/dasd.h linux/include/asm-s390/dasd.h --- /opt/kernel/linux-2.4.7/include/asm-s390/dasd.h Thu Apr 12 04:02:28 2001 +++ linux/include/asm-s390/dasd.h Wed Jan 1 00:07:23 1997 @@ -319,7 +319,7 @@ unsigned long dasd_io_times[32]; /* histogram of requests's times */ unsigned long dasd_io_timps[32]; /* histogram of requests's times per sector */ unsigned long dasd_io_time1[32]; /* histogram of time from build to start */ - unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time2ps[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time3[32]; /* histogram of time from irq to end */ } dasd_profile_info_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/asm-s390x/dasd.h linux/include/asm-s390x/dasd.h --- /opt/kernel/linux-2.4.7/include/asm-s390x/dasd.h Thu Apr 12 04:02:29 2001 +++ linux/include/asm-s390x/dasd.h Wed Jan 1 00:07:23 1997 @@ -319,7 +319,7 @@ unsigned long dasd_io_times[32]; /* histogram of requests's times */ unsigned long dasd_io_timps[32]; /* histogram of requests's times per sector */ unsigned long dasd_io_time1[32]; /* histogram of time from build to start */ - 
unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned long dasd_io_time2[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time2ps[32]; /* histogram of time from start to irq */ unsigned long dasd_io_time3[32]; /* histogram of time from irq to end */ } dasd_profile_info_t; diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.7/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Wed Jan 1 00:07:23 1997 @@ -0,0 +1,199 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +/* + * transition to 64-bit sector_t, possibly making it an option... + */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned int valid_counter; +} bio_hash_t; + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + unsigned long nr_lookups; + unsigned long nr_hits; + unsigned long nr_inserts; + unsigned long max_bucket_size; + unsigned long bucket_size[MAX_PROFILE_BUCKETS + 1]; + + unsigned long q_nr_back_lookups; + unsigned long q_nr_back_hits; + unsigned long q_nr_back_merges; + unsigned long q_nr_front_lookups; + unsigned long q_nr_front_hits; + unsigned long q_nr_front_merges; +}; + +/* + * hash table must be a power of two + */ +typedef struct bio_hash_table_s { + bio_hash_t **table; + unsigned long mask; + unsigned int valid_counter; + + struct bio_hash_stats st; +} bio_hash_table_t; + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + kdev_t bi_dev; + sector_t bi_sector; + bio_hash_t bi_hash; + struct bio *bi_next; /* request queue link */ + struct bio_vec bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + atomic_t bi_cnt; /* free when it hits zero */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; + struct request *bi_req; /* linked to this request */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags + */ +#define BIO_UPTODATE 1 /* ok after I/O completion */ +#define BIO_READ 2 /* read request */ +#define BIO_WRITE 4 /* write request */ +#define 
BIO_RW_AHEAD 8 /* read/write ahead */ +#define BIO_BARRIER 16 /* barrier I/O */ +#define BIO_RW_BLOCK 32 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 64 /* out-out-bounds error */ +#define BIO_POOL 128 /* from bio pool, not slab cache */ + +/* + * if you change any of the above, make sure this is still correct!! + */ +#define BIO_RW_MASK (BIO_READ + BIO_WRITE + BIO_RW_AHEAD + BIO_BARRIER) + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_to_bus(bio) (page_to_bus(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_flags & BIO_BARRIER) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) (kmap(bio_page((bio))) + bio_offset((bio))) +#define bio_kunmap(bio) kunmap(bio_page((bio))) +#define bio_kmap_irq(bio) (kmap_atomic(bio_page((bio)), KM_BIO_IRQ) + bio_offset((bio))) +#define bio_kunmap_irq(ptr) kunmap_atomic((void *) (((unsigned long) (ptr)) & PAGE_MASK), KM_BIO_IRQ) + +#define BIO_CONTIG(bio, nxt) (bio_to_bus((bio)) + bio_size((bio)) == bio_to_bus((nxt))) + +typedef void (bi_end_io_t) (struct bio *); + +#define bio_endio(bio, ok) do { \ + if (ok) \ + (bio)->bi_flags |= BIO_UPTODATE;\ + else \ + (bio)->bi_flags &= ~BIO_UPTODATE; \ + (bio)->bi_end_io((bio)); \ + } while (0) + +#define bio_io_error(bio) bio_endio((bio), 0) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) 
+ * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +struct request_queue; + +extern struct bio *bio_alloc(int); +extern inline void bio_put(struct bio *); +extern struct bio_vec *biovec_alloc(int); +extern void biovec_free(struct bio_vec *); + +extern int bio_hash_init(bio_hash_table_t *, int); +extern void bio_hash_cleanup(bio_hash_table_t *); +extern inline void bio_hash_remove(struct request_queue *, struct bio *); +extern inline void bio_hash_add(struct request_queue *, struct bio *); +extern inline struct bio *bio_hash_find(struct request_queue *, sector_t); +extern inline struct bio *__bio_hash_find(bio_hash_table_t *, sector_t); +extern inline int bio_hash_add_unique(struct request_queue *, struct bio *); +extern inline int __bio_hash_add_unique(bio_hash_table_t *, struct bio *); + +/* + * increment valid_counter, and make sure that wraps go to 1 and not 0 + */ +#define __bio_hash_inval(htable) do { \ + if (!++(htable)->valid_counter) \ + (htable)->valid_counter = 1; \ + } while (0) + +#endif /* __LINUX_BIO_H */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.7/include/linux/blk.h Fri Jul 20 21:53:47 2001 +++ linux/include/linux/blk.h Tue Jul 24 15:30:40 2001 @@ -7,13 +7,6 @@ #include /* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* * Initialization functions. */ extern int isp16_init(void); @@ -85,13 +78,14 @@ * code duplication in drivers. */ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(request_queue_t *, struct request *, int uptodate); +extern inline int __end_that_request_first(struct request *, int uptodate); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -336,12 +330,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -365,16 +363,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -383,18 +379,26 @@ #if ! 
SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { - struct request *req = CURRENT; - - if (end_that_request_first(req, uptodate, DEVICE_NAME)) - return; - +static inline void __end_request(struct request *req, int uptodate) +{ + if (!__end_that_request_first(req, uptodate)) { #ifndef DEVICE_NO_RANDOM - add_blkdev_randomness(MAJOR(req->rq_dev)); + add_blkdev_randomness(MAJOR(req->rq_dev)); #endif - DEVICE_OFF(req->rq_dev); - blkdev_dequeue_request(req); - end_that_request_last(req); + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); + } +} + +static inline void end_request(int uptodate) +{ + request_queue_t *q = CURRENT->q; + unsigned long flags; + + spin_lock_irqsave(&q->queue_lock, flags); + __end_request(CURRENT, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); } #endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.7/include/linux/blkdev.h Fri Jul 20 21:53:09 2001 +++ linux/include/linux/blkdev.h Tue Jul 24 15:29:49 2001 @@ -12,57 +12,46 @@ struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests when that is implemented. In - * paging, 'bh' is NULL, and the completion is used to wait - * for the IO to be ready. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* look for ->queue? you must _not_ + * access it directly, use + * blkdev_dequeue_request! */ int elevator_sequence; - struct list_head table; volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 - kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned short current_nr_sectors; + unsigned short hard_cur_sectors; void * special; char * buffer; struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -89,7 +78,7 @@ merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - 
plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -97,33 +86,112 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit (phys addr) */ - struct tq_struct plug_tq; + struct page *bounce_limit; + int bounce_gfp; /* - * Boolean that indicates whether this queue is plugged or not. + * This is used to remove the plug when tq_disk runs. */ - char plugged; + struct tq_struct plug_tq; /* - * Boolean that indicates whether current_request is active or - * not. + * various queue flags, see QUEUE_* below */ - char head_active; + unsigned long queue_flags; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* * Tasks wait here for free request */ - wait_queue_head_t wait_for_request; + wait_queue_head_t wait_for_request[2]; + + /* + * queue settings + */ + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + + /* + * queue state + */ + enum blk_queue_state queue_state; + + /* + * bio hash table + */ + bio_hash_table_t queue_hash; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_IDE_MAPPED 2 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_HEADACTIVE 1 /* has active head (going away) */ + +#define blk_queue_flag(q, flag) test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_set_flag(q, flag) set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_clear_flag(q, flag) clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) + +#define blk_queue_plugged(q) blk_queue_flag(q, PLUGGED) +#define blk_queue_headlive(q) blk_queue_flag(q, HEADACTIVE) + +#define blk_mark_plugged(q) blk_set_flag(q, PLUGGED) +#define blk_mark_headactive(q) blk_set_flag(q, HEADACTIVE) + +#define blk_set_unplugged(q) test_and_clear_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) +#define blk_set_plugged(q) test_and_set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_queue_empty(q) list_empty(&(q)->queue_head) + +#ifdef CONFIG_HIGHMEM +extern struct bio *create_bounce(struct bio *, int); +extern inline struct bio *blk_queue_bounce(request_queue_t *q, struct bio *bio) +{ + if (bio_page(bio) <= q->bounce_limit) + return bio; + + return create_bounce(bio, q->bounce_gfp); +} +#else +#define blk_queue_bounce(q, bio) (bio) +#endif + +extern unsigned long blk_max_low_pfn; + +static inline struct request *elv_next_request(request_queue_t *q) +{ + return q->elevator.elevator_next_req_fn(q); +} + +static inline void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +static inline struct request *elv_next_request_fn(request_queue_t *q) +{ + return list_entry(q->queue_head.next, struct request, queuelist); +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn * PAGE_SIZE) +#define BLK_BOUNCE_4G (0xffffffff) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -148,61 +216,68 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int 
wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); -extern request_queue_t *blk_get_queue(kdev_t dev); -extern inline request_queue_t *__blk_get_queue(kdev_t dev); +extern void generic_make_request(struct bio *bio); +extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern inline void blk_wake_queue(request_queue_t *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int blk_init_queue(request_queue_t *, request_fn_proc *); extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void generic_unplug_device(void *); -extern int * blk_size[MAX_BLKDEV]; +extern sector_t * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - extern atomic_t queued_sectors; #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) - /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); +static inline void blk_clear(int major) +{ + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + static inline int get_hardsect_size(kdev_t dev) { - extern int *hardsect_size[]; - if (hardsect_size[MAJOR(dev)] != NULL) - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - else - return 512; + request_queue_t *q = blk_get_queue(dev); + + return q ? 
q->hardsect_size : 512; } #define blk_finished_io(nsects) \ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/cdrom.h linux/include/linux/cdrom.h --- /opt/kernel/linux-2.4.7/include/linux/cdrom.h Fri Jul 20 21:53:03 2001 +++ linux/include/linux/cdrom.h Wed Jan 1 00:07:23 1997 @@ -577,6 +577,8 @@ struct dvd_manufact manufact; } dvd_struct; +#define CDROM_MAX_CDROMS 256 + /* * DVD authentication ioctl */ @@ -733,6 +735,7 @@ int number; /* generic driver updates this */ /* specifications */ kdev_t dev; /* device number */ + int nr; /* cdrom entry */ int mask; /* mask of capability: disables them */ int speed; /* maximum speed for reading data */ int capacity; /* number of discs in jukebox */ diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- /opt/kernel/linux-2.4.7/include/linux/devfs_fs_kernel.h Fri Jul 20 21:52:57 2001 +++ linux/include/linux/devfs_fs_kernel.h Wed Jan 1 00:07:23 1997 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/elevator.h linux/include/linux/elevator.h --- /opt/kernel/linux-2.4.7/include/linux/elevator.h Fri Feb 16 01:58:34 2001 +++ linux/include/linux/elevator.h Wed Jan 1 00:07:23 1997 @@ -5,13 +5,19 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,14 +27,24 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + unsigned int queue_ID; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); @@ -45,7 +61,7 @@ extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t); /* * Return values from elevator merger @@ -99,6 +115,8 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* 
elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ }) #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/fs.h linux/include/linux/fs.h --- /opt/kernel/linux-2.4.7/include/linux/fs.h Fri Jul 20 21:52:18 2001 +++ linux/include/linux/fs.h Wed Jan 1 00:07:23 1997 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -235,32 +236,31 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completio +n */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + + struct bio *b_bio; /* allocated on I/O to/from buffer */ }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); @@ -1133,10 +1133,24 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! 
+ * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); +} + +extern inline int bio_rw(struct bio *bio) +{ + if (bio->bi_flags & BIO_READ) + return READ; + else if (bio->bi_flags & BIO_WRITE) + return WRITE; + + BUG(); + return -1; /* ahem */ } extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); @@ -1297,10 +1311,11 @@ extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_moveto(struct file *new, struct file *old); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); extern void submit_bh(int, struct buffer_head *); +extern void submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1318,7 +1333,7 @@ extern struct buffer_head * bread(kdev_t, int, int); extern void wakeup_bdflush(int wait); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); @@ -1332,7 +1347,7 @@ unsigned long *); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/genhd.h linux/include/linux/genhd.h --- /opt/kernel/linux-2.4.7/include/linux/genhd.h Fri Jul 20 21:53:03 2001 +++ linux/include/linux/genhd.h Tue Jul 24 15:29:46 2001 @@ -74,6 +74,47 @@ devfs_handle_t *de_arr; /* one per physical disc */ char *flags; /* one per physical disc */ }; + +extern struct gendisk *blk_gendisk[MAX_BLKDEV]; /* in ll_rw_blk.c */ +extern struct gendisk *gendisk_head; /* in partitions/check.c */ + +static inline void +add_gendisk(struct gendisk *g) { + if (!blk_gendisk[g->major]) { + g->next = gendisk_head; + gendisk_head = g; + blk_gendisk[g->major] = g; + } +} + +static inline void +del_gendisk(struct gendisk *g) { + struct gendisk ** gp; + + blk_gendisk[g->major] = NULL; + for (gp = &gendisk_head; *gp; gp = &((*gp)->next)) + if (*gp == g) + break; + if (*gp) + *gp = (*gp)->next; +} + +static inline struct gendisk * +get_gendisk(kdev_t dev) { + return blk_gendisk[MAJOR(dev)]; +} + +static inline unsigned long +get_start_sect(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].start_sect : 0; +} + +static inline unsigned long +get_nr_sects(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].nr_sects : 0; +} #endif /* __KERNEL__ */ #ifdef CONFIG_SOLARIS_X86_PARTITION @@ -237,35 +278,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. 
- */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/highmem.h linux/include/linux/highmem.h --- /opt/kernel/linux-2.4.7/include/linux/highmem.h Fri Jul 20 21:52:18 2001 +++ linux/include/linux/highmem.h Tue Jul 24 15:29:51 2001 @@ -2,6 +2,7 @@ #define _LINUX_HIGHMEM_H #include +#include #include #ifdef CONFIG_HIGHMEM @@ -13,7 +14,7 @@ /* declarations for linux/mm/highmem.c */ FASTCALL(unsigned int nr_free_highpages(void)); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); +extern struct bio *create_bounce(struct bio * bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.7/include/linux/ide.h Fri Jul 20 21:53:47 2001 +++ linux/include/linux/ide.h Tue Jul 24 15:30:40 2001 @@ -87,6 +87,11 @@ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ /* + * state flags + */ +#define DMA_PIO_RETRY 1 /* retrying in PIO */ + +/* * Ensure that various configuration flags have compatible settings */ #ifdef REALLY_SLOW_IO @@ -152,6 +157,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. 
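Connecting the highmem pieces: a driver that cannot reach all of physical memory declares its DMA limit when it sets the queue up, and blk_queue_bounce() (in blkdev.h earlier in this patch) then substitutes a bounced bio for pages above that limit. A minimal sketch under those assumptions, with invented function names:

/* at queue init: this hardware can only DMA below 4GB */
static void my_init_queue(request_queue_t *q)
{
	blk_queue_bounce_limit(q, BLK_BOUNCE_4G);
}

/* before building the scatter-gather list for a bio */
static struct bio *my_prep_bio(request_queue_t *q, struct bio *bio)
{
	/* returns the original bio, or a bounce copy for high pages */
	return blk_queue_bounce(q, bio);
}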
+ */
+#define PRD_BYTES 8
+#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES))
+
+/*
  * Some more useful definitions
  */
 #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */
@@ -287,6 +307,8 @@
 byte keep_settings; /* restore settings after drive reset */
 byte using_dma; /* disk is using dma for read/write */
 byte waiting_for_dma; /* dma currently in progress */
+ byte retry_pio; /* retrying in pio mode */
+ byte state; /* retry state */
 byte unmask; /* flag: okay to unmask other irqs */
 byte slow; /* flag: slow data port */
 byte bswap; /* flag: byte swap data */
@@ -436,7 +458,7 @@
 ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */
 unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */
 dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */
- struct scatterlist *sg_table; /* Scatter-gather list used to build the above */
+ struct sg_list *sg_table; /* Scatter-gather list used to build the above */
 int sg_nents; /* Current number of entries in it */
 int sg_dma_direction; /* dma transfer direction */
 struct hwif_s *mate; /* other hwif from same PCI chip */
@@ -457,6 +479,7 @@
 unsigned reset : 1; /* reset after probe */
 unsigned autodma : 1; /* automatically try to enable DMA at boot */
 unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */
+ unsigned highmem : 1; /* can do full 32-bit dma */
 byte channel; /* for dual-port chips: 0=primary, 1=secondary */
 #ifdef CONFIG_BLK_DEV_IDEPCI
 struct pci_dev *pci_dev; /* for pci chipsets */
@@ -489,10 +512,12 @@
  */
 typedef int (ide_expiry_t)(ide_drive_t *);
+#define IDE_BUSY 0
+#define IDE_SLEEP 1
+
 typedef struct hwgroup_s {
 ide_handler_t *handler;/* irq handler, if active */
- volatile int busy; /* BOOL: protects all fields below */
- int sleeping; /* BOOL: wake us up on timer expiry */
+ unsigned long flags; /* BUSY, SLEEPING */
 ide_drive_t *drive; /* current drive */
 ide_hwif_t *hwif; /* ptr to current hwif in linked-list */
 struct request *rq; /* current request */
@@ -733,6 +758,11 @@
 unsigned long current_capacity (ide_drive_t *drive);
 /*
+ * Revalidate (read partition tables)
+ */
+void ide_revalidate_drive (ide_drive_t *drive);
+
+/*
  * Start a reset operation for an IDE interface.
  * The caller should return immediately after invoking this.
  */
@@ -754,6 +784,21 @@
 } ide_action_t;
 /*
+ * temporarily mapping a (possible) highmem bio for PIO transfer
+ */
+#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9)
+
+extern inline void *ide_map_buffer(struct request *rq)
+{
+ return bio_kmap_irq(rq->bio) + ide_rq_offset(rq);
+}
+
+extern inline void ide_unmap_buffer(char *buffer)
+{
+ bio_kunmap_irq(buffer);
+}
+
+/*
  * This function issues a special IDE device request
  * onto the request queue.
  *
@@ -900,5 +945,8 @@
 #endif
 void hwif_unregister (ide_hwif_t *hwif);
+
+#define DRIVE_LOCK(drive) ((drive)->queue.queue_lock)
+extern spinlock_t ide_lock;
 #endif /* _IDE_H */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/iobuf.h linux/include/linux/iobuf.h
--- /opt/kernel/linux-2.4.7/include/linux/iobuf.h Fri Jul 20 21:52:24 2001
+++ linux/include/linux/iobuf.h Wed Jan 1 00:07:23 1997
@@ -26,7 +26,6 @@
 #define KIO_MAX_ATOMIC_IO 512 /* in kb */
 #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
-#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
 /* The main kiobuf struct used for all our IO! */
@@ -48,8 +47,6 @@
 /* Always embed enough struct pages for atomic IO */
 struct page * map_array[KIO_STATIC_PAGES];
- struct buffer_head * bh[KIO_MAX_SECTORS];
- unsigned long blocks[KIO_MAX_SECTORS];
 /* Dynamic state for IO completion: */
 atomic_t io_count; /* IOs still in progress */
@@ -81,6 +78,9 @@
 /* fs/buffer.c */
 int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
- kdev_t dev, unsigned long b[], int size);
+ kdev_t dev, sector_t [], int size);
+
+/* fs/bio.c */
+void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block);
 #endif /* __LINUX_IOBUF_H */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/list.h linux/include/linux/list.h
--- /opt/kernel/linux-2.4.7/include/linux/list.h Sat Feb 17 01:06:17 2001
+++ linux/include/linux/list.h Wed Jan 1 00:07:23 1997
@@ -3,6 +3,8 @@
 #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+#include
+
 /*
 * Simple doubly linked list implementation.
 *
@@ -147,7 +149,8 @@
 * @head: the head for your list.
 */
 #define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+  pos = pos->next, prefetch(pos->next))
 #endif /* __KERNEL__ || _LVM_H_INCLUDE */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/loop.h linux/include/linux/loop.h
--- /opt/kernel/linux-2.4.7/include/linux/loop.h Wed Mar 7 04:35:36 2001
+++ linux/include/linux/loop.h Wed Jan 1 00:07:23 1997
@@ -49,8 +49,8 @@
 int old_gfp_mask;
 spinlock_t lo_lock;
- struct buffer_head *lo_bh;
- struct buffer_head *lo_bhtail;
+ struct bio *lo_bio;
+ struct bio *lo_biotail;
 int lo_state;
 struct semaphore lo_sem;
 struct semaphore lo_ctl_mutex;
@@ -77,6 +77,7 @@
 */
 #define LO_FLAGS_DO_BMAP 1
 #define LO_FLAGS_READ_ONLY 2
+#define LO_FLAGS_BH_REMAP 4
 /*
 * Note that this structure gets the wrong offsets when directly used
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/nbd.h linux/include/linux/nbd.h
--- /opt/kernel/linux-2.4.7/include/linux/nbd.h Tue May 1 23:20:25 2001
+++ linux/include/linux/nbd.h Tue Jul 24 15:34:38 2001
@@ -37,24 +37,25 @@
 static void nbd_end_request(struct request *req)
 {
- struct buffer_head *bh;
+ struct bio *bio;
 unsigned nsect;
 unsigned long flags;
 int uptodate = (req->errors == 0) ? 1 : 0;
+ request_queue_t *q = req->q;
 #ifdef PARANOIA
 requests_out++;
 #endif
- spin_lock_irqsave(&io_request_lock, flags);
- while((bh = req->bh) != NULL) {
-  nsect = bh->b_size >> 9;
+ spin_lock_irqsave(&q->queue_lock, flags);
+ while((bio = req->bio) != NULL) {
+  nsect = bio_sectors(bio);
 blk_finished_io(nsect);
-  req->bh = bh->b_reqnext;
-  bh->b_reqnext = NULL;
-  bh->b_end_io(bh, uptodate);
+  req->bio = bio->bi_next;
+  bio->bi_next = NULL;
+  bio_endio(bio, uptodate);
 }
 blkdev_release_request(req);
- spin_unlock_irqrestore(&io_request_lock, flags);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
 }
 #define MAX_NBD 128
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/pci.h linux/include/linux/pci.h
--- /opt/kernel/linux-2.4.7/include/linux/pci.h Fri Jul 20 21:52:38 2001
+++ linux/include/linux/pci.h Tue Jul 24 15:30:29 2001
@@ -314,6 +314,8 @@
 #define PCI_DMA_FROMDEVICE 2
 #define PCI_DMA_NONE 3
+#define PCI_MAX_DMA32 (0xffffffff)
+
 #define DEVICE_COUNT_COMPATIBLE 4
 #define DEVICE_COUNT_IRQ 2
 #define DEVICE_COUNT_DMA 2
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/prefetch.h linux/include/linux/prefetch.h
--- /opt/kernel/linux-2.4.7/include/linux/prefetch.h Thu Jan 1 01:00:00 1970
+++ linux/include/linux/prefetch.h Wed Jan 1 00:07:23 1997
@@ -0,0 +1,60 @@
+/*
+ * Generic cache management functions. Everything is arch-specific,
+ * but this header exists to make sure the defines/functions can be
+ * used in a generic way.
+ *
+ * 2000-11-13 Arjan van de Ven
+ *
+ */
+
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#include
+#include
+
+/*
+ prefetch(x) attempts to pre-emptively get the memory pointed to
+ by address "x" into the CPU L1 cache.
+ prefetch(x) should not cause any kind of exception, prefetch(0) is
+ specifically ok.
+
+ prefetch() should be defined by the architecture, if not, the
+ #define below provides a no-op define.
+
+ There are 3 prefetch() macros:
+
+ prefetch(x) - prefetches the cacheline at "x" for read
+ prefetchw(x) - prefetches the cacheline at "x" for write
+ spin_lock_prefetch(x) - prefetches the spinlock *x for taking
+
+ there is also PREFETCH_STRIDE which is the architecture-preferred
+ "lookahead" size for prefetching streamed operations.
+
+*/
+
+/*
+ * These cannot be do{}while(0) macros. See the mental gymnastics in
+ * the loop macro.
+ */
+
+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif
+
+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif
+
+#endif
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/raid/md.h linux/include/linux/raid/md.h
--- /opt/kernel/linux-2.4.7/include/linux/raid/md.h Fri Jul 20 21:53:47 2001
+++ linux/include/linux/raid/md.h Tue Jul 24 15:30:47 2001
@@ -77,7 +77,6 @@
 extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
 extern void md_recover_arrays (void);
 extern int md_check_ordering (mddev_t *mddev);
-extern struct gendisk * find_gendisk (kdev_t dev);
 extern int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x);
 extern int md_error (mddev_t *mddev, kdev_t rdev);
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/include/linux/slab.h linux/include/linux/slab.h
--- /opt/kernel/linux-2.4.7/include/linux/slab.h Fri Jul 20 21:52:18 2001
+++ linux/include/linux/slab.h Wed Jan 1 00:07:23 1997
@@ -75,6 +75,8 @@
 extern kmem_cache_t *bh_cachep;
 extern kmem_cache_t *fs_cachep;
 extern kmem_cache_t *sigact_cachep;
+extern kmem_cache_t *bio_cachep;
+extern kmem_cache_t *biovec_cachep;
 #endif /* __KERNEL__ */
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/kernel/ksyms.c linux/kernel/ksyms.c
--- /opt/kernel/linux-2.4.7/kernel/ksyms.c Fri Jul 20 21:41:02 2001
+++ linux/kernel/ksyms.c Wed Jan 1 00:07:23 1997
@@ -121,6 +121,8 @@
 EXPORT_SYMBOL(kunmap_high);
 EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(create_bounce);
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
 #endif
 /* filesystem internal functions */
@@ -282,7 +284,6 @@
 EXPORT_SYMBOL(block_read);
 EXPORT_SYMBOL(block_write);
 EXPORT_SYMBOL(blksize_size);
-EXPORT_SYMBOL(hardsect_size);
 EXPORT_SYMBOL(blk_size);
 EXPORT_SYMBOL(blk_dev);
 EXPORT_SYMBOL(is_read_only);
@@ -300,9 +301,10 @@
 EXPORT_SYMBOL(tq_disk);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(refile_buffer);
-EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_readahead);
 EXPORT_SYMBOL(file_moveto);
+EXPORT_SYMBOL(wipe_partitions);
+EXPORT_SYMBOL(blk_gendisk);
 /* tty routines */
 EXPORT_SYMBOL(tty_hangup);
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.4.7/mm/highmem.c Sat Jun 30 01:17:34 2001
+++ linux/mm/highmem.c Wed Jan 1 00:07:23 1997
@@ -22,6 +22,8 @@
 #include
 #include
+#include
+
 /*
 * Virtual_count is not a pure "count".
 * 0 means that it is not mapped, and has not been mapped
@@ -173,18 +175,19 @@
 static LIST_HEAD(emergency_bhs);
 /*
- * Simple bounce buffer support for highmem pages.
- * This will be moved to the block layer in 2.5.
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
  */
-
-static inline void copy_from_high_bh (struct buffer_head *to,
-  struct buffer_head *from)
+static inline void copy_from_high_bio(struct bio *to, struct bio *from)
 {
- struct page *p_from;
- char *vfrom;
+ char *vfrom, *vto;
 unsigned long flags;
- p_from = from->b_page;
+ if (bio_offset(to))
+  BUG();
+ if (bio_size(to) != bio_size(from))
+  BUG();
 /*
 * Since this can be executed from IRQ context, reentrance
 *
@@ -192,38 +195,60 @@
 */
 __save_flags(flags);
 __cli();
- vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
- memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
- kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
+ vfrom = __kmap_atomic(bio_page(from), KM_BOUNCE_WRITE);
+ vto = __kmap_atomic(bio_page(to), KM_BOUNCE_WRITE);
+ memcpy(vto, vfrom + bio_offset(from), bio_size(to));
+ __kunmap_atomic(vto, KM_BOUNCE_WRITE);
+ __kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
 __restore_flags(flags);
 }
-static inline void copy_to_high_bh_irq (struct buffer_head *to,
-  struct buffer_head *from)
+static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
- struct page *p_to;
- char *vto;
+ char *vto, *vfrom;
 unsigned long flags;
- p_to = to->b_page;
+ if (bio_offset(from))
+  BUG();
+ if (bio_size(to) != bio_size(from))
+  BUG();
+
 __save_flags(flags);
 __cli();
- vto = kmap_atomic(p_to, KM_BOUNCE_READ);
- memcpy(vto + bh_offset(to), from->b_data, to->b_size);
- kunmap_atomic(vto, KM_BOUNCE_READ);
+ vto = __kmap_atomic(bio_page(to), KM_BOUNCE_READ);
+ vfrom = __kmap_atomic(bio_page(from), KM_BOUNCE_READ);
+ memcpy(vto + bio_offset(to), vfrom, bio_size(to));
+ __kunmap_atomic(vfrom, KM_BOUNCE_READ);
+ __kunmap_atomic(vto, KM_BOUNCE_READ);
 __restore_flags(flags);
 }
-static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+static __init int init_emergency_pool(void)
 {
- struct page *page;
- struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
- unsigned long flags;
+ spin_lock_irq(&emergency_lock);
+ while (nr_emergency_pages < POOL_SIZE) {
+  struct page * page = alloc_page(GFP_ATOMIC);
+  if (!page) {
+   printk("couldn't refill highmem emergency pages");
+   break;
+  }
+  list_add(&page->list, &emergency_pages);
+  nr_emergency_pages++;
+ }
+ spin_unlock_irq(&emergency_lock);
+ printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages);
+ return 0;
+}
- bh_orig->b_end_io(bh_orig, uptodate);
+__initcall(init_emergency_pool);
- page = bh->b_page;
+static inline void bounce_end_io (struct bio *bio)
+{
+ struct bio *bio_orig = bio->bi_private;
+ struct page *page = bio_page(bio);
+ unsigned long flags;
+ bio_endio(bio_orig, bio->bi_flags & BIO_UPTODATE);
 spin_lock_irqsave(&emergency_lock, flags);
 if (nr_emergency_pages >= POOL_SIZE)
  __free_page(page);
@@ -235,74 +260,32 @@
 list_add(&page->list, &emergency_pages);
 nr_emergency_pages++;
 }
-
- if (nr_emergency_bhs >= POOL_SIZE) {
-#ifdef HIGHMEM_DEBUG
-  /* Don't clobber the constructed slab cache */
-  init_waitqueue_head(&bh->b_wait);
-#endif
-  kmem_cache_free(bh_cachep, bh);
- } else {
-  /*
-   * Ditto in the bh case, here we abuse b_inode_buffers:
-   */
-  list_add(&bh->b_inode_buffers, &emergency_bhs);
-  nr_emergency_bhs++;
- }
 spin_unlock_irqrestore(&emergency_lock, flags);
+ bio_put(bio);
 }
-static __init int init_emergency_pool(void)
+static void bounce_end_io_write (struct bio *bio)
 {
- spin_lock_irq(&emergency_lock);
- while (nr_emergency_pages < POOL_SIZE) {
-  struct page * page = alloc_page(GFP_ATOMIC);
-  if (!page) {
-   printk("couldn't refill highmem emergency pages");
-   break;
-  }
-  list_add(&page->list, &emergency_pages);
-  nr_emergency_pages++;
- }
- while (nr_emergency_bhs < POOL_SIZE) {
-  struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
-  if (!bh) {
-   printk("couldn't refill highmem emergency bhs");
-   break;
-  }
-  list_add(&bh->b_inode_buffers, &emergency_bhs);
-  nr_emergency_bhs++;
- }
- spin_unlock_irq(&emergency_lock);
- printk("allocated %d pages and %d bhs reserved for the highmem bounces\n",
-  nr_emergency_pages, nr_emergency_bhs);
-
- return 0;
+ bounce_end_io(bio);
 }
-__initcall(init_emergency_pool);
-
-static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_read (struct bio *bio)
 {
- bounce_end_io(bh, uptodate);
-}
+ struct bio *bio_orig = bio->bi_private;
-static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
-{
- struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+ if (bio->bi_flags & BIO_UPTODATE)
+  copy_to_high_bio_irq(bio_orig, bio);
- if (uptodate)
-  copy_to_high_bh_irq(bh_orig, bh);
- bounce_end_io(bh, uptodate);
+ bounce_end_io(bio);
 }
-struct page *alloc_bounce_page (void)
+struct page *alloc_bounce_page(int gfp_mask)
 {
 struct list_head *tmp;
 struct page *page;
 repeat_alloc:
- page = alloc_page(GFP_NOIO);
+ page = alloc_page(gfp_mask);
 if (page)
  return page;
 /*
@@ -334,91 +317,35 @@
 goto repeat_alloc;
 }
-struct buffer_head *alloc_bounce_bh (void)
+struct bio *create_bounce(struct bio *bio_orig, int gfp_mask)
 {
- struct list_head *tmp;
- struct buffer_head *bh;
+ struct page *page;
+ struct bio *bio;
-repeat_alloc:
- bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
- if (bh)
-  return bh;
- /*
-  * No luck. First, kick the VM so it doesnt idle around while
-  * we are using up our emergency rations.
-  */
- wakeup_bdflush(0);
+ bio = bio_alloc(GFP_NOIO);
 /*
-  * Try to allocate from the emergency pool.
+  * wasteful for 1kB fs, but machines with lots of ram are less likely
+  * to have 1kB fs for anything that needs to go fast. so all things
+  * considered, it should be ok.
  */
- tmp = &emergency_bhs;
- spin_lock_irq(&emergency_lock);
- if (!list_empty(tmp)) {
-  bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers);
-  list_del(tmp->next);
-  nr_emergency_bhs--;
- }
- spin_unlock_irq(&emergency_lock);
- if (bh)
-  return bh;
+ page = alloc_bounce_page(gfp_mask);
- /* we need to wait I/O completion */
- run_task_queue(&tq_disk);
+ bio->bi_dev = bio_orig->bi_dev;
+ bio->bi_sector = bio_orig->bi_sector;
+ bio->bi_flags |= bio_orig->bi_flags & BIO_RW_MASK;
- current->policy |= SCHED_YIELD;
- __set_current_state(TASK_RUNNING);
- schedule();
- goto repeat_alloc;
-}
+ bio->bi_io_vec.bv_page = page;
+ bio->bi_io_vec.bv_len = bio_size(bio_orig);
+ bio->bi_io_vec.bv_offset = 0;
-struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
-{
- struct page *page;
- struct buffer_head *bh;
+ bio->bi_private = bio_orig;
- if (!PageHighMem(bh_orig->b_page))
-  return bh_orig;
-
- bh = alloc_bounce_bh();
- /*
-  * This is wasteful for 1k buffers, but this is a stopgap measure
-  * and we are being ineffective anyway. This approach simplifies
-  * things immensly. On boxes with more than 4GB RAM this should
-  * not be an issue anyway.
-  */
- page = alloc_bounce_page();
-
- set_bh_page(bh, page, 0);
-
- bh->b_next = NULL;
- bh->b_blocknr = bh_orig->b_blocknr;
- bh->b_size = bh_orig->b_size;
- bh->b_list = -1;
- bh->b_dev = bh_orig->b_dev;
- bh->b_count = bh_orig->b_count;
- bh->b_rdev = bh_orig->b_rdev;
- bh->b_state = bh_orig->b_state;
-#ifdef HIGHMEM_DEBUG
- bh->b_flushtime = jiffies;
- bh->b_next_free = NULL;
- bh->b_prev_free = NULL;
- /* bh->b_this_page */
- bh->b_reqnext = NULL;
- bh->b_pprev = NULL;
-#endif
- /* bh->b_page */
- if (rw == WRITE) {
-  bh->b_end_io = bounce_end_io_write;
-  copy_from_high_bh(bh, bh_orig);
+ if (bio->bi_flags & BIO_WRITE) {
+  bio->bi_end_io = bounce_end_io_write;
+  copy_from_high_bio(bio, bio_orig);
 } else
-  bh->b_end_io = bounce_end_io_read;
- bh->b_private = (void *)bh_orig;
- bh->b_rsector = bh_orig->b_rsector;
-#ifdef HIGHMEM_DEBUG
- memset(&bh->b_wait, -1, sizeof(bh->b_wait));
-#endif
+  bio->bi_end_io = bounce_end_io_read;
- return bh;
+ return bio;
 }
-
diff -urN -X /home/axboe/exclude /opt/kernel/linux-2.4.7/mm/page_io.c linux/mm/page_io.c
--- /opt/kernel/linux-2.4.7/mm/page_io.c Wed Apr 25 23:46:21 2001
+++ linux/mm/page_io.c Wed Jan 1 00:07:23 1997
@@ -36,7 +36,7 @@
 static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
 {
 unsigned long offset;
- int zones[PAGE_SIZE/512];
+ sector_t zones[PAGE_SIZE/512];
 int zones_used;
 kdev_t dev = 0;
 int block_size;