[PATCH] disk barriers: IDE

ide bits Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Jens Axboe <axboe@suse.de> 2004-08-22 22:37:19 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2004-08-22 22:37:19 -0700
commit: 5e9dbcaa2c752116b116bda619cf2e5438757e6a (patch)
tree: cfef5aa97d34f24ce0574ca9ebc570e52dc38df0 /drivers
parent: d2a8285687638fad1fb6b4b5e7e809330e21b1eb (diff)
download: history-5e9dbcaa2c752116b116bda619cf2e5438757e6a.tar.gz
4 files changed, 360 insertions, 31 deletions
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7b3bf05cbf95da..8a8cfe6c70b55b 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -702,6 +702,37 @@ static u8 idedisk_dump_status (ide_drive_t *drive, const char *msg, u8 stat)
 	}
 #endif	/* FANCY_STATUS_DUMPS */
 	printk("\n");
+	{
+		struct request *rq;
+		unsigned char opcode = 0;
+		int found = 0;
+
+		spin_lock(&ide_lock);
+		rq = HWGROUP(drive)->rq;
+		spin_unlock(&ide_lock);
+		if (!rq)
+			goto out;
+		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+			char *args = rq->buffer;
+			if (args) {
+				opcode = args[0];
+				found = 1;
+			}
+		} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+			ide_task_t *args = rq->special;
+			if (args) {
+				task_struct_t *tf = (task_struct_t *) args->tfRegister;
+				opcode = tf->command;
+				found = 1;
+			}
+		}
+		printk("ide: failed opcode was: ");
+		if (!found)
+			printk("unknown\n");
+		else
+			printk("0x%02x\n", opcode);
+	}
+out:
 	local_irq_restore(flags);
 	return err;
 }
@@ -1203,6 +1234,42 @@ static ide_proc_entry_t idedisk_proc[] = {
 
 #endif	/* CONFIG_PROC_FS */
 
+static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk,
+			       sector_t *error_sector)
+{
+	ide_drive_t *drive = q->queuedata;
+	struct request *rq;
+	int ret;
+
+	if (!drive->wcache)
+		return 0;
+
+	rq = blk_get_request(q, WRITE, __GFP_WAIT);
+
+	memset(rq->cmd, 0, sizeof(rq->cmd));
+
+	if (ide_id_has_flush_cache_ext(drive->id) &&
+	    (drive->capacity64 >= (1UL << 28)))
+		rq->cmd[0] = WIN_FLUSH_CACHE_EXT;
+	else
+		rq->cmd[0] = WIN_FLUSH_CACHE;
+
+
+	rq->flags |= REQ_DRIVE_TASK | REQ_SOFTBARRIER;
+	rq->buffer = rq->cmd;
+
+	ret = blk_execute_rq(q, disk, rq);
+
+	/*
+	 * if we failed and caller wants error offset, get it
+	 */
+	if (ret && error_sector)
+		*error_sector = ide_get_error_location(drive, rq->cmd);
+
+	blk_put_request(rq);
+	return ret;
+}
+
 /*
  * This is tightly woven into the driver->do_special can not touch.
  * DON'T do it again until a total personality rewrite is committed.
@@ -1231,16 +1298,10 @@ static int set_nowerr(ide_drive_t *drive, int arg)
 	return 0;
 }
 
-/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */
-#define ide_id_has_flush_cache(id)	((id)->cfs_enable_2 & 0x3000)
-
-/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */
-#define ide_id_has_flush_cache_ext(id)	\
-	(((id)->cfs_enable_2 & 0x2400) == 0x2400)
-
 static int write_cache (ide_drive_t *drive, int arg)
 {
 	ide_task_t args;
+	int err;
 
 	if (!ide_id_has_flush_cache(drive->id))
 		return 1;
@@ -1251,7 +1312,10 @@ static int write_cache (ide_drive_t *drive, int arg)
 	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SETFEATURES;
 	args.command_type			= IDE_DRIVE_TASK_NO_DATA;
 	args.handler				= &task_no_data_intr;
-	(void) ide_raw_taskfile(drive, &args, NULL);
+
+	err = ide_raw_taskfile(drive, &args, NULL);
+	if (err)
+		return err;
 
 	drive->wcache = arg;
 	return 0;
@@ -1412,6 +1476,7 @@ static void idedisk_setup (ide_drive_t *drive)
 {
 	struct hd_driveid *id = drive->id;
 	unsigned long long capacity;
+	int barrier;
 
 	idedisk_add_settings(drive);
 
@@ -1543,6 +1608,27 @@ static void idedisk_setup (ide_drive_t *drive)
 		drive->wcache = 1;
 
 	write_cache(drive, 1);
+
+	/*
+	 * decide if we can sanely support flushes and barriers on
+	 * this drive. unfortunately not all drives advertise FLUSH_CACHE
+	 * support even if they support it. So assume FLUSH_CACHE is there
+	 * always. LBA48 drives are newer, so expect it to flag support
+	 * properly. We can safely support FLUSH_CACHE on lba48, if capacity
+	 * doesn't exceed lba28
+	 */
+	barrier = 1;
+	if (drive->addressing == 1) {
+		if (capacity > (1ULL << 28) && !ide_id_has_flush_cache_ext(id))
+			barrier = 0;
+	}
+
+	printk("%s: cache flushes %ssupported\n",
+		drive->name, barrier ? "" : "not ");
+	if (barrier) {
+		blk_queue_ordered(drive->queue, 1);
+		blk_queue_issue_flush_fn(drive->queue, idedisk_issue_flush);
+	}
 }
 
 static void ide_cacheflush_p(ide_drive_t *drive)
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ff5b16a87e26d4..ba620bc0c0e0bb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,38 +54,77 @@
 #include <asm/io.h>
 #include <asm/bitops.h>
 
-/**
- *	ide_end_request		-	complete an IDE I/O
- *	@drive: IDE device for the I/O
- *	@uptodate: 
- *	@nr_sectors: number of sectors completed
- *
- *	This is our end_request wrapper function. We complete the I/O
- *	update random number input and dequeue the request, which if
- *	it was tagged may be out of order.
+static void ide_fill_flush_cmd(ide_drive_t *drive, struct request *rq)
+{
+	char *buf = rq->cmd;
+
+	/*
+	 * reuse cdb space for ata command
+	 */
+	memset(buf, 0, sizeof(rq->cmd));
+
+	rq->flags |= REQ_DRIVE_TASK | REQ_STARTED;
+	rq->buffer = buf;
+	rq->buffer[0] = WIN_FLUSH_CACHE;
+
+	if (ide_id_has_flush_cache_ext(drive->id) &&
+	    (drive->capacity64 >= (1UL << 28)))
+		rq->buffer[0] = WIN_FLUSH_CACHE_EXT;
+}
+
+/*
+ * preempt pending requests, and store this cache flush for immediate
+ * execution
  */
- 
-int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+static struct request *ide_queue_flush_cmd(ide_drive_t *drive,
+					   struct request *rq, int post)
 {
-	struct request *rq;
-	unsigned long flags;
-	int ret = 1;
+	struct request *flush_rq = &HWGROUP(drive)->wrq;
 
-	spin_lock_irqsave(&ide_lock, flags);
-	rq = HWGROUP(drive)->rq;
+	/*
+	 * write cache disabled, clear the barrier bit and treat it like
+	 * an ordinary write
+	 */
+	if (!drive->wcache) {
+		rq->flags |= REQ_BAR_PREFLUSH;
+		return rq;
+	}
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	ide_init_drive_cmd(flush_rq);
+	ide_fill_flush_cmd(drive, flush_rq);
 
-	if (!nr_sectors)
-		nr_sectors = rq->hard_cur_sectors;
+	flush_rq->special = rq;
+	flush_rq->nr_sectors = rq->nr_sectors;
+
+	if (!post) {
+		drive->doing_barrier = 1;
+		flush_rq->flags |= REQ_BAR_PREFLUSH;
+		blkdev_dequeue_request(rq);
+	} else
+		flush_rq->flags |= REQ_BAR_POSTFLUSH;
+
+	__elv_add_request(drive->queue, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+	HWGROUP(drive)->rq = NULL;
+	return flush_rq;
+}
+
+static int __ide_end_request(ide_drive_t *drive, struct request *rq,
+			     int uptodate, int nr_sectors)
+{
+	int ret = 1;
+
+	BUG_ON(!(rq->flags & REQ_STARTED));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
 	 * complete the whole request right now
 	 */
-	if (blk_noretry_request(rq) && !uptodate)
+	if (blk_noretry_request(rq) && end_io_error(uptodate))
 		nr_sectors = rq->hard_nr_sectors;
 
+	if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors)
+		rq->errors = -EIO;
+
 	/*
 	 * decide whether to reenable DMA -- 3 is a random magic for now,
 	 * if we DMA timeout more than 3 times, just stay in PIO
@@ -97,15 +136,56 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 
 	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
 		add_disk_randomness(rq->rq_disk);
+
+		if (blk_rq_tagged(rq))
+			blk_queue_end_tag(drive->queue, rq);
+
 		blkdev_dequeue_request(rq);
 		HWGROUP(drive)->rq = NULL;
 		end_that_request_last(rq);
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
 	return ret;
 }
 
+/**
+ *	ide_end_request		-	complete an IDE I/O
+ *	@drive: IDE device for the I/O
+ *	@uptodate:
+ *	@nr_sectors: number of sectors completed
+ *
+ *	This is our end_request wrapper function. We complete the I/O
+ *	update random number input and dequeue the request, which if
+ *	it was tagged may be out of order.
+ */
+
+int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+{
+	struct request *rq;
+	unsigned long flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&ide_lock, flags);
+	rq = HWGROUP(drive)->rq;
+
+	if (!nr_sectors)
+		nr_sectors = rq->hard_cur_sectors;
+
+	if (!blk_barrier_rq(rq) || !drive->wcache)
+		ret = __ide_end_request(drive, rq, uptodate, nr_sectors);
+	else {
+		struct request *flush_rq = &HWGROUP(drive)->wrq;
+
+		flush_rq->nr_sectors -= nr_sectors;
+		if (!flush_rq->nr_sectors) {
+			ide_queue_flush_cmd(drive, rq, 1);
+			ret = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&ide_lock, flags);
+	return ret;
+}
 EXPORT_SYMBOL(ide_end_request);
 
 /**
@@ -137,6 +217,113 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
+/*
+ * FIXME: probably move this somewhere else, name is bad too :)
+ */
+u64 ide_get_error_location(ide_drive_t *drive, char *args)
+{
+	u32 high, low;
+	u8 hcyl, lcyl, sect;
+	u64 sector;
+
+	high = 0;
+	hcyl = args[5];
+	lcyl = args[4];
+	sect = args[3];
+
+	if (ide_id_has_flush_cache_ext(drive->id)) {
+		low = (hcyl << 16) | (lcyl << 8) | sect;
+		HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG);
+		high = ide_read_24(drive);
+	} else {
+		u8 cur = HWIF(drive)->INB(IDE_SELECT_REG);
+		if (cur & 0x40)
+			low = (hcyl << 16) | (lcyl << 8) | sect;
+		else {
+			low = hcyl * drive->head * drive->sect;
+			low += lcyl * drive->sect;
+			low += sect - 1;
+		}
+	}
+
+	sector = ((u64) high << 24) | low;
+	return sector;
+}
+EXPORT_SYMBOL(ide_get_error_location);
+
+static void ide_complete_barrier(ide_drive_t *drive, struct request *rq,
+				 int error)
+{
+	struct request *real_rq = rq->special;
+	int good_sectors, bad_sectors;
+	sector_t sector;
+
+	if (!error) {
+		if (blk_barrier_postflush(rq)) {
+			/*
+			 * this completes the barrier write
+			 */
+			__ide_end_request(drive, real_rq, 1, real_rq->hard_nr_sectors);
+			drive->doing_barrier = 0;
+		} else {
+			/*
+			 * just indicate that we did the pre flush
+			 */
+			real_rq->flags |= REQ_BAR_PREFLUSH;
+			elv_requeue_request(drive->queue, real_rq);
+		}
+		/*
+		 * all is fine, return
+		 */
+		return;
+	}
+
+	/*
+	 * we need to end real_rq, but it's not on the queue currently.
+	 * put it back on the queue, so we don't have to special case
+	 * anything else for completing it
+	 */
+	if (!blk_barrier_postflush(rq))
+		elv_requeue_request(drive->queue, real_rq);
+
+	/*
+	 * drive aborted flush command, assume FLUSH_CACHE_* doesn't
+	 * work and disable barrier support
+	 */
+	if (error & ABRT_ERR) {
+		printk(KERN_ERR "%s: barrier support doesn't work\n", drive->name);
+		__ide_end_request(drive, real_rq, -EOPNOTSUPP, real_rq->hard_nr_sectors);
+		blk_queue_ordered(drive->queue, 0);
+		blk_queue_issue_flush_fn(drive->queue, NULL);
+	} else {
+		/*
+		 * find out what part of the request failed
+		 */
+		good_sectors = 0;
+		if (blk_barrier_postflush(rq)) {
+			sector = ide_get_error_location(drive, rq->buffer);
+
+			if ((sector >= real_rq->hard_sector) &&
+			    (sector < real_rq->hard_sector + real_rq->hard_nr_sectors))
+				good_sectors = sector - real_rq->hard_sector;
+		} else
+			sector = real_rq->hard_sector;
+
+		bad_sectors = real_rq->hard_nr_sectors - good_sectors;
+		if (good_sectors)
+			__ide_end_request(drive, real_rq, 1, good_sectors);
+		if (bad_sectors)
+			__ide_end_request(drive, real_rq, 0, bad_sectors);
+
+		printk(KERN_ERR "%s: failed barrier write: "
+				"sector=%Lx(good=%d/bad=%d)\n",
+				drive->name, (unsigned long long)sector,
+				good_sectors, bad_sectors);
+	}
+
+	drive->doing_barrier = 0;
+}
+
 /**
  *	ide_end_drive_cmd	-	end an explicit drive command
  *	@drive: command 
@@ -226,6 +413,10 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 
 	spin_lock_irqsave(&ide_lock, flags);
 	blkdev_dequeue_request(rq);
+
+	if (blk_barrier_preflush(rq) || blk_barrier_postflush(rq))
+		ide_complete_barrier(drive, rq, err);
+
 	HWGROUP(drive)->rq = NULL;
 	end_that_request_last(rq);
 	spin_unlock_irqrestore(&ide_lock, flags);
@@ -712,6 +903,22 @@ static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
 repeat:	
 	best = NULL;
 	drive = hwgroup->drive;
+
+	/*
+	 * drive is doing pre-flush, ordered write, post-flush sequence. even
+	 * though that is 3 requests, it must be seen as a single transaction.
+	 * we must not preempt this drive until that is complete
+	 */
+	if (drive->doing_barrier) {
+		/*
+		 * small race where queue could get replugged during
+		 * the 3-request flush cycle, just yank the plug since
+		 * we want it to finish asap
+		 */
+		blk_remove_plug(drive->queue);
+		return drive;
+	}
+
 	do {
 		if ((!drive->sleep || time_after_eq(jiffies, drive->sleep))
 		    && !elv_queue_empty(drive->queue)) {
@@ -868,6 +1075,13 @@ void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		}
 
 		/*
+		 * if rq is a barrier write, issue pre cache flush if not
+		 * already done
+		 */
+		if (blk_barrier_rq(rq) && !blk_barrier_preflush(rq))
+			rq = ide_queue_flush_cmd(drive, rq, 0);
+
+		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
 		 * blk_stop_queue() doesn't prevent the elv_next_request()
@@ -917,7 +1131,9 @@ EXPORT_SYMBOL(ide_do_request);
  */
 void do_ide_request(request_queue_t *q)
 {
-	ide_do_request(q->queuedata, IDE_NO_IRQ);
+	ide_drive_t *drive = q->queuedata;
+
+	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
 }
 
 /*
@@ -1286,6 +1502,7 @@ void ide_init_drive_cmd (struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
 	rq->flags = REQ_DRIVE_CMD;
+	rq->ref_count = 1;
 }
 
 EXPORT_SYMBOL(ide_init_drive_cmd);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 4ffaf90163e068..1935045360fbf1 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -893,7 +893,7 @@ static int ide_init_queue(ide_drive_t *drive)
 	if (!q)
 		return 1;
 
-	q->queuedata = HWGROUP(drive);
+	q->queuedata = drive;
 	blk_queue_segment_boundary(q, 0xffff);
 
 	if (!hwif->rqsize)
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index e99d9ec19bbe6e..ce97619211488b 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -437,6 +437,32 @@ u8 ide_dump_status (ide_drive_t *drive, const char *msg, u8 stat)
 #endif	/* FANCY_STATUS_DUMPS */
 		printk("\n");
 	}
+	{
+		struct request *rq;
+		int opcode = 0x100;
+
+		spin_lock(&ide_lock);
+		rq = 0;
+		if (HWGROUP(drive))
+			rq = HWGROUP(drive)->rq;
+		spin_unlock(&ide_lock);
+		if (!rq)
+			goto out;
+		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+			char *args = rq->buffer;
+			if (args)
+				opcode = args[0];
+		} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+			ide_task_t *args = rq->special;
+			if (args) {
+				task_struct_t *tf = (task_struct_t *) args->tfRegister;
+				opcode = tf->command;
+			}
+		}
+
+		printk("ide: failed opcode was %x\n", opcode);
+	}
+out:
 	local_irq_restore(flags);
 	return err;
 }
author	Jens Axboe <axboe@suse.de>	2004-08-22 22:37:19 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-08-22 22:37:19 -0700
commit	5e9dbcaa2c752116b116bda619cf2e5438757e6a (patch)
tree	cfef5aa97d34f24ce0574ca9ebc570e52dc38df0 /drivers
parent	d2a8285687638fad1fb6b4b5e7e809330e21b1eb (diff)
download	history-5e9dbcaa2c752116b116bda619cf2e5438757e6a.tar.gz