From: Jens Axboe <axboe@suse.de>

ide bits
DESC
disk-barrier-ide-symbol-expoprt
EDESC
DESC
disk-barrier ide warning fix
EDESC

drivers/ide/ide-io.c: In function `ide_complete_barrier':
drivers/ide/ide-io.c:301: warning: long long unsigned int format, different type arg (arg 3)

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/drivers/ide/ide-disk.c  |   44 +++++++
 25-akpm/drivers/ide/ide-io.c    |  231 ++++++++++++++++++++++++++++++++++++----
 25-akpm/drivers/ide/ide-probe.c |    2 
 25-akpm/include/linux/ide.h     |    6 +
 4 files changed, 260 insertions(+), 23 deletions(-)

diff -puN drivers/ide/ide-disk.c~disk-barrier-ide drivers/ide/ide-disk.c
--- 25/drivers/ide/ide-disk.c~disk-barrier-ide	2004-06-26 13:51:57.801753032 -0700
+++ 25-akpm/drivers/ide/ide-disk.c	2004-06-26 13:51:57.811751512 -0700
@@ -1203,6 +1203,41 @@ static ide_proc_entry_t idedisk_proc[] =
 
 #endif	/* CONFIG_PROC_FS */
 
+static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk,
+			       sector_t *error_sector)
+{
+	ide_drive_t *drive = q->queuedata;
+	struct request *rq;
+	int ret;
+
+	if (!drive->wcache)
+		return 0;
+
+	rq = blk_get_request(q, WRITE, __GFP_WAIT);
+
+	memset(rq->cmd, 0, sizeof(rq->cmd));
+
+	if ((drive->id->cfs_enable_2 & 0x2400) == 0x2400)
+		rq->cmd[0] = WIN_FLUSH_CACHE_EXT;
+	else
+		rq->cmd[0] = WIN_FLUSH_CACHE;
+
+
+	rq->flags |= REQ_DRIVE_TASK | REQ_SOFTBARRIER;
+	rq->buffer = rq->cmd;
+
+	ret = blk_execute_rq(q, disk, rq);
+
+	/*
+	 * if we failed and caller wants error offset, get it
+	 */
+	if (ret && error_sector)
+		*error_sector = ide_get_error_location(drive, rq->cmd);
+
+	blk_put_request(rq);
+	return ret;
+}
+
 /*
  * This is tightly woven into the driver->do_special can not touch.
  * DON'T do it again until a total personality rewrite is committed.
@@ -1241,6 +1276,7 @@ static int set_nowerr(ide_drive_t *drive
 static int write_cache (ide_drive_t *drive, int arg)
 {
 	ide_task_t args;
+	int err;
 
 	if (!ide_id_has_flush_cache(drive->id))
 		return 1;
@@ -1251,7 +1287,10 @@ static int write_cache (ide_drive_t *dri
 	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SETFEATURES;
 	args.command_type			= IDE_DRIVE_TASK_NO_DATA;
 	args.handler				= &task_no_data_intr;
-	(void) ide_raw_taskfile(drive, &args, NULL);
+
+	err = ide_raw_taskfile(drive, &args, NULL);
+	if (err)
+		return err;
 
 	drive->wcache = arg;
 	return 0;
@@ -1543,6 +1582,9 @@ static void idedisk_setup (ide_drive_t *
 		drive->wcache = 1;
 
 	write_cache(drive, 1);
+
+	blk_queue_ordered(drive->queue, 1);
+	blk_queue_issue_flush_fn(drive->queue, idedisk_issue_flush);
 }
 
 static void ide_cacheflush_p(ide_drive_t *drive)
diff -puN drivers/ide/ide-io.c~disk-barrier-ide drivers/ide/ide-io.c
--- 25/drivers/ide/ide-io.c~disk-barrier-ide	2004-06-26 13:51:57.803752728 -0700
+++ 25-akpm/drivers/ide/ide-io.c	2004-06-26 13:51:57.814751056 -0700
@@ -54,30 +54,62 @@
 #include <asm/io.h>
 #include <asm/bitops.h>
 
-/**
- *	ide_end_request		-	complete an IDE I/O
- *	@drive: IDE device for the I/O
- *	@uptodate: 
- *	@nr_sectors: number of sectors completed
- *
- *	This is our end_request wrapper function. We complete the I/O
- *	update random number input and dequeue the request, which if
- *	it was tagged may be out of order.
+static void ide_fill_flush_cmd(ide_drive_t *drive, struct request *rq)
+{
+	char *buf = rq->cmd;
+
+	/*
+	 * reuse cdb space for ata command
+	 */
+	memset(buf, 0, sizeof(rq->cmd));
+
+	rq->flags |= REQ_DRIVE_TASK | REQ_STARTED;
+	rq->buffer = buf;
+	rq->buffer[0] = WIN_FLUSH_CACHE;
+
+	if (drive->id->cfs_enable_2 & 0x2400)
+		rq->buffer[0] = WIN_FLUSH_CACHE_EXT;
+}
+
+/*
+ * preempt pending requests, and store this cache flush for immediate
+ * execution
  */
- 
-int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+static struct request *ide_queue_flush_cmd(ide_drive_t *drive,
+					   struct request *rq, int post)
 {
-	struct request *rq;
-	unsigned long flags;
-	int ret = 1;
+	struct request *flush_rq = &HWGROUP(drive)->wrq;
 
-	spin_lock_irqsave(&ide_lock, flags);
-	rq = HWGROUP(drive)->rq;
+	/*
+	 * write cache disabled, just return barrier write immediately
+	 */
+	if (!drive->wcache)
+		return rq;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	ide_init_drive_cmd(flush_rq);
+	ide_fill_flush_cmd(drive, flush_rq);
 
-	if (!nr_sectors)
-		nr_sectors = rq->hard_cur_sectors;
+	flush_rq->special = rq;
+	flush_rq->nr_sectors = rq->nr_sectors;
+
+	if (!post) {
+		drive->doing_barrier = 1;
+		flush_rq->flags |= REQ_BAR_PREFLUSH;
+		blkdev_dequeue_request(rq);
+	} else
+		flush_rq->flags |= REQ_BAR_POSTFLUSH;
+
+	__elv_add_request(drive->queue, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+	HWGROUP(drive)->rq = NULL;
+	return flush_rq;
+}
+
+static int __ide_end_request(ide_drive_t *drive, struct request *rq,
+			     int uptodate, int nr_sectors)
+{
+	int ret = 1;
+
+	BUG_ON(!(rq->flags & REQ_STARTED));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -86,6 +118,9 @@ int ide_end_request (ide_drive_t *drive,
 	if (blk_noretry_request(rq) && !uptodate)
 		nr_sectors = rq->hard_nr_sectors;
 
+	if (!blk_fs_request(rq) && !uptodate && !rq->errors)
+		rq->errors = -EIO;
+
 	/*
 	 * decide whether to reenable DMA -- 3 is a random magic for now,
 	 * if we DMA timeout more than 3 times, just stay in PIO
@@ -97,15 +132,56 @@ int ide_end_request (ide_drive_t *drive,
 
 	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
 		add_disk_randomness(rq->rq_disk);
+
+		if (blk_rq_tagged(rq))
+			blk_queue_end_tag(drive->queue, rq);
+
 		blkdev_dequeue_request(rq);
 		HWGROUP(drive)->rq = NULL;
 		end_that_request_last(rq);
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
 	return ret;
 }
 
+/**
+ *	ide_end_request		-	complete an IDE I/O
+ *	@drive: IDE device for the I/O
+ *	@uptodate:
+ *	@nr_sectors: number of sectors completed
+ *
+ *	This is our end_request wrapper function. We complete the I/O
+ *	update random number input and dequeue the request, which if
+ *	it was tagged may be out of order.
+ */
+
+int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+{
+	struct request *rq;
+	unsigned long flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&ide_lock, flags);
+	rq = HWGROUP(drive)->rq;
+
+	if (!nr_sectors)
+		nr_sectors = rq->hard_cur_sectors;
+
+	if (!blk_barrier_rq(rq))
+		ret = __ide_end_request(drive, rq, uptodate, nr_sectors);
+	else {
+		struct request *flush_rq = &HWGROUP(drive)->wrq;
+
+		flush_rq->nr_sectors -= nr_sectors;
+		if (!flush_rq->nr_sectors) {
+			ide_queue_flush_cmd(drive, rq, 1);
+			ret = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&ide_lock, flags);
+	return ret;
+}
 EXPORT_SYMBOL(ide_end_request);
 
 /**
@@ -137,6 +213,96 @@ static void ide_complete_pm_request (ide
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
+/*
+ * FIXME: probably move this somewhere else, name is bad too :)
+ */
+u64 ide_get_error_location(ide_drive_t *drive, char *args)
+{
+	u32 high, low;
+	u8 hcyl, lcyl, sect;
+	u64 sector;
+
+	high = 0;
+	hcyl = args[5];
+	lcyl = args[4];
+	sect = args[3];
+
+	if (drive->id->cfs_enable_2 & 0x2400) {
+		low = (hcyl << 16) | (lcyl << 8) | sect;
+		HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG);
+		high = ide_read_24(drive);
+	} else {
+		u8 cur = HWIF(drive)->INB(IDE_SELECT_REG);
+		if (cur & 0x40)
+			low = (hcyl << 16) | (lcyl << 8) | sect;
+		else {
+			low = hcyl * drive->head * drive->sect;
+			low += lcyl * drive->sect;
+			low += sect - 1;
+		}
+	}
+
+	sector = ((u64) high << 24) | low;
+	return sector;
+}
+EXPORT_SYMBOL(ide_get_error_location);
+
+static void ide_complete_barrier(ide_drive_t *drive, struct request *rq,
+				 int error)
+{
+	struct request *real_rq = rq->special;
+	int good_sectors, bad_sectors;
+	sector_t sector;
+
+	if (!error) {
+		if (blk_barrier_postflush(rq)) {
+			/*
+			 * this completes the barrier write
+			 */
+			__ide_end_request(drive, real_rq, 1, real_rq->hard_nr_sectors);
+			drive->doing_barrier = 0;
+		} else {
+			/*
+			 * just indicate that we did the pre flush
+			 */
+			real_rq->flags |= REQ_BAR_PREFLUSH;
+			elv_requeue_request(drive->queue, real_rq);
+		}
+		/*
+		 * all is fine, return
+		 */
+		return;
+	}
+
+	/*
+	 * bummer, flush failed. if it was the pre-flush, fail the barrier.
+	 * if it was the post-flush, complete the succesful part of the request
+	 * and fail the rest
+	 */
+	good_sectors = 0;
+	if (blk_barrier_postflush(rq)) {
+		sector = ide_get_error_location(drive, rq->buffer);
+
+		if ((sector >= real_rq->hard_sector) &&
+		    (sector < real_rq->hard_sector + real_rq->hard_nr_sectors))
+			good_sectors = sector - real_rq->hard_sector;
+	} else
+		sector = real_rq->hard_sector;
+
+	bad_sectors = real_rq->hard_nr_sectors - good_sectors;
+	if (good_sectors)
+		__ide_end_request(drive, real_rq, 1, good_sectors);
+	if (bad_sectors)
+		__ide_end_request(drive, real_rq, 0, bad_sectors);
+
+	drive->doing_barrier = 0;
+
+	printk(KERN_ERR "%s: failed barrier write: "
+			"sector=%Lx(good=%d/bad=%d)\n",
+			drive->name, (unsigned long long)sector,
+			good_sectors, bad_sectors);
+}
+
 /**
  *	ide_end_drive_cmd	-	end an explicit drive command
  *	@drive: command 
@@ -226,6 +392,10 @@ void ide_end_drive_cmd (ide_drive_t *dri
 
 	spin_lock_irqsave(&ide_lock, flags);
 	blkdev_dequeue_request(rq);
+
+	if (blk_barrier_preflush(rq) || blk_barrier_postflush(rq))
+		ide_complete_barrier(drive, rq, err);
+
 	HWGROUP(drive)->rq = NULL;
 	end_that_request_last(rq);
 	spin_unlock_irqrestore(&ide_lock, flags);
@@ -712,6 +882,15 @@ static inline ide_drive_t *choose_drive 
 repeat:	
 	best = NULL;
 	drive = hwgroup->drive;
+
+	/*
+	 * drive is doing pre-flush, ordered write, post-flush sequence. even
+	 * though that is 3 requests, it must be seen as a single transaction.
+	 * we must not preempt this drive until that is complete
+	 */
+	if (drive->doing_barrier)
+		return drive;
+
 	do {
 		if ((!drive->sleep || time_after_eq(jiffies, drive->sleep))
 		    && !elv_queue_empty(drive->queue)) {
@@ -868,6 +1047,13 @@ void ide_do_request (ide_hwgroup_t *hwgr
 		}
 
 		/*
+		 * if rq is a barrier write, issue pre cache flush if not
+		 * already done
+		 */
+		if (blk_barrier_rq(rq) && !blk_barrier_preflush(rq))
+			rq = ide_queue_flush_cmd(drive, rq, 0);
+
+		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
 		 * blk_stop_queue() doesn't prevent the elv_next_request()
@@ -917,7 +1103,9 @@ EXPORT_SYMBOL(ide_do_request);
  */
 void do_ide_request(request_queue_t *q)
 {
-	ide_do_request(q->queuedata, IDE_NO_IRQ);
+	ide_drive_t *drive = q->queuedata;
+
+	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
 }
 
 /*
@@ -1286,6 +1474,7 @@ void ide_init_drive_cmd (struct request 
 {
 	memset(rq, 0, sizeof(*rq));
 	rq->flags = REQ_DRIVE_CMD;
+	rq->ref_count = 1;
 }
 
 EXPORT_SYMBOL(ide_init_drive_cmd);
diff -puN drivers/ide/ide-probe.c~disk-barrier-ide drivers/ide/ide-probe.c
--- 25/drivers/ide/ide-probe.c~disk-barrier-ide	2004-06-26 13:51:57.805752424 -0700
+++ 25-akpm/drivers/ide/ide-probe.c	2004-06-26 13:51:57.815750904 -0700
@@ -893,7 +893,7 @@ static int ide_init_queue(ide_drive_t *d
 	if (!q)
 		return 1;
 
-	q->queuedata = HWGROUP(drive);
+	q->queuedata = drive;
 	blk_queue_segment_boundary(q, 0xffff);
 
 	if (!hwif->rqsize)
diff -puN include/linux/ide.h~disk-barrier-ide include/linux/ide.h
--- 25/include/linux/ide.h~disk-barrier-ide	2004-06-26 13:51:57.806752272 -0700
+++ 25-akpm/include/linux/ide.h	2004-06-26 13:51:57.816750752 -0700
@@ -780,6 +780,7 @@ typedef struct ide_drive_s {
 	u8	sect;		/* "real" sectors per track */
 	u8	bios_head;	/* BIOS/fdisk/LILO number of heads */
 	u8	bios_sect;	/* BIOS/fdisk/LILO sectors per track */
+	u8	doing_barrier;	/* state, 1=currently doing flush */
 
 	unsigned int	bios_cyl;	/* BIOS/fdisk/LILO number of cyls */
 	unsigned int	cyl;		/* "real" number of cyls */
@@ -1293,6 +1294,11 @@ extern ide_startstop_t ide_do_reset (ide
 extern void ide_init_drive_cmd (struct request *rq);
 
 /*
+ * this function returns error location sector offset in case of a write error
+ */
+extern u64 ide_get_error_location(ide_drive_t *, char *);
+
+/*
  * "action" parameter type for ide_do_drive_cmd() below.
  */
 typedef enum {
_