diff -urN /md0/kernels/2.4/v2.4.9-ac14/@ aio-v2.4.9-ac14.diff/@
--- /md0/kernels/2.4/v2.4.9-ac14/@	Wed Dec 31 19:00:00 1969
+++ aio-v2.4.9-ac14.diff/@	Mon Sep 24 19:09:13 2001
@@ -0,0 +1,397 @@
+/*
+   md_k.h : kernel internal structure of the Linux MD driver
+   Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   You should have received a copy of the GNU General Public License
+   (for example /usr/src/linux/COPYING); if not, write to the Free
+   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _MD_K_H
+#define _MD_K_H
+
+#include	// for panic()
+
+#define MD_RESERVED	0UL
+#define LINEAR		1UL
+#define STRIPED		2UL
+#define RAID0		STRIPED
+#define RAID1		3UL
+#define RAID5		4UL
+#define TRANSLUCENT	5UL
+#define HSM		6UL
+#define MAX_PERSONALITY	7UL
+
+static inline int pers_to_level (int pers)
+{
+	switch (pers) {
+		case HSM:		return -3;
+		case TRANSLUCENT:	return -2;
+		case LINEAR:		return -1;
+		case RAID0:		return 0;
+		case RAID1:		return 1;
+		case RAID5:		return 5;
+	}
+	panic("pers_to_level()");
+	return 0;
+}
+
+static inline int level_to_pers (int level)
+{
+	switch (level) {
+		case -3: return HSM;
+		case -2: return TRANSLUCENT;
+		case -1: return LINEAR;
+		case 0: return RAID0;
+		case 1: return RAID1;
+		case 4:
+		case 5: return RAID5;
+	}
+	return MD_RESERVED;
+}
+
+typedef struct mddev_s mddev_t;
+typedef struct mdk_rdev_s mdk_rdev_t;
+
+#if (MINORBITS != 8)
+#error MD doesnt handle bigger kdev yet
+#endif
+
+#define MAX_MD_DEVS	(1<<MINORBITS)
+
+static inline int disk_faulty(mdp_disk_t * d)
+{
+	return d->state & (1 << MD_DISK_FAULTY);
+}
+
+static inline int disk_active(mdp_disk_t * d)
+{
+	return d->state & (1 << MD_DISK_ACTIVE);
+}
+
+static inline int disk_sync(mdp_disk_t * d)
+{
+	return d->state & (1 << MD_DISK_SYNC);
+}
+
+static inline int disk_spare(mdp_disk_t * d)
+{
+	return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
+}
+
+static inline int disk_removed(mdp_disk_t * d)
+{
+	return d->state & (1 << MD_DISK_REMOVED);
+}
+
+static inline void mark_disk_faulty(mdp_disk_t * d)
+{
+	d->state |= (1 << MD_DISK_FAULTY);
+}
+
+static inline void mark_disk_active(mdp_disk_t * d)
+{
+	d->state |= (1 << MD_DISK_ACTIVE);
+}
+
+static inline void mark_disk_sync(mdp_disk_t * d)
+{
+	d->state |= (1 << MD_DISK_SYNC);
+}
+
+static inline void mark_disk_spare(mdp_disk_t * d)
+{
+	d->state = 0;
+}
+
+static inline void mark_disk_removed(mdp_disk_t * d)
+{
+	d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
+}
+
+static inline void mark_disk_inactive(mdp_disk_t * d)
+{
+	d->state &= ~(1 << MD_DISK_ACTIVE);
+}
+
+static inline void mark_disk_nonsync(mdp_disk_t * d)
+{
+	d->state &= ~(1 << MD_DISK_SYNC);
+}
+
+/*
+ * MD's 'extended' device
+ */
+struct mdk_rdev_s
+{
+	struct md_list_head same_set;	/* RAID devices within the same set */
+	struct md_list_head all;	/* all RAID devices */
+	struct md_list_head pending;	/* undetected RAID devices */
+
+	kdev_t dev;			/* Device number */
+	kdev_t old_dev;			/* "" when it was last imported */
+	unsigned long size;		/* Device size (in blocks) */
+	mddev_t *mddev;			/* RAID array if running */
+	unsigned long last_events;	/* IO event timestamp */
+
+	struct block_device *bdev;	/* block device handle */
+
+	mdp_super_t *sb;
+	unsigned long sb_offset;
+
+	int faulty;			/* if faulty do not issue IO requests */
+	int desc_nr;			/* descriptor index in the superblock */
+};
+
+
+/*
+ * disk operations in a working array:
+ */
+#define DISKOP_SPARE_INACTIVE	0
+#define DISKOP_SPARE_WRITE	1
+#define DISKOP_SPARE_ACTIVE	2
+#define DISKOP_HOT_REMOVE_DISK	3
+#define DISKOP_HOT_ADD_DISK	4
+
+typedef struct mdk_personality_s mdk_personality_t;
+
+struct mddev_s
+{
+	void *private;
+	mdk_personality_t *pers;
+	int __minor;
+	mdp_super_t *sb;
+	int nb_dev;
+	struct md_list_head disks;
+	int sb_dirty;
+	mdu_param_t param;
+	int ro;
+	unsigned long curr_resync;	/* blocks scheduled */
+	unsigned long resync_mark;	/* a recent timestamp */
+	unsigned long resync_mark_cnt;	/* blocks written at resync_mark */
+	char *name;
+	int recovery_running;
+	struct semaphore reconfig_sem;
+	struct semaphore recovery_sem;
+	struct semaphore resync_sem;
+	atomic_t active;
+
+	atomic_t recovery_active;	/* blocks scheduled, but not written */
+	md_wait_queue_head_t recovery_wait;
+
+	struct md_list_head all_mddevs;
+};
+
+struct mdk_personality_s
+{
+	char *name;
+	int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh);
+	int (*run)(mddev_t *mddev);
+	int (*stop)(mddev_t *mddev);
+	int (*status)(char *page, mddev_t *mddev);
+	int (*error_handler)(mddev_t *mddev, kdev_t dev);
+
+/*
+ * Some personalities (RAID-1, RAID-5) can have disks hot-added and
+ * hot-removed. Hot removal is different from failure. (failure marks
+ * a disk inactive, but the disk is still part of the array) The interface
+ * to such operations is the 'pers->diskop()' function, can be NULL.
+ *
+ * the diskop function can change the pointer pointing to the incoming
+ * descriptor, but must do so very carefully. (currently only
+ * SPARE_ACTIVE expects such a change)
+ */
+	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
+
+	int (*stop_resync)(mddev_t *mddev);
+	int (*restart_resync)(mddev_t *mddev);
+	int (*sync_request)(mddev_t *mddev, unsigned long block_nr);
+};
+
+
+/*
+ * Currently we index md_array directly, based on the minor
+ * number. This will have to change to dynamic allocation
+ * once we start supporting partitioning of md devices.
+ */
+static inline int mdidx (mddev_t * mddev)
+{
+	return mddev->__minor;
+}
+
+static inline kdev_t mddev_to_kdev(mddev_t * mddev)
+{
+	return MKDEV(MD_MAJOR, mdidx(mddev));
+}
+
+extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
+extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
+
+/*
+ * iterates through some rdev ringlist. It's safe to remove the
+ * current 'rdev'. Dont touch 'tmp' though.
+ */
+#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)		\
+								\
+	for (tmp = head.next;					\
+		rdev = md_list_entry(tmp, mdk_rdev_t, field),	\
+			tmp = tmp->next, tmp->prev != &head	\
+		; )
+/*
+ * iterates through the 'same array disks' ringlist
+ */
+#define ITERATE_RDEV(mddev,rdev,tmp)				\
+	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
+
+/*
+ * Same as above, but assumes that the device has rdev->desc_nr numbered
+ * from 0 to mddev->nb_dev, and iterates through rdevs in ascending order.
+ */
+#define ITERATE_RDEV_ORDERED(mddev,rdev,i)			\
+	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)
+
+
+/*
+ * Iterates through all 'RAID managed disks'
+ */
+#define ITERATE_RDEV_ALL(rdev,tmp)				\
+	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)
+
+/*
+ * Iterates through 'pending RAID disks'
+ */
+#define ITERATE_RDEV_PENDING(rdev,tmp)				\
+	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
+
+/*
+ * iterates through all used mddevs in the system.
+ */
+#define ITERATE_MDDEV(mddev,tmp)				\
+								\
+	for (tmp = all_mddevs.next;				\
+		mddev = md_list_entry(tmp, mddev_t, all_mddevs),\
+			tmp = tmp->next, tmp->prev != &all_mddevs\
+		; )
+
+static inline int lock_mddev (mddev_t * mddev)
+{
+	return down_interruptible(&mddev->reconfig_sem);
+}
+
+static inline void unlock_mddev (mddev_t * mddev)
+{
+	up(&mddev->reconfig_sem);
+}
+
+#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
+				x = y; y = __tmp; } while (0)
+
+typedef struct mdk_thread_s {
+	void (*run) (void *data);
+	void *data;
+	md_wait_queue_head_t wqueue;
+	unsigned long flags;
+	struct completion *event;
+	struct task_struct *tsk;
+	const char *name;
+} mdk_thread_t;
+
+#define THREAD_WAKEUP 0
+
+#define MAX_DISKNAME_LEN 64
+
+typedef struct dev_name_s {
+	struct md_list_head list;
+	kdev_t dev;
+	char namebuf [MAX_DISKNAME_LEN];
+	char *name;
+} dev_name_t;
+
+
+#define __wait_event_lock_irq(wq, condition, lock)		\
+do {								\
+	wait_queue_t __wait;					\
+	init_waitqueue_entry(&__wait, current);			\
+								\
+	add_wait_queue(&wq, &__wait);				\
+	for (;;) {						\
+		set_current_state(TASK_UNINTERRUPTIBLE);	\
+		if (condition)					\
+			break;					\
+		spin_unlock_irq(&lock);				\
+		run_task_queue(&tq_disk);			\
+		schedule();					\
+		spin_lock_irq(&lock);				\
+	}							\
+	current->state = TASK_RUNNING;				\
+	remove_wait_queue(&wq, &__wait);			\
+} while (0)
+
+#define wait_event_lock_irq(wq, condition, lock)		\
+do {								\
+	if (condition)						\
+		break;						\
+	__wait_event_lock_irq(wq, condition, lock);		\
+} while (0)
+
+
+#define __wait_disk_event(wq, condition)			\
+do {								\
+	wait_queue_t __wait;					\
+	init_waitqueue_entry(&__wait, current);			\
+								\
+	add_wait_queue(&wq, &__wait);				\
+	for (;;) {						\
+		set_current_state(TASK_UNINTERRUPTIBLE);	\
+		if (condition)					\
+			break;					\
+		run_task_queue(&tq_disk);			\
+		schedule();					\
+	}							\
+	current->state = TASK_RUNNING;				\
+	remove_wait_queue(&wq, &__wait);			\
+} while (0)
+
+#define wait_disk_event(wq, condition)				\
+do {								\
+	if (condition)						\
+		break;						\
+	__wait_disk_event(wq, condition);			\
+} while (0)
+
+#endif
+
diff -urN /md0/kernels/2.4/v2.4.9-ac14/MAINTAINERS aio-v2.4.9-ac14.diff/MAINTAINERS
--- /md0/kernels/2.4/v2.4.9-ac14/MAINTAINERS	Mon Sep 24 02:14:12 2001
+++ aio-v2.4.9-ac14.diff/MAINTAINERS	Mon Sep 24 19:09:13 2001
@@ -201,6 +201,12 @@
 L:	linux-net@vger.kernel.org
 S:	Maintained
 
+ASYNC IO
+P:	Benjamin LaHaise
+M:	bcrl@redhat.com
+L:	linux-aio@kvack.org
+S:	Maintained
+
 AX.25 NETWORK LAYER
 P:	Matthias Welwarsky
 M:	dg2fef@afthd.tu-darmstadt.de
diff -urN /md0/kernels/2.4/v2.4.9-ac14/Makefile aio-v2.4.9-ac14.diff/Makefile
--- /md0/kernels/2.4/v2.4.9-ac14/Makefile	Mon Sep 24 02:14:12 2001
+++ aio-v2.4.9-ac14.diff/Makefile	Mon Sep 24 19:09:29 2001
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 9
-EXTRAVERSION = -ac14
+EXTRAVERSION = -ac14-aio
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
diff -urN /md0/kernels/2.4/v2.4.9-ac14/arch/i386/kernel/entry.S aio-v2.4.9-ac14.diff/arch/i386/kernel/entry.S
--- /md0/kernels/2.4/v2.4.9-ac14/arch/i386/kernel/entry.S	Mon Sep 24 02:14:12 2001
+++ aio-v2.4.9-ac14.diff/arch/i386/kernel/entry.S	Mon Sep 24 19:09:13 2001
@@ -626,6 +626,12 @@
 	.long SYMBOL_NAME(sys_getdents64)	/* 220 */
 	.long SYMBOL_NAME(sys_fcntl64)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved for TUX */
+	.long SYMBOL_NAME(sys___io_setup)	/* 223 */
+	.long SYMBOL_NAME(sys___io_destroy)
+	.long SYMBOL_NAME(sys___io_getevents)
+	.long SYMBOL_NAME(sys___io_submit)
+	.long SYMBOL_NAME(sys___io_cancel)
+	.long SYMBOL_NAME(sys___io_wait)
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long SYMBOL_NAME(sys_ni_syscall)
diff -urN /md0/kernels/2.4/v2.4.9-ac14/drivers/char/raw.c aio-v2.4.9-ac14.diff/drivers/char/raw.c
--- /md0/kernels/2.4/v2.4.9-ac14/drivers/char/raw.c	Mon Sep 24 02:14:14 2001
+++ aio-v2.4.9-ac14.diff/drivers/char/raw.c	Mon Sep 24 21:33:09 2001
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #define dprintk(x...) 
 
@@ -36,13 +38,18 @@
 int	raw_open(struct inode *, struct file *);
 int	raw_release(struct inode *, struct file *);
 int	raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
-
+int	raw_kvec_read(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos);
+int	raw_kvec_write(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos);
 
 static struct file_operations raw_fops = {
 	read:		raw_read,
 	write:		raw_write,
 	open:		raw_open,
 	release:	raw_release,
+	aio_read:	generic_file_aio_read,
+	aio_write:	generic_file_aio_write,
+	kvec_read:	raw_kvec_read,
+	kvec_write:	raw_kvec_write,
 };
 
 static struct file_operations raw_ctl_fops = {
@@ -130,7 +137,7 @@
 	 * the blocksize on a device which is already mounted.
 	 */
 
-	sector_size = 512;
+	sector_size = 2048;
 	if (is_mounted(rdev)) {
 		if (blksize_size[MAJOR(rdev)])
 			sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)];
@@ -260,7 +267,6 @@
 }
 
-
 ssize_t	raw_read(struct file *filp, char * buf,
 		size_t size, loff_t *offp)
 {
@@ -393,3 +399,83 @@
 out:
 	return err;
 }
+
+static int raw_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);
+int raw_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
+{
+	return raw_kvec_rw(file, READ, cb, size, pos);
+}
+
+int raw_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
+{
+	return raw_kvec_rw(file, WRITE, cb, size, pos);
+}
+
+int raw_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos)
+{
+	int		err;
+	unsigned	minor;
+	kdev_t		dev;
+	unsigned long	limit, blocknr, blocks;
+
+	unsigned	sector_size, sector_bits, sector_mask;
+	unsigned	max_sectors;
+
+printk("raw: cb.kvec=%p\n", cb.vec);
+	pr_debug("raw_rw_kiovec: %p %d %d %p %d %d %Lu\n", filp, rw, nr, kiovec, flags, size, pos);
+	/*
+	 * First, a few checks on device size limits
+	 */
+
+	minor = MINOR(filp->f_dentry->d_inode->i_rdev);
+	dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
+	sector_size = raw_devices[minor].sector_size;
+	sector_bits = raw_devices[minor].sector_bits;
+	sector_mask = sector_size - 1;
+	max_sectors = 25000; //KIO_MAX_SECTORS >> (sector_bits - 9);
+
+	if (blk_size[MAJOR(dev)])
+		limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits;
+	else
+		limit = INT_MAX;
+	dprintk ("rw_raw_dev_async: dev %d:%d (+%d)\n",
+		 MAJOR(dev), MINOR(dev), limit);
+
+	err = -EINVAL;
+	if ((pos < 0) || (pos & sector_mask) || (size & sector_mask)) {
+		printk("pos/size wrong\n");
+		goto out;
+	}
+
+	err = -ENXIO;
+	if ((pos >> sector_bits) >= limit) {
+		printk("raw: %Lu > %lu, %d\n", pos >> sector_bits, limit, sector_bits);
+		goto out;
+	}
+
+	/*
+	 * Split the IO into KIO_MAX_SECTORS chunks, mapping and
+	 * unmapping the single kiobuf as we go to perform each chunk of
+	 * IO.
+	 */
+
+	blocknr = pos >> sector_bits;
+	blocks = size >> sector_bits;
+	if (blocks > max_sectors)
+		blocks = max_sectors;
+	if (blocks > limit - blocknr)
+		blocks = limit - blocknr;
+	err = -ENXIO;
+	pr_debug("raw: !blocks %d %ld %ld\n", max_sectors, limit, blocknr);
+	if (!blocks)
+		goto out;
+
+printk("raw: cb.kvec=%p\n", cb.vec);
+	err = brw_kvec_async(rw, cb, dev, blocks, blocknr, sector_bits);
+	pr_debug("brw_kiovec_async: %d\n", err);
+
+out:
+	pr_debug("brw_kiovec_async: ret is %d\n", err);
+	return err;
+}
+
diff -urN /md0/kernels/2.4/v2.4.9-ac14/drivers/net/ns83820.c aio-v2.4.9-ac14.diff/drivers/net/ns83820.c
--- /md0/kernels/2.4/v2.4.9-ac14/drivers/net/ns83820.c	Mon Sep 24 02:14:14 2001
+++ aio-v2.4.9-ac14.diff/drivers/net/ns83820.c	Tue Sep 25 15:14:59 2001
@@ -1,7 +1,7 @@
-#define VERSION "0.11"
+#define VERSION "0.12pre"
 /* ns83820.c by Benjamin LaHaise
  *
- * $Revision: 1.34.2.2 $
+ * $Revision: 1.34.2.7 $
  *
  * Copyright 2001 Benjamin LaHaise.
  * Copyright 2001 Red Hat.
@@ -41,7 +41,9 @@
  *	20010827	0.10 - fix ia64 unaligned access.
  *	20010906	0.11 - accept all packets with checksum errors as
  *			       otherwise fragments get lost
-			     - fix >> 32 bugs
+ *			     - fix >> 32 bugs
+ *			0.12 - add statistics counters
+ *			       not yet- add multicast support
  *
  * Driver Overview
  * ===============
@@ -61,7 +63,9 @@
  *	Cameo		SOHO-GA2000T	SOHO-GA2500T
  *	D-Link		DGE-500T
  *	PureData	PDP8023Z-TG
- *	SMC		SMC9462TX
+ *	SMC		SMC9452TX	SMC9462TX
+ *
+ * Special thanks to SMC for providing hardware to test this driver on.
  *
  * Reports of success or failure would be greatly appreciated.
  */
@@ -80,16 +84,15 @@
 #include 	/* for iph */
 #include 	/* for IPPROTO_... */
 #include 
+#include 
 //#include 
+#include 
+
 /* Dprintk is used for more interesting debug events */
 #undef Dprintk
 #define	Dprintk			dprintk
 
-#if !defined(GCC_VERSION) || (GCC_VERSION < 2096)
-#define __builtin_expect(x,y)	(x)
-#endif
-
 #ifdef CONFIG_HIGHMEM64G
 #define USE_64BIT_ADDR
 #elif defined(__ia64__)
@@ -367,6 +370,7 @@
 
 struct ns83820 {
 	struct net_device	net_dev;
+	struct net_device_stats	stats;
 	u8			*base;
 
 	struct pci_dev		*pci_dev;
@@ -733,39 +737,22 @@
 			kfree_skb(skb);
 			skb = tmp;
 #endif
+			if (cmdsts & CMDSTS_DEST_MULTI)
+				dev->stats.multicast ++;
+			dev->stats.rx_packets ++;
+			dev->stats.rx_bytes += len;
 			if ((extsts & 0x002a0000) && !(extsts & 0x00540000)) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 			} else {
 				skb->ip_summed = CHECKSUM_NONE;
 			}
 			skb->protocol = eth_type_trans(skb, &dev->net_dev);
-			switch (netif_rx(skb)) {
-			case NET_RX_SUCCESS:
-				dev->ihr = 3;
-				break;
-			case NET_RX_CN_LOW:
-				dev->ihr = 3;
-				break;
-			case NET_RX_CN_MOD:
-				dev->ihr = dev->ihr + 1;
-				break;
-			case NET_RX_CN_HIGH:
-				dev->ihr += dev->ihr/2 + 1;
-				break;
-			case NET_RX_DROP:
-				dev->ihr = 255;
-				break;
-			}
-			if (dev->ihr > 255)
-				dev->ihr = 255;
+			if (NET_RX_DROP == netif_rx(skb))
+				dev->stats.rx_dropped ++;
 #ifndef __i386__
 		done:;
 #endif
 		} else {
-			static int err;
-			if (err++ < 20) {
-				Dprintk("error packet: cmdsts: %08x extsts: %08x\n", cmdsts, extsts);
-			}
 			kfree_skb(skb);
 		}
 
@@ -808,6 +795,13 @@
 	       !(CMDSTS_OWN & (cmdsts = desc[CMDSTS])) ) {
 		struct sk_buff *skb;
 
+		if (cmdsts & CMDSTS_ERR)
+			dev->stats.tx_errors ++;
+		if (cmdsts & CMDSTS_OK)
+			dev->stats.tx_packets ++;
+		if (cmdsts & CMDSTS_OK)
+			dev->stats.tx_bytes += cmdsts & 0xffff;
+
 		dprintk("tx_done_idx=%d free_idx=%d cmdsts=%08x\n",
 			tx_done_idx, dev->tx_free_idx, desc[CMDSTS]);
 		skb = dev->tx_skbs[tx_done_idx];
@@ -986,6 +980,35 @@
 	return 0;
 }
 
+static void ns83820_update_stats(struct ns83820 *dev)
+{
+	u8 *base = dev->base;
+
+
dev->stats.rx_errors += readl(base + 0x60) & 0xffff; + dev->stats.rx_crc_errors += readl(base + 0x64) & 0xffff; + dev->stats.rx_missed_errors += readl(base + 0x68) & 0xffff; + dev->stats.rx_frame_errors += readl(base + 0x6c) & 0xffff; + /*dev->stats.rx_symbol_errors +=*/ readl(base + 0x70); + dev->stats.rx_length_errors += readl(base + 0x74) & 0xffff; + dev->stats.rx_length_errors += readl(base + 0x78) & 0xffff; + /*dev->stats.rx_badopcode_errors += */ readl(base + 0x7c); + /*dev->stats.rx_pause_count += */ readl(base + 0x80); + /*dev->stats.tx_pause_count += */ readl(base + 0x84); + dev->stats.tx_carrier_errors += readl(base + 0x88) & 0xff; +} + +static struct net_device_stats *ns83820_get_stats(struct net_device *_dev) +{ + struct ns83820 *dev = (void *)_dev; + + /* somewhat overkill */ + spin_lock_irq(&dev->misc_lock); + ns83820_update_stats(dev); + spin_unlock_irq(&dev->misc_lock); + + return &dev->stats; +} + static void ns83820_irq(int foo, void *data, struct pt_regs *regs) { struct ns83820 *dev = data; @@ -1061,6 +1084,12 @@ if ((ISR_TXDESC | ISR_TXIDLE) & isr) do_tx_done(dev); + if (ISR_MIB & isr) { + spin_lock(&dev->misc_lock); + ns83820_update_stats(dev); + spin_unlock(&dev->misc_lock); + } + if (ISR_PHY & isr) phy_intr(dev); } @@ -1179,6 +1208,28 @@ return 0; } +static void ns83820_set_multicast(struct net_device *_dev) +{ + struct ns83820 *dev = (void *)_dev; + u8 *rfcr = dev->base + RFCR; + u32 and_mask = 0xffffffff; + u32 or_mask = 0; + + if (dev->net_dev.flags & IFF_PROMISC) + or_mask |= RFCR_AAU | RFCR_AAM; + else + and_mask &= ~(RFCR_AAU | RFCR_AAM); + + if (dev->net_dev.flags & IFF_ALLMULTI) + or_mask |= RFCR_AAM; + else + and_mask &= ~RFCR_AAM; + + spin_lock_irq(&dev->misc_lock); + writel((readl(rfcr) & and_mask) | or_mask, rfcr); + spin_unlock_irq(&dev->misc_lock); +} + static int ns83820_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct ns83820 *dev; @@ -1242,6 +1293,9 @@ dev->net_dev.stop = ns83820_stop; dev->net_dev.hard_start_xmit = ns83820_hard_start_xmit; dev->net_dev.change_mtu = ns83820_change_mtu; + dev->net_dev.get_stats = ns83820_get_stats; + dev->net_dev.change_mtu = ns83820_change_mtu; + dev->net_dev.set_multicast_list = ns83820_set_multicast; //FIXME: dev->net_dev.tx_timeout = ns83820_tx_timeout; lock_kernel(); @@ -1425,5 +1479,6 @@ MODULE_AUTHOR("Benjamin LaHaise "); MODULE_DESCRIPTION("National Semiconductor DP83820 10/100/1000 driver"); MODULE_DEVICE_TABLE(pci, pci_device_id); +MODULE_LICENSE("GPL"); module_init(ns83820_init); module_exit(ns83820_exit); diff -urN /md0/kernels/2.4/v2.4.9-ac14/drivers/scsi/53c700-mem.c aio-v2.4.9-ac14.diff/drivers/scsi/53c700-mem.c --- /md0/kernels/2.4/v2.4.9-ac14/drivers/scsi/53c700-mem.c Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/drivers/scsi/53c700-mem.c Mon Sep 24 19:16:27 2001 @@ -0,0 +1,1842 @@ +/* WARNING: GENERATED FILE (from 53c700.c), DO NOT MODIFY */ +#define MEM_MAPPED +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* NCR (or Symbios) 53c700 and 53c700-66 Driver + * + * Copyright (C) 2001 by James.Bottomley@HansenPartnership.com +**----------------------------------------------------------------------------- +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. 
+** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +**----------------------------------------------------------------------------- + */ + +/* Notes: + * + * This driver is designed exclusively for these chips (virtually the + * earliest of the scripts engine chips). They need their own drivers + * because they are missing so many of the scripts and snazzy register + * features of their elder brothers (the 710, 720 and 770). + * + * The 700 is the lowliest of the line, it can only do async SCSI. + * The 700-66 can at least do synchronous SCSI up to 10MHz. + * + * The 700 chip has no host bus interface logic of its own. However, + * it is usually mapped to a location with well defined register + * offsets. Therefore, if you can determine the base address and the + * irq your board incorporating this chip uses, you can probably use + * this driver to run it (although you'll probably have to write a + * minimal wrapper for the purpose---see the NCR_D700 driver for + * details about how to do this). + * + * + * TODO List: + * + * 1. Better statistics in the proc fs + * + * 2. Implement message queue (queues SCSI messages like commands) and make + * the abort and device reset functions use them. + * */ + +/* CHANGELOG + * + * Version 2.3 + * + * More endianness/cache coherency changes. + * + * Better bad device handling (handles devices lying about tag + * queueing support and devices which fail to provide sense data on + * contingent allegiance conditions) + * + * Many thanks to Richard Hirst for patiently + * debugging this driver on the parisc architecture and suggesting + * many improvements and bug fixes. + * + * Thanks also go to Linuxcare Inc. for providing several PARISC + * machines for me to debug the driver on. + * + * Version 2.2 + * + * Made the driver mem or io mapped; added endian invariance; added + * dma cache flushing operations for architectures which need it; + * added support for more varied clocking speeds. + * + * Version 2.1 + * + * Initial modularisation from the D700. See NCR_D700.c for the rest of + * the changelog. 
+ * */ +#define NCR_700_VERSION "2.3" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +#include "53c700.h" + +#ifdef NCR_700_DEBUG +#define STATIC +#else +#define STATIC static +#endif + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("53c700 and 53c700-66 Driver"); +MODULE_LICENSE("GPL"); + +/* This is the script */ +#include "53c700_d.h" + + +STATIC int NCR_700_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); +STATIC int NCR_700_abort(Scsi_Cmnd * SCpnt); +STATIC int NCR_700_bus_reset(Scsi_Cmnd * SCpnt); +STATIC int NCR_700_dev_reset(Scsi_Cmnd * SCpnt); +STATIC int NCR_700_host_reset(Scsi_Cmnd * SCpnt); +STATIC int NCR_700_proc_directory_info(char *, char **, off_t, int, int, int); +STATIC void NCR_700_chip_setup(struct Scsi_Host *host); +STATIC void NCR_700_chip_reset(struct Scsi_Host *host); + +static char *NCR_700_phase[] = { + "", + "after selection", + "before command phase", + "after command phase", + "after status phase", + "after data in phase", + "after data out phase", + "during data phase", +}; + +static char *NCR_700_condition[] = { + "", + "NOT MSG_OUT", + "UNEXPECTED PHASE", + "NOT MSG_IN", + "UNEXPECTED MSG", + "MSG_IN", + "SDTR_MSG RECEIVED", + "REJECT_MSG RECEIVED", + "DISCONNECT_MSG RECEIVED", + "MSG_OUT", + "DATA_IN", + +}; + +static char *NCR_700_fatal_messages[] = { + "unexpected message after reselection", + "still MSG_OUT after message injection", + "not MSG_IN after selection", + "Illegal message length received", +}; + +static char *NCR_700_SBCL_bits[] = { + "IO ", + "CD ", + "MSG ", + "ATN ", + "SEL ", + "BSY ", + "ACK ", + "REQ ", +}; + +static char *NCR_700_SBCL_to_phase[] = { + "DATA_OUT", + "DATA_IN", + "CMD_OUT", + "STATE", + "ILLEGAL PHASE", + "ILLEGAL PHASE", + "MSG OUT", + "MSG IN", +}; + +static __u8 NCR_700_SDTR_msg[] = { + 0x01, /* Extended message */ + 0x03, /* Extended message Length */ + 0x01, /* SDTR Extended message */ + NCR_700_MIN_PERIOD, + NCR_700_MAX_OFFSET +}; + +struct Scsi_Host * __init +NCR_700_detect(Scsi_Host_Template *tpnt, + struct NCR_700_Host_Parameters *hostdata) +{ + __u32 *script = kmalloc(sizeof(SCRIPT), GFP_KERNEL); + __u32 pScript; + struct Scsi_Host *host; + static int banner = 0; + int j; + + /* Fill in the missing routines from the host template */ + tpnt->queuecommand = NCR_700_queuecommand; + tpnt->eh_abort_handler = NCR_700_abort; + tpnt->eh_device_reset_handler = NCR_700_dev_reset; + tpnt->eh_bus_reset_handler = NCR_700_bus_reset; + tpnt->eh_host_reset_handler = NCR_700_host_reset; + tpnt->can_queue = NCR_700_COMMAND_SLOTS_PER_HOST; + tpnt->sg_tablesize = NCR_700_SG_SEGMENTS; + tpnt->cmd_per_lun = NCR_700_MAX_TAGS; + tpnt->use_clustering = DISABLE_CLUSTERING; + tpnt->use_new_eh_code = 1; + tpnt->proc_info = NCR_700_proc_directory_info; + + if(tpnt->name == NULL) + tpnt->name = "53c700"; + if(tpnt->proc_name == NULL) + tpnt->proc_name = "53c700"; + + + if((host = scsi_register(tpnt, 4)) == NULL) + return NULL; + if(script == NULL) { + printk(KERN_ERR "53c700: Failed to allocate script, detatching\n"); + scsi_unregister(host); + return NULL; + } + + hostdata->slots = kmalloc(sizeof(struct NCR_700_command_slot) * NCR_700_COMMAND_SLOTS_PER_HOST, GFP_KERNEL); + if(hostdata->slots == NULL) { + printk(KERN_ERR "53c700: Failed to allocate command slots, detatching\n"); + scsi_unregister(host); + return NULL; + 
} + memset(hostdata->slots, 0, sizeof(struct NCR_700_command_slot) * NCR_700_COMMAND_SLOTS_PER_HOST); + for(j = 0; j < NCR_700_COMMAND_SLOTS_PER_HOST; j++) { + if(j == 0) + hostdata->free_list = &hostdata->slots[j]; + else + hostdata->slots[j-1].ITL_forw = &hostdata->slots[j]; + hostdata->slots[j].state = NCR_700_SLOT_FREE; + } + host->hostdata[0] = (__u32)hostdata; + for(j = 0; j < sizeof(SCRIPT)/sizeof(SCRIPT[0]); j++) { + script[j] = bS_to_host(SCRIPT[j]); + } + /* bus physical address of script */ + pScript = virt_to_bus(script); + /* adjust all labels to be bus physical */ + for(j = 0; j < PATCHES; j++) { + script[LABELPATCHES[j]] = bS_to_host(pScript + SCRIPT[LABELPATCHES[j]]); + } + /* now patch up fixed addresses */ + script_patch_32(script, MessageLocation, + virt_to_bus(&hostdata->msgout[0])); + script_patch_32(script, StatusAddress, + virt_to_bus(&hostdata->status)); + script_patch_32(script, ReceiveMsgAddress, + virt_to_bus(&hostdata->msgin[0])); + + hostdata->script = script; + hostdata->pScript = pScript; + hostdata->state = NCR_700_HOST_FREE; + spin_lock_init(&hostdata->lock); + hostdata->cmd = NULL; + host->max_id = 7; + host->max_lun = NCR_700_MAX_LUNS; + host->unique_id = hostdata->base; + host->base = hostdata->base; + host->hostdata[0] = (unsigned long)hostdata; + /* kick the chip */ + NCR_700_writeb(0xff, host, CTEST9_REG); + hostdata->rev = (NCR_700_readb(host, CTEST7_REG)<<4) & 0x0f; + hostdata->fast = (NCR_700_readb(host, CTEST9_REG) == 0); + if(banner == 0) { + printk(KERN_NOTICE "53c700: Version " NCR_700_VERSION " By James.Bottomley@HansenPartnership.com\n"); + banner = 1; + } + printk(KERN_NOTICE "scsi%d: %s rev %d %s\n", host->host_no, + hostdata->fast ? "53c700-66" : "53c700", + hostdata->rev, hostdata->differential ? + "(Differential)" : ""); + /* reset the chip */ + NCR_700_chip_reset(host); + NCR_700_writeb(ASYNC_OPERATION , host, SXFER_REG); + + return host; +} + +int +NCR_700_release(struct Scsi_Host *host) +{ + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)host->hostdata[0]; + + kfree(hostdata->script); + return 1; +} + +static inline __u8 +NCR_700_identify(int can_disconnect, __u8 lun) +{ + return IDENTIFY_BASE | + ((can_disconnect) ? 0x40 : 0) | + (lun & NCR_700_LUN_MASK); +} + +/* + * Function : static int datapath_residual (Scsi_Host *host) + * + * Purpose : return residual data count of what's in the chip. If you + * really want to know what this function is doing, it's almost a + * direct transcription of the algorithm described in the 53c710 + * guide, except that the DBC and DFIFO registers are only 6 bits + * wide. 
+ * + * Inputs : host - SCSI host */ +static inline int +NCR_700_data_residual (struct Scsi_Host *host) { + int count, synchronous; + unsigned int ddir; + + count = ((NCR_700_readb(host, DFIFO_REG) & 0x3f) - + (NCR_700_readl(host, DBC_REG) & 0x3f)) & 0x3f; + + synchronous = NCR_700_readb(host, SXFER_REG) & 0x0f; + + /* get the data direction */ + ddir = NCR_700_readb(host, CTEST0_REG) & 0x01; + + if (ddir) { + /* Receive */ + if (synchronous) + count += (NCR_700_readb(host, SSTAT2_REG) & 0xf0) >> 4; + else + if (NCR_700_readb(host, SSTAT1_REG) & SIDL_REG_FULL) + ++count; + } else { + /* Send */ + __u8 sstat = NCR_700_readb(host, SSTAT1_REG); + if (sstat & SODL_REG_FULL) + ++count; + if (synchronous && (sstat & SODR_REG_FULL)) + ++count; + } + return count; +} + +/* print out the SCSI wires and corresponding phase from the SBCL register + * in the chip */ +static inline char * +sbcl_to_string(__u8 sbcl) +{ + int i; + static char ret[256]; + + ret[0]='\0'; + for(i=0; i<8; i++) { + if((1<free_list; + + if(slot == NULL) { + /* sanity check */ + if(hostdata->command_slot_count != NCR_700_COMMAND_SLOTS_PER_HOST) + printk(KERN_ERR "SLOTS FULL, but count is %d, should be %d\n", hostdata->command_slot_count, NCR_700_COMMAND_SLOTS_PER_HOST); + return NULL; + } + + if(slot->state != NCR_700_SLOT_FREE) + /* should panic! */ + printk(KERN_ERR "BUSY SLOT ON FREE LIST!!!\n"); + + + hostdata->free_list = slot->ITL_forw; + slot->ITL_forw = NULL; + + + /* NOTE: set the state to busy here, not queued, since this + * indicates the slot is in use and cannot be run by the IRQ + * finish routine. If we cannot queue the command when it + * is properly build, we then change to NCR_700_SLOT_QUEUED */ + slot->state = NCR_700_SLOT_BUSY; + hostdata->command_slot_count++; + + return slot; +} + +STATIC void +free_slot(struct NCR_700_command_slot *slot, + struct NCR_700_Host_Parameters *hostdata) +{ + int hash; + struct NCR_700_command_slot **forw, **back; + + + if((slot->state & NCR_700_SLOT_MASK) != NCR_700_SLOT_MAGIC) { + printk(KERN_ERR "53c700: SLOT %p is not MAGIC!!!\n", slot); + } + if(slot->state == NCR_700_SLOT_FREE) { + printk(KERN_ERR "53c700: SLOT %p is FREE!!!\n", slot); + } + /* remove from queues */ + if(slot->tag != NCR_700_NO_TAG) { + hash = hash_ITLQ(slot->cmnd->target, slot->cmnd->lun, + slot->tag); + if(slot->ITLQ_forw == NULL) + back = &hostdata->ITLQ_Hash_back[hash]; + else + back = &slot->ITLQ_forw->ITLQ_back; + + if(slot->ITLQ_back == NULL) + forw = &hostdata->ITLQ_Hash_forw[hash]; + else + forw = &slot->ITLQ_back->ITLQ_forw; + + *forw = slot->ITLQ_forw; + *back = slot->ITLQ_back; + } + hash = hash_ITL(slot->cmnd->target, slot->cmnd->lun); + if(slot->ITL_forw == NULL) + back = &hostdata->ITL_Hash_back[hash]; + else + back = &slot->ITL_forw->ITL_back; + + if(slot->ITL_back == NULL) + forw = &hostdata->ITL_Hash_forw[hash]; + else + forw = &slot->ITL_back->ITL_forw; + + *forw = slot->ITL_forw; + *back = slot->ITL_back; + + slot->resume_offset = 0; + slot->cmnd = NULL; + slot->state = NCR_700_SLOT_FREE; + slot->ITL_forw = hostdata->free_list; + hostdata->free_list = slot; + hostdata->command_slot_count--; +} + + +/* This routine really does very little. 
The command is indexed on + the ITL and (if tagged) the ITLQ lists in _queuecommand */ +STATIC void +save_for_reselection(struct NCR_700_Host_Parameters *hostdata, + Scsi_Cmnd *SCp, __u32 dsp) +{ + /* Its just possible that this gets executed twice */ + if(SCp != NULL) { + struct NCR_700_command_slot *slot = + (struct NCR_700_command_slot *)SCp->host_scribble; + + slot->resume_offset = dsp; + } + hostdata->state = NCR_700_HOST_FREE; + hostdata->cmd = NULL; +} + +/* Most likely nexus is the oldest in each case */ +STATIC inline struct NCR_700_command_slot * +find_ITL_Nexus(struct NCR_700_Host_Parameters *hostdata, __u8 pun, __u8 lun) +{ + int hash = hash_ITL(pun, lun); + struct NCR_700_command_slot *slot = hostdata->ITL_Hash_back[hash]; + while(slot != NULL && !(slot->cmnd->target == pun && + slot->cmnd->lun == lun)) + slot = slot->ITL_back; + return slot; +} + +STATIC inline struct NCR_700_command_slot * +find_ITLQ_Nexus(struct NCR_700_Host_Parameters *hostdata, __u8 pun, + __u8 lun, __u8 tag) +{ + int hash = hash_ITLQ(pun, lun, tag); + struct NCR_700_command_slot *slot = hostdata->ITLQ_Hash_back[hash]; + + while(slot != NULL && !(slot->cmnd->target == pun + && slot->cmnd->lun == lun && slot->tag == tag)) + slot = slot->ITLQ_back; + +#ifdef NCR_700_TAG_DEBUG + if(slot != NULL) { + struct NCR_700_command_slot *n = slot->ITLQ_back; + while(n != NULL && n->cmnd->target != pun + && n->cmnd->lun != lun && n->tag != tag) + n = n->ITLQ_back; + + if(n != NULL && n->cmnd->target == pun && n->cmnd->lun == lun + && n->tag == tag) { + printk(KERN_WARNING "53c700: WARNING: DUPLICATE tag %d\n", + tag); + } + } +#endif + return slot; +} + + + +/* This translates the SDTR message offset and period to a value + * which can be loaded into the SXFER_REG. + * + * NOTE: According to SCSI-2, the true transfer period (in ns) is + * actually four times this period value */ +STATIC inline __u8 +NCR_700_offset_period_to_sxfer(struct NCR_700_Host_Parameters *hostdata, + __u8 offset, __u8 period) +{ + int XFERP; + + if(period*4 < NCR_700_MIN_PERIOD) { + printk(KERN_WARNING "53c700: Period %dns is less than SCSI-2 minimum, setting to %d\n", period*4, NCR_700_MIN_PERIOD); + period = NCR_700_MIN_PERIOD/4; + } + XFERP = (period*4 * hostdata->sync_clock)/1000 - 4; + if(offset > NCR_700_MAX_OFFSET) { + printk(KERN_WARNING "53c700: Offset %d exceeds maximum, setting to %d\n", + offset, NCR_700_MAX_OFFSET); + offset = NCR_700_MAX_OFFSET; + } + if(XFERP < NCR_700_MIN_XFERP) { + printk(KERN_WARNING "53c700: XFERP %d is less than minium, setting to %d\n", + XFERP, NCR_700_MIN_XFERP); + XFERP = NCR_700_MIN_XFERP; + } + return (offset & 0x0f) | (XFERP & 0x07)<<4; +} + + +STATIC inline void +NCR_700_scsi_done(struct NCR_700_Host_Parameters *hostdata, + Scsi_Cmnd *SCp, int result) +{ + hostdata->state = NCR_700_HOST_FREE; + hostdata->cmd = NULL; + + if(SCp != NULL) { + struct NCR_700_command_slot *slot = + (struct NCR_700_command_slot *)SCp->host_scribble; + + if(SCp->cmnd[0] == REQUEST_SENSE && SCp->cmnd[6] == NCR_700_INTERNAL_SENSE_MAGIC) { +#ifdef NCR_700_DEBUG + printk(" ORIGINAL CMD %p RETURNED %d, new return is %d sense is", + SCp, SCp->cmnd[7], result); + print_sense("53c700", SCp); +#endif + if(result == 0) + result = SCp->cmnd[7]; + } + + free_slot(slot, hostdata); + + SCp->host_scribble = NULL; + SCp->result = result; + SCp->scsi_done(SCp); + if(NCR_700_get_depth(SCp->device) == 0 || + NCR_700_get_depth(SCp->device) > NCR_700_MAX_TAGS) + printk(KERN_ERR "Invalid depth in NCR_700_scsi_done(): %d\n", + 
NCR_700_get_depth(SCp->device)); + NCR_700_set_depth(SCp->device, NCR_700_get_depth(SCp->device) - 1); + } else { + printk(KERN_ERR "53c700: SCSI DONE HAS NULL SCp\n"); + } +} + + +STATIC void +NCR_700_internal_bus_reset(struct Scsi_Host *host) +{ + /* Bus reset */ + NCR_700_writeb(ASSERT_RST, host, SCNTL1_REG); + udelay(50); + NCR_700_writeb(0, host, SCNTL1_REG); + +} + +STATIC void +NCR_700_chip_setup(struct Scsi_Host *host) +{ + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)host->hostdata[0]; + + NCR_700_writeb(1 << host->this_id, host, SCID_REG); + NCR_700_writeb(0, host, SBCL_REG); + NCR_700_writeb(0, host, SXFER_REG); + + NCR_700_writeb(PHASE_MM_INT | SEL_TIMEOUT_INT | GROSS_ERR_INT | UX_DISC_INT + | RST_INT | PAR_ERR_INT | SELECT_INT, host, SIEN_REG); + + NCR_700_writeb(ABORT_INT | INT_INST_INT | ILGL_INST_INT, host, DIEN_REG); + NCR_700_writeb(BURST_LENGTH_8, host, DMODE_REG); + NCR_700_writeb(FULL_ARBITRATION | PARITY | AUTO_ATN, host, SCNTL0_REG); + NCR_700_writeb(LAST_DIS_ENBL | ENABLE_ACTIVE_NEGATION|GENERATE_RECEIVE_PARITY, + host, CTEST8_REG); + NCR_700_writeb(ENABLE_SELECT, host, SCNTL1_REG); + if(hostdata->clock > 75) { + printk(KERN_ERR "53c700: Clock speed %dMHz is too high: 75Mhz is the maximum this chip can be driven at\n", hostdata->clock); + /* do the best we can, but the async clock will be out + * of spec: sync divider 2, async divider 3 */ + DEBUG(("53c700: sync 2 async 3\n")); + NCR_700_writeb(SYNC_DIV_2_0, host, SBCL_REG); + NCR_700_writeb(ASYNC_DIV_3_0, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock/2; + } else if(hostdata->clock > 50 && hostdata->clock <= 75) { + /* sync divider 1.5, async divider 3 */ + DEBUG(("53c700: sync 1.5 async 3\n")); + NCR_700_writeb(SYNC_DIV_1_5, host, SBCL_REG); + NCR_700_writeb(ASYNC_DIV_3_0, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock*2; + hostdata->sync_clock /= 3; + + } else if(hostdata->clock > 37 && hostdata->clock <= 50) { + /* sync divider 1, async divider 2 */ + DEBUG(("53c700: sync 1 async 2\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); + NCR_700_writeb(ASYNC_DIV_2_0, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock; + } else if(hostdata->clock > 25 && hostdata->clock <=37) { + /* sync divider 1, async divider 1.5 */ + DEBUG(("53c700: sync 1 async 1.5\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); + NCR_700_writeb(ASYNC_DIV_1_5, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock; + } else { + DEBUG(("53c700: sync 1 async 1\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); + NCR_700_writeb(ASYNC_DIV_1_0, host, DCNTL_REG); + /* sync divider 1, async divider 1 */ + } +} + +STATIC void +NCR_700_chip_reset(struct Scsi_Host *host) +{ + /* Chip reset */ + NCR_700_writeb(SOFTWARE_RESET, host, DCNTL_REG); + udelay(100); + + NCR_700_writeb(0, host, DCNTL_REG); + + mdelay(1000); + + NCR_700_chip_setup(host); +} + +/* The heart of the message processing engine is that the instruction + * immediately after the INT is the normal case (and so must be CLEAR + * ACK). 
If we want to do something else, we call that routine in + * scripts and set temp to be the normal case + 8 (skipping the CLEAR + * ACK) so that the routine returns correctly to resume its activity + * */ +STATIC __u32 +process_extended_message(struct Scsi_Host *host, + struct NCR_700_Host_Parameters *hostdata, + Scsi_Cmnd *SCp, __u32 dsp, __u32 dsps) +{ + __u32 resume_offset = dsp, temp = dsp + 8; + __u8 pun = 0xff, lun = 0xff; + + if(SCp != NULL) { + pun = SCp->target; + lun = SCp->lun; + } + + switch(hostdata->msgin[2]) { + case A_SDTR_MSG: + if(SCp != NULL && NCR_700_is_flag_set(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION)) { + __u8 period = hostdata->msgin[3]; + __u8 offset = hostdata->msgin[4]; + __u8 sxfer; + + if(offset != 0 && period != 0) + sxfer = NCR_700_offset_period_to_sxfer(hostdata, offset, period); + else + sxfer = 0; + + if(sxfer != NCR_700_get_SXFER(SCp->device)) { + printk(KERN_INFO "scsi%d: (%d:%d) Synchronous at offset %d, period %dns\n", + host->host_no, pun, lun, + offset, period*4); + + NCR_700_set_SXFER(SCp->device, sxfer); + } + + + NCR_700_set_flag(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC); + NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); + + NCR_700_writeb(NCR_700_get_SXFER(SCp->device), + host, SXFER_REG); + + } else { + /* SDTR message out of the blue, reject it */ + printk(KERN_WARNING "scsi%d Unexpected SDTR msg\n", + host->host_no); + hostdata->msgout[0] = A_REJECT_MSG; + dma_cache_wback((unsigned long)hostdata->msgout, sizeof(hostdata->msgout)); + script_patch_16(hostdata->script, MessageCount, 1); + /* SendMsgOut returns, so set up the return + * address */ + resume_offset = hostdata->pScript + Ent_SendMessageWithATN; + } + break; + + case A_WDTR_MSG: + printk(KERN_INFO "scsi%d: (%d:%d), Unsolicited WDTR after CMD, Rejecting\n", + host->host_no, pun, lun); + hostdata->msgout[0] = A_REJECT_MSG; + dma_cache_wback((unsigned long)hostdata->msgout, sizeof(hostdata->msgout)); + script_patch_16(hostdata->script, MessageCount, 1); + resume_offset = hostdata->pScript + Ent_SendMessageWithATN; + + break; + + default: + printk(KERN_INFO "scsi%d (%d:%d): Unexpected message %s: ", + host->host_no, pun, lun, + NCR_700_phase[(dsps & 0xf00) >> 8]); + print_msg(hostdata->msgin); + printk("\n"); + /* just reject it */ + hostdata->msgout[0] = A_REJECT_MSG; + dma_cache_wback((unsigned long)hostdata->msgout, sizeof(hostdata->msgout)); + script_patch_16(hostdata->script, MessageCount, 1); + /* SendMsgOut returns, so set up the return + * address */ + resume_offset = hostdata->pScript + Ent_SendMessageWithATN; + } + NCR_700_writel(temp, host, TEMP_REG); + return resume_offset; +} + +STATIC __u32 +process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata, + Scsi_Cmnd *SCp, __u32 dsp, __u32 dsps) +{ + /* work out where to return to */ + __u32 temp = dsp + 8, resume_offset = dsp; + __u8 pun = 0xff, lun = 0xff; + + dma_cache_inv((unsigned long)hostdata->msgin, sizeof(hostdata->msgin)); + + if(SCp != NULL) { + pun = SCp->target; + lun = SCp->lun; + } + +#ifdef NCR_700_DEBUG + printk("scsi%d (%d:%d): message %s: ", host->host_no, pun, lun, + NCR_700_phase[(dsps & 0xf00) >> 8]); + print_msg(hostdata->msgin); + printk("\n"); +#endif + + switch(hostdata->msgin[0]) { + + case A_EXTENDED_MSG: + return process_extended_message(host, hostdata, SCp, + dsp, dsps); + + case A_REJECT_MSG: + if(SCp != NULL && NCR_700_is_flag_set(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION)) { + /* Rejected our sync negotiation attempt */ + 
NCR_700_set_SXFER(SCp->device, 0); + NCR_700_set_flag(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC); + NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); + } else if(SCp != NULL && NCR_700_is_flag_set(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING)) { + /* rejected our first simple tag message */ + printk(KERN_WARNING "scsi%d (%d:%d) Rejected first tag queue attempt, turning off tag queueing\n", host->host_no, pun, lun); + NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING); + hostdata->tag_negotiated &= ~(1<target); + } else { + printk(KERN_WARNING "scsi%d (%d:%d) Unexpected REJECT Message %s\n", + host->host_no, pun, lun, + NCR_700_phase[(dsps & 0xf00) >> 8]); + /* however, just ignore it */ + } + break; + + case A_PARITY_ERROR_MSG: + printk(KERN_ERR "scsi%d (%d:%d) Parity Error!\n", host->host_no, + pun, lun); + NCR_700_internal_bus_reset(host); + break; + case A_SIMPLE_TAG_MSG: + printk(KERN_INFO "scsi%d (%d:%d) SIMPLE TAG %d %s\n", host->host_no, + pun, lun, hostdata->msgin[1], + NCR_700_phase[(dsps & 0xf00) >> 8]); + /* just ignore it */ + break; + default: + printk(KERN_INFO "scsi%d (%d:%d): Unexpected message %s: ", + host->host_no, pun, lun, + NCR_700_phase[(dsps & 0xf00) >> 8]); + + print_msg(hostdata->msgin); + printk("\n"); + /* just reject it */ + hostdata->msgout[0] = A_REJECT_MSG; + dma_cache_wback((unsigned long)hostdata->msgout, sizeof(hostdata->msgout)); + script_patch_16(hostdata->script, MessageCount, 1); + /* SendMsgOut returns, so set up the return + * address */ + resume_offset = hostdata->pScript + Ent_SendMessageWithATN; + + break; + } + NCR_700_writel(temp, host, TEMP_REG); + return resume_offset; +} + +STATIC __u32 +process_script_interrupt(__u32 dsps, __u32 dsp, Scsi_Cmnd *SCp, + struct Scsi_Host *host, + struct NCR_700_Host_Parameters *hostdata) +{ + __u32 resume_offset = 0; + __u8 pun = 0xff, lun=0xff; + + if(SCp != NULL) { + pun = SCp->target; + lun = SCp->lun; + } + + if(dsps == A_GOOD_STATUS_AFTER_STATUS) { + dma_cache_inv((unsigned long)hostdata->status, sizeof(hostdata->status)); + DEBUG((" COMMAND COMPLETE, status=%02x\n", + hostdata->status)); + /* OK, if TCQ still on, we know it works */ + NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING); + /* check for contingent allegiance contitions */ + if(status_byte(hostdata->status) == CHECK_CONDITION || + status_byte(hostdata->status) == COMMAND_TERMINATED) { + struct NCR_700_command_slot *slot = + (struct NCR_700_command_slot *)SCp->host_scribble; + if(SCp->cmnd[0] == REQUEST_SENSE) { + /* OOPS: bad device, returning another + * contingent allegiance condition */ + printk(KERN_ERR "scsi%d (%d:%d) broken device is looping in contingent allegiance: ignoring\n", host->host_no, pun, lun); + NCR_700_scsi_done(hostdata, SCp, hostdata->status); + } else { + + DEBUG((" cmd %p has status %d, requesting sense\n", + SCp, hostdata->status)); + /* we can destroy the command here because the + * contingent allegiance condition will cause a + * retry which will re-copy the command from the + * saved data_cmnd */ + SCp->cmnd[0] = REQUEST_SENSE; + SCp->cmnd[1] = (SCp->lun & 0x7) << 5; + SCp->cmnd[2] = 0; + SCp->cmnd[3] = 0; + SCp->cmnd[4] = sizeof(SCp->sense_buffer); + SCp->cmnd[5] = 0; + SCp->cmd_len = 6; + /* Here's a quiet hack: the REQUEST_SENSE command is + * six bytes, so store a flag indicating that this + * was an internal sense request and the original + * status at the end of the command */ + SCp->cmnd[6] = NCR_700_INTERNAL_SENSE_MAGIC; + SCp->cmnd[7] = hostdata->status; + 
slot->SG[0].ins = bS_to_host(SCRIPT_MOVE_DATA_IN | sizeof(SCp->sense_buffer)); + slot->SG[0].pAddr = bS_to_host(virt_to_bus(SCp->sense_buffer)); + slot->SG[1].ins = bS_to_host(SCRIPT_RETURN); + slot->SG[1].pAddr = 0; + slot->resume_offset = hostdata->pScript; + dma_cache_wback((unsigned long)slot->SG, sizeof(slot->SG[0])*2); + dma_cache_inv((unsigned long)SCp->sense_buffer, sizeof(SCp->sense_buffer)); + + /* queue the command for reissue */ + slot->state = NCR_700_SLOT_QUEUED; + hostdata->state = NCR_700_HOST_FREE; + hostdata->cmd = NULL; + } + } else { + if(status_byte(hostdata->status) == GOOD && + SCp->cmnd[0] == INQUIRY && SCp->use_sg == 0) { + /* Piggy back the tag queueing support + * on this command */ + if(((char *)SCp->request_buffer)[7] & 0x02) { + printk(KERN_INFO "scsi%d: (%d:%d) Enabling Tag Command Queuing\n", host->host_no, pun, lun); + hostdata->tag_negotiated |= (1<target); + NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING); + } else { + NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING); + hostdata->tag_negotiated &= ~(1<target); + } + } + NCR_700_scsi_done(hostdata, SCp, hostdata->status); + } + } else if((dsps & 0xfffff0f0) == A_UNEXPECTED_PHASE) { + __u8 i = (dsps & 0xf00) >> 8; + + printk(KERN_ERR "scsi%d: (%d:%d), UNEXPECTED PHASE %s (%s)\n", + host->host_no, pun, lun, + NCR_700_phase[i], + sbcl_to_string(NCR_700_readb(host, SBCL_REG))); + printk(KERN_ERR " len = %d, cmd =", SCp->cmd_len); + print_command(SCp->cmnd); + + NCR_700_internal_bus_reset(host); + } else if((dsps & 0xfffff000) == A_FATAL) { + int i = (dsps & 0xfff); + + printk(KERN_ERR "scsi%d: (%d:%d) FATAL ERROR: %s\n", + host->host_no, pun, lun, NCR_700_fatal_messages[i]); + if(dsps == A_FATAL_ILLEGAL_MSG_LENGTH) { + printk(KERN_ERR " msg begins %02x %02x\n", + hostdata->msgin[0], hostdata->msgin[1]); + } + NCR_700_internal_bus_reset(host); + } else if((dsps & 0xfffff0f0) == A_DISCONNECT) { +#ifdef NCR_700_DEBUG + __u8 i = (dsps & 0xf00) >> 8; + + printk("scsi%d: (%d:%d), DISCONNECTED (%d) %s\n", + host->host_no, pun, lun, + i, NCR_700_phase[i]); +#endif + save_for_reselection(hostdata, SCp, dsp); + + } else if(dsps == A_RESELECTION_IDENTIFIED) { + __u8 lun; + struct NCR_700_command_slot *slot; + __u8 reselection_id = hostdata->reselection_id; + + dma_cache_inv((unsigned long)hostdata->msgin, sizeof(hostdata->msgin)); + + lun = hostdata->msgin[0] & 0x1f; + + hostdata->reselection_id = 0xff; + DEBUG(("scsi%d: (%d:%d) RESELECTED!\n", + host->host_no, reselection_id, lun)); + /* clear the reselection indicator */ + if(hostdata->msgin[1] == A_SIMPLE_TAG_MSG) { + slot = find_ITLQ_Nexus(hostdata, reselection_id, + lun, hostdata->msgin[2]); + } else { + slot = find_ITL_Nexus(hostdata, reselection_id, lun); + } + retry: + if(slot == NULL) { + struct NCR_700_command_slot *s = find_ITL_Nexus(hostdata, reselection_id, lun); + printk(KERN_ERR "scsi%d: (%d:%d) RESELECTED but no saved command (MSG = %02x %02x %02x)!!\n", + host->host_no, reselection_id, lun, + hostdata->msgin[0], hostdata->msgin[1], + hostdata->msgin[2]); + printk(KERN_ERR " OUTSTANDING TAGS:"); + while(s != NULL) { + if(s->cmnd->target == reselection_id && + s->cmnd->lun == lun) { + printk("%d ", s->tag); + if(s->tag == hostdata->msgin[2]) { + printk(" ***FOUND*** \n"); + slot = s; + goto retry; + } + + } + s = s->ITL_back; + } + printk("\n"); + } else { + if(hostdata->state != NCR_700_HOST_BUSY) + printk(KERN_ERR "scsi%d: FATAL, host not busy during valid reselection!\n", + host->host_no); + resume_offset = 
slot->resume_offset; + hostdata->cmd = slot->cmnd; + + /* re-patch for this command */ + script_patch_32_abs(hostdata->script, CommandAddress, + virt_to_bus(slot->cmnd->cmnd)); + script_patch_16(hostdata->script, + CommandCount, slot->cmnd->cmd_len); + script_patch_32_abs(hostdata->script, SGScriptStartAddress, + virt_to_bus(&slot->SG[0].ins)); + + /* Note: setting SXFER only works if we're + * still in the MESSAGE phase, so it is vital + * that ACK is still asserted when we process + * the reselection message. The resume offset + * should therefore always clear ACK */ + NCR_700_writeb(NCR_700_get_SXFER(hostdata->cmd->device), + host, SXFER_REG); + + } + } else if(dsps == A_RESELECTED_DURING_SELECTION) { + + /* This section is full of debugging code because I've + * never managed to reach it. I think what happens is + * that, because the 700 runs with selection + * interrupts enabled the whole time that we take a + * selection interrupt before we manage to get to the + * reselected script interrupt */ + + __u8 reselection_id = NCR_700_readb(host, SFBR_REG); + struct NCR_700_command_slot *slot; + + /* Take out our own ID */ + reselection_id &= ~(1<this_id); + + printk(KERN_INFO "scsi%d: (%d:%d) RESELECTION DURING SELECTION, dsp=%p[%04x] state=%d, count=%d\n", + host->host_no, reselection_id, lun, (void *)dsp, dsp - hostdata->pScript, hostdata->state, hostdata->command_slot_count); + + { + /* FIXME: DEBUGGING CODE */ + __u32 SG = (__u32)bus_to_virt(hostdata->script[A_SGScriptStartAddress_used[0]]); + int i; + + for(i=0; i< NCR_700_COMMAND_SLOTS_PER_HOST; i++) { + if(SG >= (__u32)(&hostdata->slots[i].SG[0]) + && SG <= (__u32)(&hostdata->slots[i].SG[NCR_700_SG_SEGMENTS])) + break; + } + printk(KERN_INFO "IDENTIFIED SG segment as being %p in slot %p, cmd %p, slot->resume_offset=%p\n", (void *)SG, &hostdata->slots[i], hostdata->slots[i].cmnd, (void *)hostdata->slots[i].resume_offset); + SCp = hostdata->slots[i].cmnd; + } + + if(SCp != NULL) { + slot = (struct NCR_700_command_slot *)SCp->host_scribble; + /* change slot from busy to queued to redo command */ + slot->state = NCR_700_SLOT_QUEUED; + } + hostdata->cmd = NULL; + + if(reselection_id == 0) { + if(hostdata->reselection_id == 0xff) { + printk(KERN_ERR "scsi%d: Invalid reselection during selection!!\n", host->host_no); + return 0; + } else { + printk(KERN_ERR "scsi%d: script reselected and we took a selection interrupt\n", + host->host_no); + reselection_id = hostdata->reselection_id; + } + } else { + + /* convert to real ID */ + reselection_id = bitmap_to_number(reselection_id); + } + hostdata->reselection_id = reselection_id; + hostdata->msgin[1] = 0; + dma_cache_wback((unsigned long)hostdata->msgin, sizeof(hostdata->msgin)); + if(hostdata->tag_negotiated & (1<pScript + Ent_GetReselectionWithTag; + } else { + resume_offset = hostdata->pScript + Ent_GetReselectionData; + } + } else if(dsps == A_COMPLETED_SELECTION_AS_TARGET) { + /* we've just disconnected from the bus, do nothing since + * a return here will re-run the queued command slot + * that may have been interrupted by the initial selection */ + DEBUG((" SELECTION COMPLETED\n")); + } else if((dsps & 0xfffff0f0) == A_MSG_IN) { + resume_offset = process_message(host, hostdata, SCp, + dsp, dsps); + } else if((dsps & 0xfffff000) == 0) { + __u8 i = (dsps & 0xf0) >> 4, j = (dsps & 0xf00) >> 8; + printk(KERN_ERR "scsi%d: (%d:%d), unhandled script condition %s %s at %04x\n", + host->host_no, pun, lun, NCR_700_condition[i], + NCR_700_phase[j], dsp - hostdata->pScript); + if(SCp != NULL) { + 
print_command(SCp->cmnd); + + if(SCp->use_sg) { + for(i = 0; i < SCp->use_sg + 1; i++) { + printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, ((struct scatterlist *)SCp->buffer)[i].length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr); + } + } + } + NCR_700_internal_bus_reset(host); + } else if((dsps & 0xfffff000) == A_DEBUG_INTERRUPT) { + printk(KERN_NOTICE "scsi%d (%d:%d) DEBUG INTERRUPT %d AT %p[%04x], continuing\n", + host->host_no, pun, lun, dsps & 0xfff, (void *)dsp, dsp - hostdata->pScript); + resume_offset = dsp; + } else { + printk(KERN_ERR "scsi%d: (%d:%d), unidentified script interrupt 0x%x at %04x\n", + host->host_no, pun, lun, dsps, dsp - hostdata->pScript); + NCR_700_internal_bus_reset(host); + } + return resume_offset; +} + +/* We run the 53c700 with selection interrupts always enabled. This + * means that the chip may be selected as soon as the bus frees. On a + * busy bus, this can be before the scripts engine finishes its + * processing. Therefore, part of the selection processing has to be + * to find out what the scripts engine is doing and complete the + * function if necessary (i.e. process the pending disconnect or save + * the interrupted initial selection */ +STATIC inline __u32 +process_selection(struct Scsi_Host *host, __u32 dsp) +{ + __u8 id = 0; /* Squash compiler warning */ + int count = 0; + __u32 resume_offset = 0; + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)host->hostdata[0]; + Scsi_Cmnd *SCp = hostdata->cmd; + __u8 sbcl; + + for(count = 0; count < 5; count++) { + id = NCR_700_readb(host, SFBR_REG); + + /* Take out our own ID */ + id &= ~(1<this_id); + if(id != 0) + break; + udelay(5); + } + sbcl = NCR_700_readb(host, SBCL_REG); + if((sbcl & SBCL_IO) == 0) { + /* mark as having been selected rather than reselected */ + id = 0xff; + } else { + /* convert to real ID */ + hostdata->reselection_id = id = bitmap_to_number(id); + DEBUG(("scsi%d: Reselected by %d\n", + host->host_no, id)); + } + if(hostdata->state == NCR_700_HOST_BUSY && SCp != NULL) { + struct NCR_700_command_slot *slot = + (struct NCR_700_command_slot *)SCp->host_scribble; + DEBUG((" ID %d WARNING: RESELECTION OF BUSY HOST, saving cmd %p, slot %p, addr %x [%04x], resume %x!\n", id, hostdata->cmd, slot, dsp, dsp - hostdata->pScript, resume_offset)); + + switch(dsp - hostdata->pScript) { + case Ent_Disconnect1: + case Ent_Disconnect2: + save_for_reselection(hostdata, SCp, Ent_Disconnect2 + hostdata->pScript); + break; + case Ent_Disconnect3: + case Ent_Disconnect4: + save_for_reselection(hostdata, SCp, Ent_Disconnect4 + hostdata->pScript); + break; + case Ent_Disconnect5: + case Ent_Disconnect6: + save_for_reselection(hostdata, SCp, Ent_Disconnect6 + hostdata->pScript); + break; + case Ent_Disconnect7: + case Ent_Disconnect8: + save_for_reselection(hostdata, SCp, Ent_Disconnect8 + hostdata->pScript); + break; + case Ent_Finish1: + case Ent_Finish2: + process_script_interrupt(A_GOOD_STATUS_AFTER_STATUS, dsp, SCp, host, hostdata); + break; + + default: + slot->state = NCR_700_SLOT_QUEUED; + break; + } + } + hostdata->state = NCR_700_HOST_BUSY; + hostdata->cmd = NULL; + hostdata->msgin[1] = 0; + dma_cache_wback((unsigned long)hostdata->msgin, sizeof(hostdata->msgin)); + + if(id == 0xff) { + /* Selected as target, Ignore */ + resume_offset = hostdata->pScript + Ent_SelectedAsTarget; + } else if(hostdata->tag_negotiated & (1<pScript + Ent_GetReselectionWithTag; + 
} else { + resume_offset = hostdata->pScript + Ent_GetReselectionData; + } + return resume_offset; +} + + +STATIC int +NCR_700_start_command(Scsi_Cmnd *SCp) +{ + struct NCR_700_command_slot *slot = + (struct NCR_700_command_slot *)SCp->host_scribble; + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)SCp->host->hostdata[0]; + unsigned long flags; + __u16 count = 1; /* for IDENTIFY message */ + + save_flags(flags); + cli(); + if(hostdata->state != NCR_700_HOST_FREE) { + /* keep this inside the lock to close the race window where + * the running command finishes on another CPU while we don't + * change the state to queued on this one */ + slot->state = NCR_700_SLOT_QUEUED; + restore_flags(flags); + + DEBUG(("scsi%d: host busy, queueing command %p, slot %p\n", + SCp->host->host_no, slot->cmnd, slot)); + return 0; + } + hostdata->state = NCR_700_HOST_BUSY; + hostdata->cmd = SCp; + slot->state = NCR_700_SLOT_BUSY; + /* keep interrupts disabled until we have the command correctly + * set up so we cannot take a selection interrupt */ + + hostdata->msgout[0] = NCR_700_identify(SCp->cmnd[0] != REQUEST_SENSE, + SCp->lun); + /* for INQUIRY or REQUEST_SENSE commands, we cannot be sure + * if the negotiated transfer parameters still hold, so + * always renegotiate them */ + if(SCp->cmnd[0] == INQUIRY || SCp->cmnd[0] == REQUEST_SENSE) { + NCR_700_clear_flag(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC); + } + + /* REQUEST_SENSE is asking for contingent I_T_L status. If a + * contingent allegiance condition exists, the device will + * refuse all tags, so send the request sense as untagged */ + if((hostdata->tag_negotiated & (1<target)) + && (slot->tag != NCR_700_NO_TAG && SCp->cmnd[0] != REQUEST_SENSE)) { + hostdata->msgout[count++] = A_SIMPLE_TAG_MSG; + hostdata->msgout[count++] = slot->tag; + } + + if(hostdata->fast && + NCR_700_is_flag_clear(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC)) { + memcpy(&hostdata->msgout[count], NCR_700_SDTR_msg, + sizeof(NCR_700_SDTR_msg)); + count += sizeof(NCR_700_SDTR_msg); + NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); + } + + dma_cache_wback((unsigned long)hostdata->msgout, count); + + script_patch_16(hostdata->script, MessageCount, count); + + + script_patch_ID(hostdata->script, + Device_ID, 1<target); + + script_patch_32_abs(hostdata->script, CommandAddress, + virt_to_bus(SCp->cmnd)); + script_patch_16(hostdata->script, CommandCount, SCp->cmd_len); + /* finally plumb the beginning of the SG list into the script + * */ + script_patch_32_abs(hostdata->script, SGScriptStartAddress, + virt_to_bus(&slot->SG[0].ins)); + NCR_700_writeb(CLR_FIFO, SCp->host, DFIFO_REG); + + /* set the synchronous period/offset */ + if(slot->resume_offset == 0) + slot->resume_offset = hostdata->pScript; + NCR_700_writeb(NCR_700_get_SXFER(SCp->device), + SCp->host, SXFER_REG); + /* allow interrupts here so that if we're selected we can take + * a selection interrupt. 
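For reference, the message-out buffer assembled in NCR_700_start_command() carries the standard SCSI-2 byte sequence sketched below. The exact contents of NCR_700_SDTR_msg are not visible in this hunk, so the trailing bytes are an assumption based on the SCSI-2 SDTR format:

	/* msgout[] after setup (tagged command, sync negotiation pending):
	 *
	 *   [0] IDENTIFY           0x80 | (DiscPriv << 6) | lun
	 *   [1] SIMPLE QUEUE TAG   0x20
	 *   [2] tag                slot->tag
	 *   [3] EXTENDED MESSAGE   0x01  \
	 *   [4] length             0x03   \  presumed layout of
	 *   [5] SDTR               0x01   /  NCR_700_SDTR_msg
	 *   [6] period  [7] offset       /
	 */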
The script start may not be + * effective in this case, but the selection interrupt will + * save our command in that case */ + NCR_700_writel(slot->temp, SCp->host, TEMP_REG); + NCR_700_writel(slot->resume_offset, SCp->host, DSP_REG); + restore_flags(flags); + + return 1; +} + +void +NCR_700_intr(int irq, void *dev_id, struct pt_regs *regs) +{ + struct Scsi_Host *host = (struct Scsi_Host *)dev_id; + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)host->hostdata[0]; + __u8 istat; + __u32 resume_offset = 0; + __u8 pun = 0xff, lun = 0xff; + unsigned long flags; + + /* Unfortunately, we have to take the io_request_lock here + * rather than the host lock hostdata->lock because we're + * looking to exclude queuecommand from messing with the + * registers while we're processing the interrupt. Since + * queuecommand is called holding io_request_lock, and we have + * to take io_request_lock before we call the command + * scsi_done, we would get a deadlock if we took + * hostdata->lock here and in queuecommand (because the order + * of locking in queuecommand: 1) io_request_lock then 2) + * hostdata->lock would be the reverse of taking it in this + * routine */ + spin_lock_irqsave(&io_request_lock, flags); + if((istat = NCR_700_readb(host, ISTAT_REG)) + & (SCSI_INT_PENDING | DMA_INT_PENDING)) { + __u32 dsps; + __u8 sstat0 = 0, dstat = 0; + __u32 dsp; + Scsi_Cmnd *SCp = hostdata->cmd; + enum NCR_700_Host_State state; + + state = hostdata->state; + SCp = hostdata->cmd; + + if(istat & SCSI_INT_PENDING) { + udelay(10); + + sstat0 = NCR_700_readb(host, SSTAT0_REG); + } + + if(istat & DMA_INT_PENDING) { + udelay(10); + + dstat = NCR_700_readb(host, DSTAT_REG); + } + + dsps = NCR_700_readl(host, DSPS_REG); + dsp = NCR_700_readl(host, DSP_REG); + + DEBUG(("scsi%d: istat %02x sstat0 %02x dstat %02x dsp %04x[%08x] dsps 0x%x\n", + host->host_no, istat, sstat0, dstat, + (dsp - (__u32)virt_to_bus(hostdata->script))/4, + dsp, dsps)); + + if(SCp != NULL) { + pun = SCp->target; + lun = SCp->lun; + } + + if(sstat0 & SCSI_RESET_DETECTED) { + Scsi_Device *SDp; + int i; + + hostdata->state = NCR_700_HOST_BUSY; + + printk(KERN_ERR "scsi%d: Bus Reset detected, executing command %p, slot %p, dsp %p[%04x]\n", + host->host_no, SCp, SCp == NULL ? 
NULL : SCp->host_scribble, (void *)dsp, dsp - hostdata->pScript); + + /* clear all the negotiated parameters */ + for(SDp = host->host_queue; SDp != NULL; SDp = SDp->next) + SDp->hostdata = 0; + + /* clear all the slots and their pending commands */ + for(i = 0; i < NCR_700_COMMAND_SLOTS_PER_HOST; i++) { + Scsi_Cmnd *SCp; + struct NCR_700_command_slot *slot = + &hostdata->slots[i]; + + if(slot->state == NCR_700_SLOT_FREE) + continue; + + SCp = slot->cmnd; + printk(KERN_ERR " failing command because of reset, slot %p, cmnd %p\n", + slot, SCp); + free_slot(slot, hostdata); + SCp->host_scribble = NULL; + NCR_700_set_depth(SCp->device, 0); + /* NOTE: deadlock potential here: we + * rely on mid-layer guarantees that + * scsi_done won't try to issue the + * command again otherwise we'll + * deadlock on the + * hostdata->state_lock */ + SCp->result = DID_RESET << 16; + SCp->scsi_done(SCp); + } + mdelay(25); + NCR_700_chip_setup(host); + + hostdata->state = NCR_700_HOST_FREE; + hostdata->cmd = NULL; + goto out_unlock; + } else if(sstat0 & SELECTION_TIMEOUT) { + DEBUG(("scsi%d: (%d:%d) selection timeout\n", + host->host_no, pun, lun)); + NCR_700_scsi_done(hostdata, SCp, DID_NO_CONNECT<<16); + } else if(sstat0 & PHASE_MISMATCH) { + struct NCR_700_command_slot *slot = (SCp == NULL) ? NULL : + (struct NCR_700_command_slot *)SCp->host_scribble; + + if(dsp == Ent_SendMessage + 8 + hostdata->pScript) { + /* It wants to reply to some part of + * our message */ +#ifdef NCR_700_DEBUG + __u32 temp = NCR_700_readl(host, TEMP_REG); + int count = (hostdata->script[Ent_SendMessage/4] & 0xffffff) - ((NCR_700_readl(host, DBC_REG) & 0xffffff) + NCR_700_data_residual(host)); + printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG))); +#endif + resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch; + } else if(dsp >= virt_to_bus(&slot->SG[0].ins) && + dsp <= virt_to_bus(&slot->SG[NCR_700_SG_SEGMENTS].ins)) { + int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff; + int SGcount = (dsp - virt_to_bus(&slot->SG[0].ins))/sizeof(struct NCR_700_SG_List); + int residual = NCR_700_data_residual(host); + int i; +#ifdef NCR_700_DEBUG + printk("scsi%d: (%d:%d) Expected phase mismatch in slot->SG[%d], transferred 0x%x\n", + host->host_no, pun, lun, + SGcount, data_transfer); + print_command(SCp->cmnd); + if(residual) { + printk("scsi%d: (%d:%d) Expected phase mismatch in slot->SG[%d], transferred 0x%x, residual %d\n", + host->host_no, pun, lun, + SGcount, data_transfer, residual); + } +#endif + data_transfer += residual; + + if(data_transfer != 0) { + int count; + __u32 pAddr; + + SGcount--; + + count = (bS_to_cpu(slot->SG[SGcount].ins) & 0x00ffffff); + DEBUG(("DATA TRANSFER MISMATCH, count = %d, transferred %d\n", count, count-data_transfer)); + slot->SG[SGcount].ins &= bS_to_host(0xff000000); + slot->SG[SGcount].ins |= bS_to_host(data_transfer); + pAddr = bS_to_cpu(slot->SG[SGcount].pAddr); + pAddr += (count - data_transfer); + slot->SG[SGcount].pAddr = bS_to_host(pAddr); + } + /* set the executed moves to nops */ + for(i=0; iSG[i].ins = bS_to_host(SCRIPT_NOP); + slot->SG[i].pAddr = 0; + } + dma_cache_wback((unsigned long)slot->SG, sizeof(slot->SG)); + /* and pretend we disconnected after + * the command phase */ + resume_offset = hostdata->pScript + Ent_MsgInDuringData; + } else { + __u8 sbcl = NCR_700_readb(host, SBCL_REG); + printk(KERN_ERR "scsi%d: 
(%d:%d) phase mismatch at %04x, phase %s\n", + host->host_no, pun, lun, dsp - hostdata->pScript, sbcl_to_string(sbcl)); + NCR_700_internal_bus_reset(host); + } + + } else if(sstat0 & SCSI_GROSS_ERROR) { + printk(KERN_ERR "scsi%d: (%d:%d) GROSS ERROR\n", + host->host_no, pun, lun); + NCR_700_scsi_done(hostdata, SCp, DID_ERROR<<16); + } else if(dstat & SCRIPT_INT_RECEIVED) { + DEBUG(("scsi%d: (%d:%d) ====>SCRIPT INTERRUPT<====\n", + host->host_no, pun, lun)); + resume_offset = process_script_interrupt(dsps, dsp, SCp, host, hostdata); + } else if(dstat & (ILGL_INST_DETECTED)) { + printk(KERN_ERR "scsi%d: (%d:%d) Illegal Instruction detected at 0x%p[0x%x]!!!\n" + " Please email James.Bottomley@HansenPartnership.com with the details\n", + host->host_no, pun, lun, + (void *)dsp, dsp - hostdata->pScript); + NCR_700_scsi_done(hostdata, SCp, DID_ERROR<<16); + } else if(dstat & (WATCH_DOG_INTERRUPT|ABORTED)) { + printk(KERN_ERR "scsi%d: (%d:%d) serious DMA problem, dstat=%02x\n", + host->host_no, pun, lun, dstat); + NCR_700_scsi_done(hostdata, SCp, DID_ERROR<<16); + } + + + /* NOTE: selection interrupt processing MUST occur + * after script interrupt processing to correctly cope + * with the case where we process a disconnect and + * then get reselected before we process the + * disconnection */ + if(sstat0 & SELECTED) { + /* FIXME: It currently takes at least FOUR + * interrupts to complete a command that + * disconnects: one for the disconnect, one + * for the reselection, one to get the + * reselection data and one to complete the + * command. If we guess the reselected + * command here and prepare it, we only need + * to get a reselection data interrupt if we + * guessed wrongly. Since the interrupt + * overhead is much greater than the command + * setup, this would be an efficient + * optimisation particularly as we probably + * only have one outstanding command on a + * target most of the time */ + + resume_offset = process_selection(host, dsp); + + } + + } + + if(resume_offset) { + if(hostdata->state != NCR_700_HOST_BUSY) { + printk(KERN_ERR "scsi%d: Driver error: resume at %p [%04x] with non busy host!\n", + host->host_no, (void *)resume_offset, resume_offset - hostdata->pScript); + hostdata->state = NCR_700_HOST_BUSY; + } + + DEBUG(("Attempting to resume at %x\n", resume_offset)); + NCR_700_writeb(CLR_FIFO, host, DFIFO_REG); + NCR_700_writel(resume_offset, host, DSP_REG); + } + /* There is probably a technical no-no about this: If we're a + * shared interrupt and we got this interrupt because the + * other device needs servicing not us, we're still going to + * check our queued commands here---of course, there shouldn't + * be any outstanding.... 
*/ + if(hostdata->state == NCR_700_HOST_FREE) { + int i; + + for(i = 0; i < NCR_700_COMMAND_SLOTS_PER_HOST; i++) { + /* fairness: always run the queue from the last + * position we left off */ + int j = (i + hostdata->saved_slot_position) + % NCR_700_COMMAND_SLOTS_PER_HOST; + + if(hostdata->slots[j].state != NCR_700_SLOT_QUEUED) + continue; + if(NCR_700_start_command(hostdata->slots[j].cmnd)) { + DEBUG(("scsi%d: Issuing saved command slot %p, cmd %p\t\n", + host->host_no, &hostdata->slots[j], + hostdata->slots[j].cmnd)); + hostdata->saved_slot_position = j + 1; + } + + break; + } + } + out_unlock: + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* FIXME: Need to put some proc information in and plumb it + * into the scsi proc system */ +STATIC int +NCR_700_proc_directory_info(char *proc_buf, char **startp, + off_t offset, int bytes_available, + int host_no, int write) +{ + static char buf[4096]; /* 1 page should be sufficient */ + int len = 0; + struct Scsi_Host *host = scsi_hostlist; + struct NCR_700_Host_Parameters *hostdata; + Scsi_Device *SDp; + + while(host != NULL && host->host_no != host_no) + host = host->next; + + if(host == NULL) + return 0; + + if(write) { + /* FIXME: Clear internal statistics here */ + return 0; + } + hostdata = (struct NCR_700_Host_Parameters *)host->hostdata[0]; + len += sprintf(&buf[len], "Total commands outstanding: %d\n", hostdata->command_slot_count); + len += sprintf(&buf[len],"\ +Target Depth Active Next Tag\n\ +====== ===== ====== ========\n"); + for(SDp = host->host_queue; SDp != NULL; SDp = SDp->next) { + len += sprintf(&buf[len]," %2d:%2d %4d %4d %4d\n", SDp->id, SDp->lun, SDp->queue_depth, NCR_700_get_depth(SDp), SDp->current_tag); + } + if((len -= offset) <= 0) + return 0; + if(len > bytes_available) + len = bytes_available; + memcpy(proc_buf, buf + offset, len); + return len; +} + +STATIC int +NCR_700_queuecommand(Scsi_Cmnd *SCp, void (*done)(Scsi_Cmnd *)) +{ + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)SCp->host->hostdata[0]; + __u32 move_ins; + struct NCR_700_command_slot *slot; + int hash; + + if(hostdata->command_slot_count >= NCR_700_COMMAND_SLOTS_PER_HOST) { + /* We're over our allocation, this should never happen + * since we report the max allocation to the mid layer */ + printk(KERN_WARNING "scsi%d: Command depth has gone over queue depth\n", SCp->host->host_no); + return 1; + } + if(NCR_700_get_depth(SCp->device) != 0 && !(hostdata->tag_negotiated & (1<target))) { + DEBUG((KERN_ERR "scsi%d (%d:%d) has non zero depth %d\n", + SCp->host->host_no, SCp->target, SCp->lun, + NCR_700_get_depth(SCp->device))); + return 1; + } + if(NCR_700_get_depth(SCp->device) >= NCR_700_MAX_TAGS) { + DEBUG((KERN_ERR "scsi%d (%d:%d) has max tag depth %d\n", + SCp->host->host_no, SCp->target, SCp->lun, + NCR_700_get_depth(SCp->device))); + return 1; + } + NCR_700_set_depth(SCp->device, NCR_700_get_depth(SCp->device) + 1); + + /* begin the command here */ + /* no need to check for NULL, test for command_slot_cound above + * ensures a slot is free */ + slot = find_empty_slot(hostdata); + + slot->cmnd = SCp; + + SCp->scsi_done = done; + SCp->host_scribble = (unsigned char *)slot; + SCp->SCp.ptr = NULL; + SCp->SCp.buffer = NULL; + +#ifdef NCR_700_DEBUG + printk("53c700: scsi%d, command ", SCp->host->host_no); + print_command(SCp->cmnd); +#endif + + if(hostdata->tag_negotiated &(1<target)) { + + struct NCR_700_command_slot *old = + find_ITL_Nexus(hostdata, SCp->target, SCp->lun); +#ifdef NCR_700_TAG_DEBUG + struct 
NCR_700_command_slot *found; +#endif + + if(old != NULL && old->tag == SCp->device->current_tag) { + printk(KERN_WARNING "scsi%d (%d:%d) Tag clock back to current, queueing\n", SCp->host->host_no, SCp->target, SCp->lun); + return 1; + } + slot->tag = SCp->device->current_tag++; +#ifdef NCR_700_TAG_DEBUG + while((found = find_ITLQ_Nexus(hostdata, SCp->target, SCp->lun, slot->tag)) != NULL) { + printk("\n\n**ERROR** already using tag %d, but oldest is %d\n", slot->tag, (old == NULL) ? -1 : old->tag); + printk(" FOUND = %p, tag = %d, pun = %d, lun = %d\n", + found, found->tag, found->cmnd->target, found->cmnd->lun); + slot->tag = SCp->device->current_tag++; + printk(" Tag list is: "); + while(old != NULL) { + if(old->cmnd->target == SCp->target && + old->cmnd->lun == SCp->lun) + printk("%d ", old->tag); + old = old->ITL_back; + } + printk("\n\n"); + } +#endif + hash = hash_ITLQ(SCp->target, SCp->lun, slot->tag); + /* link into the ITLQ hash queues */ + slot->ITLQ_forw = hostdata->ITLQ_Hash_forw[hash]; + hostdata->ITLQ_Hash_forw[hash] = slot; +#ifdef NCR_700_TAG_DEBUG + if(slot->ITLQ_forw != NULL && slot->ITLQ_forw->ITLQ_back != NULL) { + printk(KERN_ERR "scsi%d (%d:%d) ITLQ_back is not NULL!!!!\n", SCp->host->host_no, SCp->target, SCp->lun); + } +#endif + if(slot->ITLQ_forw != NULL) + slot->ITLQ_forw->ITLQ_back = slot; + else + hostdata->ITLQ_Hash_back[hash] = slot; + slot->ITLQ_back = NULL; + } else { + slot->tag = NCR_700_NO_TAG; + } + /* link into the ITL hash queues */ + hash = hash_ITL(SCp->target, SCp->lun); + slot->ITL_forw = hostdata->ITL_Hash_forw[hash]; + hostdata->ITL_Hash_forw[hash] = slot; +#ifdef NCR_700_TAG_DEBUG + if(slot->ITL_forw != NULL && slot->ITL_forw->ITL_back != NULL) { + printk(KERN_ERR "scsi%d (%d:%d) ITL_back is not NULL!!!!\n", + SCp->host->host_no, SCp->target, SCp->lun); + } +#endif + if(slot->ITL_forw != NULL) + slot->ITL_forw->ITL_back = slot; + else + hostdata->ITL_Hash_back[hash] = slot; + slot->ITL_back = NULL; + + + /* This is f****g ridiculous; every low level HBA driver has + * to determine the direction of the commands, why isn't this + * done inside the scsi_lib !!??? */ + switch (SCp->cmnd[0]) { + case REQUEST_SENSE: + /* clear the internal sense magic */ + SCp->cmnd[6] = 0; + /* fall through */ + case INQUIRY: + case MODE_SENSE: + case READ_6: + case READ_10: + case READ_12: + case READ_CAPACITY: + case READ_BLOCK_LIMITS: + case READ_TOC: + move_ins = SCRIPT_MOVE_DATA_IN; + break; + case MODE_SELECT: + case WRITE_6: + case WRITE_10: + case WRITE_12: + move_ins = SCRIPT_MOVE_DATA_OUT; + break; + case TEST_UNIT_READY: + case ALLOW_MEDIUM_REMOVAL: + case START_STOP: + move_ins = 0; + break; + default: + /* OK, get it from the command */ + switch(SCp->sc_data_direction) { + case SCSI_DATA_UNKNOWN: + default: + printk(KERN_ERR "53c700: Unknown command for data direction "); + print_command(SCp->cmnd); + + move_ins = 0; + break; + case SCSI_DATA_NONE: + move_ins = 0; + break; + case SCSI_DATA_READ: + move_ins = SCRIPT_MOVE_DATA_IN; + break; + case SCSI_DATA_WRITE: + move_ins = SCRIPT_MOVE_DATA_OUT; + break; + } + } + + /* now build the scatter gather list */ + if(move_ins != 0) { + int i; + + for(i = 0; i < (SCp->use_sg ? 
SCp->use_sg : 1); i++) { + void *vPtr; + __u32 count; + + if(SCp->use_sg) { + vPtr = (((struct scatterlist *)SCp->buffer)[i].address); + count = ((struct scatterlist *)SCp->buffer)[i].length; + } else { + vPtr = SCp->request_buffer; + count = SCp->request_bufflen; + } + slot->SG[i].ins = bS_to_host(move_ins | count); + DEBUG((" scatter block %d: move %d[%08x] from 0x%lx\n", + i, count, slot->SG[i].ins, + virt_to_bus(vPtr))); + dma_cache_wback_inv((unsigned long)vPtr, count); + slot->SG[i].pAddr = bS_to_host(virt_to_bus(vPtr)); + } + slot->SG[i].ins = bS_to_host(SCRIPT_RETURN); + slot->SG[i].pAddr = 0; + dma_cache_wback((unsigned long)slot->SG, sizeof(slot->SG)); + DEBUG((" SETTING %08lx to %x\n", + virt_to_bus(&slot->SG[i].ins), + slot->SG[i].ins)); + } + slot->resume_offset = 0; + NCR_700_start_command(SCp); + return 0; +} + +STATIC int +NCR_700_abort(Scsi_Cmnd * SCp) +{ + struct NCR_700_command_slot *slot; + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)SCp->host->hostdata[0]; + + printk(KERN_INFO "scsi%d (%d:%d) New error handler wants to abort command\n\t", + SCp->host->host_no, SCp->target, SCp->lun); + print_command(SCp->cmnd); + + slot = find_ITL_Nexus(hostdata, SCp->target, SCp->lun); + while(slot != NULL && slot->cmnd != SCp) + slot = slot->ITL_back; + + if(slot == NULL) + /* no outstanding command to abort */ + return SUCCESS; + if(SCp->cmnd[0] == TEST_UNIT_READY) { + /* FIXME: This is because of a problem in the new + * error handler. When it is in error recovery, it + * will send a TUR to a device it thinks may still be + * showing a problem. If the TUR isn't responded to, + * it will abort it and mark the device off line. + * Unfortunately, it does no other error recovery, so + * this would leave us with an outstanding command + * occupying a slot. Rather than allow this to + * happen, we issue a bus reset to force all + * outstanding commands to terminate here. 
*/ + NCR_700_internal_bus_reset(SCp->host); + /* still drop through and return failed */ + } + return FAILED; + +} + +STATIC int +NCR_700_bus_reset(Scsi_Cmnd * SCp) +{ + printk(KERN_INFO "scsi%d (%d:%d) New error handler wants BUS reset, cmd %p\n\t", + SCp->host->host_no, SCp->target, SCp->lun, SCp); + print_command(SCp->cmnd); + NCR_700_internal_bus_reset(SCp->host); + return SUCCESS; +} + +STATIC int +NCR_700_dev_reset(Scsi_Cmnd * SCp) +{ + printk(KERN_INFO "scsi%d (%d:%d) New error handler wants device reset\n\t", + SCp->host->host_no, SCp->target, SCp->lun); + print_command(SCp->cmnd); + + return FAILED; +} + +STATIC int +NCR_700_host_reset(Scsi_Cmnd * SCp) +{ + printk(KERN_INFO "scsi%d (%d:%d) New error handler wants HOST reset\n\t", + SCp->host->host_no, SCp->target, SCp->lun); + print_command(SCp->cmnd); + + NCR_700_internal_bus_reset(SCp->host); + NCR_700_chip_reset(SCp->host); + return SUCCESS; +} + +EXPORT_SYMBOL(NCR_700_detect); +EXPORT_SYMBOL(NCR_700_release); +EXPORT_SYMBOL(NCR_700_intr); diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/Makefile aio-v2.4.9-ac14.diff/fs/Makefile --- /md0/kernels/2.4/v2.4.9-ac14/fs/Makefile Mon Sep 24 02:14:15 2001 +++ aio-v2.4.9-ac14.diff/fs/Makefile Mon Sep 24 19:09:13 2001 @@ -12,7 +12,7 @@ obj-y := open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + fcntl.o ioctl.o readdir.o select.o fifo.o locks.o aio.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o jbd-kernel.o namespace.o diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/aio.c aio-v2.4.9-ac14.diff/fs/aio.c --- /md0/kernels/2.4/v2.4.9-ac14/fs/aio.c Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/fs/aio.c Tue Sep 25 20:59:38 2001 @@ -0,0 +1,717 @@ +//#define DEBUG 1 +/* drivers/char/aio.c + * An async IO implementation for Linux + * Written by Benjamin LaHaise + * + * Implements /dev/aio, something on top of which it should be possible + * to write a POSIX AIO library. + * + * Copyright 2000, 2001 Red Hat, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#define DEBUG 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#undef KERN_DEBUG +#define KERN_DEBUG "" +#define MAX_IOCTXS 0x800 +#define dprintk(x...) do { ; } while (0) + +static spinlock_t aio_read_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t aio_req_lock = SPIN_LOCK_UNLOCKED; + +static kmem_cache_t *kiocb_cachep; +static kmem_cache_t *kioctx_cachep; + +/* Lockless for reads. Needs replacement rsn. */ +static struct kioctx *ioctx_list; +static unsigned long new_ioctx_id; + +/* tunable. Needs to be added to sysctl. 
*/ +int max_aio_reqs = 0x10000; + +/* aio_setup + * Creates the slab caches used by the aio routines, panic on + * failure as this is done early during the boot sequence. + */ +static int __init aio_setup(void) +{ + kiocb_cachep = kmem_cache_create("kiocb", sizeof(struct kiocb), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!kiocb_cachep) + panic("unable to create kiocb cache\n"); + + kioctx_cachep = kmem_cache_create("kioctx", sizeof(struct kioctx), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!kioctx_cachep) + panic("unable to create kioctx cache"); + + printk(KERN_NOTICE "aio_setup: okay!\n"); + printk(KERN_NOTICE "aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); + + return 0; +} + +/* ioctx_alloc + * Allocates and initializes an aioctx. Returns an ERR_PTR if it failed. + */ +static struct kioctx *ioctx_alloc(unsigned nr_reqs) +{ + struct kioctx *ctx; + unsigned i; + long size; + + if (nr_reqs > (0x70000000U / sizeof(struct io_event))) { + pr_debug("ENOMEM: nr_reqs too high\n"); + return ERR_PTR(-ENOMEM); + } + + /* Round off to a power of 2. Needed for cheap mask operations */ + for (i=1; imax_reqs = nr_reqs; + + atomic_set(&ctx->users, 1); + spin_lock_init(&ctx->lock); + init_waitqueue_head(&ctx->wait); + + size = sizeof(struct kiocb) * nr_reqs; + ctx->reqs = kmalloc(size, GFP_KERNEL); + if (!ctx->reqs) + goto out_freectx; + + memset(ctx->reqs, 0, size); + for (i=0; ireqs[i].ctx = ctx; + ctx->reqs[i].user_obj = ctx->reqs + i + 1; + } + ctx->reqs[nr_reqs-1].user_obj = NULL; + ctx->free_req = ctx->reqs; + size = sizeof(struct aio_ring); + size += sizeof(struct io_event) * nr_reqs; + /* This limits things somewhat for now. */ + ctx->ring = kmalloc(size, GFP_KERNEL); + if (!ctx->ring) + goto out_freereqs; + + memset(ctx->ring, 0, size); + ctx->mm = current->mm; + ctx->ring_mask = nr_reqs - 1; /* trusted copy */ + ctx->ring->mask = ctx->ring_mask; /* user copy */ + + /* now link into global list. kludge. FIXME */ + spin_lock(&aio_req_lock); /* FIXME */ + ctx->ring->id = ctx->user_id = new_ioctx_id++; /* FIXME */ + ctx->next = ioctx_list; /* FIXME */ + ioctx_list = ctx; /* FIXME */ + spin_unlock(&aio_req_lock); /* FIXME */ + + printk("aio: allocated aioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, ctx->mm, ctx->ring->mask); + return ctx; + +out_freereqs: + kfree(ctx->reqs); +out_freectx: + kmem_cache_free(kioctx_cachep, ctx); + ctx = ERR_PTR(-ENOMEM); + + printk("aio: error allocating aioctx %p\n", ctx); + return ctx; +} + +/* __aioctx_put + * Called when the last user of an aio context has gone away, + * and the struct needs to be freed. + */ +void __aioctx_put(struct kioctx *ctx) +{ + printk("aio: free aioctx %p\n", ctx); + + kfree(ctx->ring); + kfree(ctx->reqs); + kmem_cache_free(kioctx_cachep, ctx); +} + +/* aio_get_req + * Allocate a slot for an aio request. Increments the users count + * of the kioctx so that the kioctx stays around until all requests are + * complete. Returns -EAGAIN if no requests are free. + */ +static inline struct kiocb *aio_get_req(struct kioctx *ctx) +{ + struct kiocb *req; + + /* FIXME: use cmpxchg instead of spin_lock? 
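The "round off to a power of 2" loop in ioctx_alloc() is what lets the event-ring indices wrap with a simple mask instead of a divide; the same idea as a self-contained sketch:

static unsigned example_round_up_pow2(unsigned nr)
{
	unsigned size;

	for (size = 1; size < nr; size <<= 1)
		;
	return size;			/* e.g. 1000 -> 1024 */
}

/* with size a power of two, ring_mask = size - 1 and
 *	tail = (tail + 1) & ring_mask;
 * behaves exactly like (tail + 1) % size, without the division */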
*/ + spin_lock_irq(&ctx->lock); + req = ctx->free_req; + if (req) { + ctx->free_req = req->user_obj; + spin_unlock_irq(&ctx->lock); + req->user_obj = NULL; + + atomic_inc(&ctx->users); + return req; + } + spin_unlock_irq(&ctx->lock); + + return NULL; +} + +static void aio_put_req(struct kioctx *ctx, struct kiocb *req) +{ + //fput(req->filp); /* FIXME */ + if (req->filp && atomic_dec_and_test(&req->filp->f_count)) + BUG(); /* not really, but... */ + + req->filp = NULL; + req = ctx->reqs; + /* FIXME: use cmpxchg instead of spin_lock? */ + spin_lock_irq(&ctx->lock); + req->cancel = NULL; + req->user_obj = ctx->free_req; + ctx->free_req = req; + spin_unlock_irq(&ctx->lock); +} + +/* Lookup an ioctx id. ioctx_list is lockless for reads. + * FIXME: this is O(n) and is only suitable for development. + */ +static inline struct kioctx *get_ioctx(unsigned long ctx_id) +{ + struct kioctx *ioctx = ioctx_list; + struct mm_struct *mm = current->mm; + + do { + if (ioctx->user_id == ctx_id && ioctx->mm == mm) + return ioctx; + ioctx = ioctx->next; + } while (ioctx); + + return NULL; +} + +static inline void put_ioctx(struct kioctx *ctx) +{ + // FIXME!!! + //aioctx_put(ctx); +} + +/* aio_complete + * Called when the io request on the given iocb is complete. + */ +void aio_complete(struct kiocb *iocb, long res, long res2) +{ + struct kioctx *ctx = iocb->ctx; + struct aio_ring *ring = ctx->ring; + struct io_event *event; + unsigned long flags; + unsigned long tail; + + /* add a completion event to the ring buffer. + * must be done holding ctx->lock to prevent + * other code from messing with the tail + * pointer since we might be called from irq + * context. + */ + spin_lock_irqsave(&ctx->lock, flags); + + tail = ring->tail; + event = &ring->io_events[tail]; + tail = (tail + 1) & ring->mask; + + event->obj = (u64)(unsigned long)iocb->user_obj; + event->data = iocb->user_data; + event->res = res; + event->res2 = res2; + + dprintk("aio_complete: %p[%lu]: %p: %Lx %Lx %lx %lx\n", + ctx, tail, iocb, iocb->user_obj, iocb->user_data, res, res2); + + /* after flagging the request as done, we + * must never even look at it again + */ + barrier(); + + ring->tail = tail; + + wmb(); + if (!ring->woke) + ring->woke = 1; + + spin_unlock_irqrestore(&ctx->lock, flags); + + pr_debug("added to ring %p at [%lu]\n", iocb, tail); +#if 0 + if (!wake) { + printk("kio_complete: should send user of %p a signal...\n", ctx); + } +#endif + + wake_up(&ctx->wait); + + /* everything turned out well, dispose of the aiocb. */ + aio_put_req(ctx, iocb); +} + +/* aio_read_evt + * Pull an event off of the aioctx's event ring. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). 
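ioctx_alloc() threads the preallocated kiocbs into a free list through their user_obj pointers, and aio_get_req()/aio_put_req() pop and push that list under ctx->lock. Stripped of the locking and refcounting, the pattern is just a singly linked stack (sketch, hypothetical ex_* names):

struct ex_req { void *user_obj; };

static struct ex_req *ex_get(struct ex_req **freelist)		/* cf. aio_get_req */
{
	struct ex_req *req = *freelist;

	if (req) {
		*freelist = req->user_obj;	/* unlink the head of the free list */
		req->user_obj = NULL;		/* field is now free to hold the user iocb */
	}
	return req;
}

static void ex_put(struct ex_req **freelist, struct ex_req *req)	/* cf. aio_put_req */
{
	req->user_obj = *freelist;		/* push back onto the free list */
	*freelist = req;
}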
+ */ +static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) +{ + struct aio_ring *ring = ioctx->ring; + unsigned long head; + int ret = -EAGAIN; + + pr_debug("in aio_read_evt h%lu t%lu\n", + (unsigned long)ring->head, (unsigned long)ring->tail); + barrier(); + if (ring->head == ring->tail) + goto out; + + spin_lock(&aio_read_lock); /* investigate the value of making this per-ctx */ + + head = ring->head; + if (head != ring->tail) { + *ent = ring->io_events[head]; + head = (head + 1) & ioctx->ring_mask; + barrier(); + ring->head = head; + ret = 0; + } + spin_unlock(&aio_read_lock); + +out: + pr_debug("leaving aio_read_evt: %d h%lu t%lu\n", ret, + (unsigned long)ring->head, (unsigned long)ring->tail); + return ret; +} + +struct timeout { + struct timer_list timer; + int timed_out; + wait_queue_head_t wait; +}; + +static void timeout_func(unsigned long data) +{ + struct timeout *to = (struct timeout *)data; + + to->timed_out = 1; + wake_up(&to->wait); +} + +static inline void init_timeout(struct timeout *to) +{ + init_timer(&to->timer); + to->timer.data = (unsigned long)to; + to->timer.function = timeout_func; + to->timed_out = 0; + init_waitqueue_head(&to->wait); +} + +static inline void set_timeout(struct timeout *to, struct timespec *ts) +{ + unsigned long how_long; + + if (!ts->tv_sec && !ts->tv_nsec) { + to->timed_out = 1; + return; + } + + how_long = ts->tv_sec * HZ; +#define HZ_NS (1000000000 / HZ) + how_long += (ts->tv_nsec + HZ_NS - 1) / HZ_NS; + + to->timer.expires = jiffies + how_long; + add_timer(&to->timer); +} + +static inline void clear_timeout(struct timeout *to) +{ + del_timer_sync(&to->timer); +} + +static int read_events(struct kioctx *ctx, int nr, struct io_event *event, + struct timespec *timeout) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + DECLARE_WAITQUEUE(to_wait, tsk); + int ret; + int i = 0; + struct io_event ent; + struct timespec ts; + struct timeout to; + + init_timeout(&to); + + if (timeout) { + ret = -EFAULT; + if (copy_from_user(&ts, timeout, sizeof(ts))) + goto out; + + set_timeout(&to, &ts); + } + + memset(&ent, 0, sizeof(ent)); + ret = 0; + + while (i < nr) { + ret = aio_read_evt(ctx, &ent); + if (ret) { + if (i) + break; + + ret = 0; + if (!i && !timeout) + break; + + add_wait_queue(&ctx->wait, &wait); + add_wait_queue(&to.wait, &to_wait); + do { + set_task_state(tsk, TASK_INTERRUPTIBLE); + + ret = aio_read_evt(ctx, &ent); + if (!ret) + break; + ret = -ETIMEDOUT; + if (to.timed_out) + break; + schedule(); + if (to.timed_out) + break; + if (signal_pending(tsk)) { + ret = -EINTR; + break; + } + ret = aio_read_evt(ctx, &ent); + } while (ret) ; + + set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(&ctx->wait, &wait); + remove_wait_queue(&to.wait, &to_wait); + } + + if (ret) + break; + + pr_debug("read event: %Lx %Lx %Lx %Lx\n", + ent.data, ent.obj, ent.res, ent.res2); + + /* FIXME: split checks in two */ + ret = -EFAULT; + if (copy_to_user(event, &ent, sizeof(ent))) { + /* FIXME: we lose an event here. */ + printk(KERN_DEBUG "aio: lost an event due to EFAULT.\n"); + break; + } + + /* Now complete the aio request and copy the result codes to userland. */ + event ++; + i ++; + } + + if (timeout) + clear_timeout(&to); +out: + return i ? 
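set_timeout() converts the caller's relative timespec into jiffies, rounding the nanosecond part up so a non-zero timeout can never collapse to zero ticks. A worked example, assuming HZ = 100 (so HZ_NS = 10,000,000 ns per tick):

	/* ts = { .tv_sec = 1, .tv_nsec = 500000000 }   (1.5 seconds)
	 *
	 *	how_long  = 1 * HZ                           = 100 jiffies
	 *	how_long += (500000000 + HZ_NS - 1) / HZ_NS  =  50 jiffies
	 *	timer expires at jiffies + 150
	 *
	 * ts = { 0, 1 } still yields 1 jiffy rather than 0; only an all-zero
	 * timespec takes the "time out immediately" shortcut at the top. */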
i : ret; +} + +asmlinkage long sys___io_setup(unsigned nr_reqs, aio_context_t *ctxp) +{ + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; + + ret = get_user(ctx, ctxp); + if (ret) + goto out; + + ret = -EINVAL; + if (ctx || nr_reqs > max_aio_reqs) { + pr_debug("EINVAL: io_setup: !ctx or nr_reqs > max\n"); + goto out; + } + + ioctx = ioctx_alloc(nr_reqs); + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); + if (!ret) + return 0; + aioctx_put(ioctx); + } + +out: + return ret; +} + +/* aio_release + * Free the aioctx associated with the file. FIXME! + */ +asmlinkage long sys___io_destroy(aio_context_t ctx) +{ + struct kioctx *ioctx = get_ioctx(ctx); + if (ioctx) { + dprintk("aio_release(%p)\n", filp->private_data); + aioctx_put(ioctx); + return 0; + } + pr_debug("EINVAL: io_destroy: invalid context id\n"); + return -EINVAL; +} + +/* sys___io_submit + * Copy an aiocb from userspace into kernel space, then convert it to + * a kiocb, submit and repeat until done. Error codes on copy/submit + * only get returned for the first aiocb copied as otherwise the size + * of aiocbs copied is returned (standard write sematics). + */ +asmlinkage long sys___io_submit(aio_context_t ctx_id, int nr, struct iocb **iocbpp) +{ + struct kioctx *ctx; + long ret = 0; + int i; + + ctx = get_ioctx(ctx_id); + if (!ctx) { + pr_debug("EINVAL: io_submit: invalid context id\n"); + return -EINVAL; + } + + for (i=0; ireqs; + ret = put_user(tmp.aio_key, &iocbp->aio_key); + if (ret) + goto out_put_req; + + req->user_obj = iocbp; + req->user_data = tmp.aio_data; + + switch (tmp.aio_lio_opcode) { + case IOCB_CMD_PREAD: op = file->f_op->aio_read; break; + case IOCB_CMD_PREADX: op = file->f_op->aio_readx; break; + case IOCB_CMD_PWRITE: op = file->f_op->aio_write; break; + case IOCB_CMD_FSYNC: op = file->f_op->aio_fsync; break; + default: op = NULL; break; + } + ret = -EINVAL; + if (!op) { + pr_debug("EINVAL: io_submit: no operation provided\n"); + goto out_put_req; + } + + ret = op(file, req, tmp); + if (!ret) + continue; + + pr_debug("io_submit: op returned %ld\n", ret); + + out_put_req: + aio_put_req(ctx, req); + out_fput: + fput(file); + break; + } + + put_ioctx(ctx); + run_task_queue(&tq_disk); + return i ? i : ret; +} + +void generic_aio_complete(void *_iocb, struct kvec *vec, ssize_t res) +{ + struct kiocb *iocb = _iocb; + + aio_complete(iocb, res, 0); +} + +ssize_t generic_aio_read(struct file *file, struct kiocb *req, struct iocb iocb, size_t min_size) +{ + unsigned long buf = iocb.aio_buf; + size_t size = iocb.aio_nbytes; + ssize_t nr_read = 0; + loff_t pos = iocb.aio_offset; + kvec_cb_t cb; + + if (file->f_op->new_read) { + nr_read = file->f_op->new_read(file, (void *)buf, size, + &pos, F_ATOMIC); + if (-EAGAIN == nr_read) + nr_read = 0; + if ((nr_read >= min_size) || (nr_read < 0)) + return nr_read; + } + + req->nr_read = nr_read; + size -= nr_read; + buf += nr_read; + cb.vec = map_user_kvec(READ, buf, size); + cb.fn = generic_aio_complete; + cb.data = req; + + printk("generic_aio_read: cb.vec=%p\n", cb.vec); + if (IS_ERR(cb.vec)) + return nr_read ? 
nr_read : PTR_ERR(cb.vec); + + return file->f_op->kvec_read(file, cb, size, pos); +} + +ssize_t generic_file_aio_read(struct file *file, struct kiocb *req, struct iocb iocb) +{ + return generic_aio_read(file, req, iocb, iocb.aio_nbytes); +} + +ssize_t generic_aio_write(struct file *file, struct kiocb *req, struct iocb iocb, size_t min_size) +{ + unsigned long buf = iocb.aio_buf; + size_t size = iocb.aio_nbytes; + ssize_t nr_written = 0; + kvec_cb_t cb; + + if (file->f_op->new_write) { + nr_written = file->f_op->new_write(file, (void *)buf, size, + &iocb.aio_offset, F_ATOMIC); + if (-EAGAIN == nr_written) + nr_written = 0; + if ((nr_written >= min_size) || (nr_written < 0)) + return nr_written; + } + + size -= nr_written; + buf += nr_written; + cb.vec = map_user_kvec(WRITE, buf, size); + cb.fn = generic_aio_complete; + cb.data = req; + + if (IS_ERR(cb.vec)) + return nr_written ? nr_written : PTR_ERR(cb.vec); + + return file->f_op->kvec_write(file, cb, size, iocb.aio_offset); +} + +ssize_t generic_file_aio_write(struct file *file, struct kiocb *req, struct iocb iocb) +{ + return generic_aio_write(file, req, iocb, iocb.aio_nbytes); +} + +asmlinkage long sys___io_cancel(aio_context_t ctx, struct iocb *iocb) +{ + return -ENOSYS; +} + +asmlinkage long sys___io_wait(aio_context_t ctx_id, struct iocb *iocb, struct timespec *timeout) +{ +#if 0 /* FIXME. later. */ + struct kioctx *ioctx; + long ret = -EINVAL; + unsigned key; + long obj = (long)iocb; + + ioctx = get_ioctx(ctx_id); + if (!ioctx) + goto out; + + ret = get_user(key, &iocb->aio_key); + if (ret) + goto out; + + ret = __aio_complete(ioctx, key, obj, !!timeout); + put_ioctx(ioctx); + +out: + return ret; +#endif + return -ENOSYS; +} + +asmlinkage long sys___io_getevents(int ctx_id, int nr, struct io_event *events, + struct timespec *timeout) +{ + struct kioctx *ioctx = get_ioctx(ctx_id); + long ret = -EINVAL; + + if (ioctx) { + ret = read_events(ioctx, nr, events, timeout); + put_ioctx(ioctx); + } + + return ret; +} + +__initcall(aio_setup); diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/buffer.c aio-v2.4.9-ac14.diff/fs/buffer.c --- /md0/kernels/2.4/v2.4.9-ac14/fs/buffer.c Mon Sep 24 02:14:15 2001 +++ aio-v2.4.9-ac14.diff/fs/buffer.c Mon Sep 24 21:13:27 2001 @@ -141,8 +141,7 @@ { clear_bit(BH_Lock, &bh->b_state); smp_mb__after_clear_bit(); - if (waitqueue_active(&bh->b_wait)) - wake_up(&bh->b_wait); + wake_up(&bh->b_wait); } /* @@ -2066,6 +2065,7 @@ return tmp.b_blocknr; } +#if 1 /* * IO completion routine for a buffer_head being used for kiobuf IO: we * can't dispatch the kiobuf callback until io_count reaches 0. @@ -2242,6 +2242,7 @@ return transferred; return err; } +#endif /* * Start I/O on a page. @@ -2873,3 +2874,223 @@ module_init(bdflush_init) +/* async kio interface */ +struct brw_cb { + kvec_cb_t cb; + atomic_t io_count; + int nr; + struct buffer_head *bh[1]; +}; + +static inline void brw_cb_put(struct brw_cb *brw_cb) +{ + if (atomic_dec_and_test(&brw_cb->io_count)) { + ssize_t res = 0, err = 0; + int nr; + + /* Walk the buffer heads associated with this kiobuf + * checking for errors and freeing them as we go. 
+ */ + for (nr=0; nr < brw_cb->nr; nr++) { + struct buffer_head *bh = brw_cb->bh[nr]; + if (!err && buffer_uptodate(bh)) + res += bh->b_size; + else + err = -EIO; + kmem_cache_free(bh_cachep, bh); + } + + if (!res) + res = err; + + brw_cb->cb.fn(brw_cb->cb.data, brw_cb->cb.vec, res); + + kfree(brw_cb); + } +} + +/* + * IO completion routine for a buffer_head being used for kiobuf IO: we + * can't dispatch the kiobuf callback until io_count reaches 0. + */ + +static void end_buffer_io_kiobuf_async(struct buffer_head *bh, int uptodate) +{ + struct brw_cb *brw_cb; + + mark_buffer_uptodate(bh, uptodate); + + brw_cb = bh->b_private; + unlock_buffer(bh); + + brw_cb_put(brw_cb); +} + + +/* + * Start I/O on a physical range of kernel memory, defined by a vector + * of kiobuf structs (much like a user-space iovec list). + * + * The kiobuf must already be locked for IO. IO is submitted + * asynchronously: you need to check page->locked, page->uptodate, and + * maybe wait on page->wait. + * + * It is up to the caller to make sure that there are enough blocks + * passed in to completely map the iobufs to disk. + */ + +int brw_kvec_async(int rw, kvec_cb_t cb, kdev_t dev, unsigned blocks, unsigned long blknr, int sector_shift) +{ + struct kvec *vec = cb.vec; + struct kveclet *veclet; + int err; + int length; + unsigned sector_size = 1 << sector_shift; + int i; + + struct brw_cb *brw_cb; + + printk("vec: %p\n", vec); + if (!vec->nr) + BUG(); + + /* + * First, do some alignment and validity checks + */ + length = 0; + for (veclet=vec->veclet, i=0; i < vec->nr; i++,veclet++) { + length += veclet->length; + if ((veclet->offset & (sector_size-1)) || + (veclet->length & (sector_size-1))) { + printk("brw_kiovec_async: tuple[%d]->offset=0x%x length=0x%x sector_size: 0x%x\n", i, veclet->offset, veclet->length, sector_size); + return -EINVAL; + } + } + + if (length < (blocks << sector_shift)) + BUG(); + + /* + * OK to walk down the iovec doing page IO on each page we find. + */ + err = 0; + + if (!blocks) { + printk("brw_kiovec_async: !i\n"); + return -EINVAL; + } + + /* FIXME: tie into userbeans here */ + brw_cb = kmalloc(sizeof(*brw_cb) + (blocks * sizeof(struct buffer_head *)), GFP_KERNEL); + if (!brw_cb) + return -ENOMEM; + + brw_cb->cb = cb; + brw_cb->nr = 0; + + /* This is ugly. FIXME. */ + for (i=0, veclet=vec->veclet; inr; i++,veclet++) { + struct page *page = veclet->page; + unsigned offset = veclet->offset; + unsigned length = veclet->length; + + if (!page) + BUG(); + + while (length > 0) { + struct buffer_head *tmp; + tmp = kmem_cache_alloc(bh_cachep, GFP_NOIO); + err = -ENOMEM; + if (!tmp) + goto error; + + memset(tmp, 0, sizeof(*tmp)); + init_waitqueue_head(&tmp->b_wait); + tmp->b_dev = B_FREE; + tmp->b_size = sector_size; + set_bh_page(tmp, page, offset); + tmp->b_this_page = tmp; + + init_buffer(tmp, end_buffer_io_kiobuf_async, NULL); + tmp->b_dev = dev; + tmp->b_blocknr = blknr++; + tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) + | (1 << BH_Req); + tmp->b_private = brw_cb; + + if (rw == WRITE) { + set_bit(BH_Uptodate, &tmp->b_state); + clear_bit(BH_Dirty, &tmp->b_state); + } + + brw_cb->bh[brw_cb->nr++] = tmp; + length -= sector_size; + offset += sector_size; + + if (offset >= PAGE_SIZE) { + offset = 0; + break; + } + + if (brw_cb->nr >= blocks) + goto submit; + } /* End of block loop */ + } /* End of page loop */ + +submit: + atomic_set(&brw_cb->io_count, brw_cb->nr+1); + /* okay, we've setup all our io requests, now fire them off! 
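brw_kvec_async() initialises io_count to nr+1 and drops the extra reference itself after the submission loop; without that bias, an I/O completing early could free brw_cb while buffer heads were still being submitted. The idiom in isolation, restated with explanatory comments:

	atomic_set(&brw_cb->io_count, brw_cb->nr + 1);	/* +1 = submitter's reference */

	for (i = 0; i < brw_cb->nr; i++)
		submit_bh(rw, brw_cb->bh[i]);	/* each completion drops one reference */

	brw_cb_put(brw_cb);	/* drop the bias: only now can the callback fire,
				 * even if every bh completed before the loop ended */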
*/ + for (i=0; inr; i++) + submit_bh(rw, brw_cb->bh[i]); + brw_cb_put(brw_cb); + + return 0; + +error: + /* Walk brw_cb_table freeing all the goop associated with each kiobuf */ + if (brw_cb) { + /* We got an error allocating the bh'es. Just free the current + buffer_heads and exit. */ + for (i = brw_cb->nr-1; i--; ) + kmem_cache_free(bh_cachep, brw_cb->bh[i]); + kfree(brw_cb); + } + + return err; +} +#if 0 +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, int nr_blocks, unsigned long b[], int sector_size) +{ + int i; + int transferred = 0; + int err = 0; + + if (!nr) + return 0; + + /* queue up and trigger the io */ + err = brw_kiovec_async(rw, nr, iovec, dev, nr_blocks, b, sector_size); + if (err) + goto out; + + /* wait on the last iovec first -- it's more likely to finish last */ + for (i=nr; --i >= 0; ) + kiobuf_wait_for_io(iovec[i]); + + run_task_queue(&tq_disk); + + /* okay, how much data actually got through? */ + for (i=0; ierrno) { + if (!err) + err = iovec[i]->errno; + break; + } + transferred += iovec[i]->length; + } + +out: + return transferred ? transferred : err; +} +#endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/ext2/file.c aio-v2.4.9-ac14.diff/fs/ext2/file.c --- /md0/kernels/2.4/v2.4.9-ac14/fs/ext2/file.c Mon Sep 24 02:14:15 2001 +++ aio-v2.4.9-ac14.diff/fs/ext2/file.c Tue Sep 25 14:02:13 2001 @@ -47,6 +47,10 @@ open: generic_file_open, release: ext2_release_file, fsync: ext2_sync_file, + aio_read: generic_file_aio_read, + aio_write: generic_file_aio_write, + kvec_read: generic_file_kvec_read, + kvec_write: generic_file_kvec_write, }; struct inode_operations ext2_file_inode_operations = { diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/nfs/file.c aio-v2.4.9-ac14.diff/fs/nfs/file.c --- /md0/kernels/2.4/v2.4.9-ac14/fs/nfs/file.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/fs/nfs/file.c Mon Sep 24 19:09:13 2001 @@ -50,6 +50,7 @@ release: nfs_release, fsync: nfs_fsync, lock: nfs_lock, + //rw_kiovec: generic_file_rw_kiovec, }; struct inode_operations nfs_file_inode_operations = { diff -urN /md0/kernels/2.4/v2.4.9-ac14/fs/select.c aio-v2.4.9-ac14.diff/fs/select.c --- /md0/kernels/2.4/v2.4.9-ac14/fs/select.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/fs/select.c Mon Sep 24 19:11:26 2001 @@ -12,23 +12,31 @@ * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). + * June 2001 + * Added async_poll implementation. 
-ben */ +#include #include #include #include #include /* for STICKY_TIMEOUTS */ #include +#include +#include #include #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) +static kmem_cache_t *poll_table_cache; + struct poll_table_entry { - struct file * filp; - wait_queue_t wait; - wait_queue_head_t * wait_address; + wait_queue_t wait; + wait_queue_head_t *wait_address; + struct file *filp; + poll_table *p; }; struct poll_table_page { @@ -72,6 +80,72 @@ } } +void async_poll_complete(void *data) +{ + poll_table *p = data, *pwait; + struct kiocb *iocb = p->iocb; + unsigned int mask; + + pwait = p; + p->wake = 0; + wmb(); + do { + mask = iocb->filp->f_op->poll(iocb->filp, p); + mask &= p->events | POLLERR | POLLHUP; + if (mask) { + poll_freewait(p); + aio_complete(iocb, mask, 0); + return; + } + p->sync = 0; + wmb(); + } while (p->wake); + +} + +static void async_poll_waiter(wait_queue_t *wait) +{ + struct poll_table_entry *entry = (struct poll_table_entry *)wait; + poll_table *p = entry->p; + + /* avoid writes to the cacheline if possible for SMP */ + if (!p->wake) { + p->wake = 1; + /* ensure only one wake up queues the wtd */ + if (!p->sync && !test_and_set_bit(0, &p->sync)) + wtd_queue(&p->wtd); + } +} + +int async_poll(struct kiocb *iocb, int events) +{ + unsigned int mask; + poll_table *p, *pwait; + + p = kmem_cache_alloc(poll_table_cache, SLAB_KERNEL); + if (!p) + return -ENOMEM; + + poll_initwait(p); + wtd_set_action(&p->wtd, async_poll_complete, p); + p->iocb = iocb; + p->wake = 0; + p->sync = 0; + p->events = events; + pwait = p; + + mask = DEFAULT_POLLMASK; + if (iocb->filp->f_op && iocb->filp->f_op->poll) + mask = iocb->filp->f_op->poll(iocb->filp, p); + mask &= events | POLLERR | POLLHUP; + if (mask) { + poll_freewait(p); + aio_complete(iocb, mask, 0); + } + + return 0; +} + void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { struct poll_table_page *table = p->table; @@ -98,7 +172,11 @@ get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); + entry->p = p; + if (p->iocb) + init_waitqueue_func_entry(&entry->wait, async_poll_waiter); + else + init_waitqueue_entry(&entry->wait, current); add_wait_queue(wait_address,&entry->wait); } } @@ -494,3 +572,14 @@ poll_freewait(&table); return err; } + +static int __init poll_init(void) +{ + poll_table_cache = kmem_cache_create("poll table", + sizeof(poll_table), 0, 0, NULL, NULL); + if (!poll_table_cache) + panic("unable to alloc poll_table_cache"); + return 0; +} + +module_init(poll_init); diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/asm-i386/errno.h aio-v2.4.9-ac14.diff/include/asm-i386/errno.h --- /md0/kernels/2.4/v2.4.9-ac14/include/asm-i386/errno.h Mon Feb 26 10:20:14 2001 +++ aio-v2.4.9-ac14.diff/include/asm-i386/errno.h Mon Sep 24 19:09:13 2001 @@ -128,5 +128,6 @@ #define ENOMEDIUM 123 /* No medium found */ #define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ENOAIO 125 /* fd does not support aio */ #endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/asm-i386/unistd.h aio-v2.4.9-ac14.diff/include/asm-i386/unistd.h --- /md0/kernels/2.4/v2.4.9-ac14/include/asm-i386/unistd.h Fri Aug 11 17:39:23 2000 +++ aio-v2.4.9-ac14.diff/include/asm-i386/unistd.h Mon Sep 24 19:09:13 2001 @@ -227,9 +227,18 @@ #define __NR_madvise1 219 /* delete when C lib stub is removed */ #define __NR_getdents64 220 #define __NR_fcntl64 221 +/* reserved for tux 222 */ +#define __NR___io_setup 223 +#define 
__NR___io_destroy 224 +#define __NR___io_getevents 225 +#define __NR___io_submit 226 +#define __NR___io_cancel 227 +#define __NR___io_wait 228 /* user-visible error numbers are in the range -1 - -124: see */ - +#ifdef NO_SYSCALL_ERRNO +#define __syscall_return(type, res) return (type)(res) +#else #define __syscall_return(type, res) \ do { \ if ((unsigned long)(res) >= (unsigned long)(-125)) { \ @@ -238,6 +247,7 @@ } \ return (type) (res); \ } while (0) +#endif /* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ #define _syscall0(type,name) \ diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/aio.h aio-v2.4.9-ac14.diff/include/linux/aio.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/aio.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/aio.h Mon Sep 24 21:54:55 2001 @@ -0,0 +1,131 @@ +/* linux/aio.h + * Written by Benjamin LaHaise + */ +#ifndef __LINUX__AIO_H +#define __LINUX__AIO_H + +#include + +typedef unsigned long aio_context_t; + +enum { + IOCB_CMD_PREAD = 0, + IOCB_CMD_PWRITE = 1, + IOCB_CMD_FSYNC = 2, + IOCB_CMD_FDSYNC = 3, + IOCB_CMD_PREADX = 4, +}; + +/* read() from /dev/aio returns these structures. */ +struct io_event { + __u64 data; /* the data field from the iocb */ + __u64 obj; /* what iocb this event came from */ + __s64 res; /* result code for this event */ + __s64 res2; /* secondary result */ +}; + +struct aio_ring { + __u32 id; /* kernel internal index number */ + __u32 mask; /* number of io_events - 1 */ + __u32 head; + __u32 tail; + + __u32 woke; /* set when a wakeup was sent */ + __u32 pad1; + __u32 pad2; + __u32 pad3; + + __u32 pad4[24]; /* pad out to 128 bytes */ + + struct io_event io_events[0]; +}; /* 128 bytes + ring size */ + +#if defined(__LITTLE_ENDIAN) +#define PADDED(x,y) x, y +#elif defined(__BIG_ENDIAN) +#define PADDED(x,y) y, x +#else +#error edit for your odd byteorder. +#endif + +/* + * we always use a 64bit off_t when communicating + * with userland. its up to libraries to do the + * proper padding and aio_error abstraction + */ + +struct iocb { + /* these are internal to the kernel/libc. 
*/ + __u64 aio_data; /* data to be returned in event's data */ + __u32 PADDED(aio_key, aio_reserved1); + /* the kernel sets aio_key to the req # */ + + /* common fields */ + __u16 aio_lio_opcode; /* see IOCB_CMD_ above */ + __s16 aio_reqprio; + __u32 aio_fildes; + + __u64 aio_buf; + __u64 aio_nbytes; + __s64 aio_offset; + + /* extra parameters */ + __u64 aio_reserved2; + __u64 aio_reserved3; +}; /* 64 bytes */ + +#undef IFBIG +#undef IFLITTLE + +#ifdef __KERNEL__ +#ifndef __LINUX__KIOVEC_H +#include +#endif +#include + +#define AIO_MAXSEGS 4 +#define AIO_KIOGRP_NR_ATOMIC 8 + +struct kioctx; + +struct kiocb { + void (*cancel)(void *data, struct kioctx *ctx, int idx); + struct file *filp; + struct kioctx *ctx; + void *user_obj; + __u64 user_data; + ssize_t nr_read; +}; + +struct kioctx { + atomic_t users; + + /* This needs improving */ + unsigned long user_id; + struct kioctx *next; + struct mm_struct *mm; + + wait_queue_head_t wait; + + spinlock_t lock; + + struct kiocb *reqs; + struct kiocb *free_req; + + unsigned max_reqs; + unsigned ring_mask; + struct aio_ring *ring; +}; + +extern struct file_operations aio_fops; + +extern void aio_complete(struct kiocb *iocb, long res, long res2); +extern void __aioctx_put(struct kioctx *ctx); + +#define aioctx_get(kioctx) atomic_inc(&(kioctx)->users) +#define aioctx_put(kioctx) do { if (atomic_dec_and_test(&(kioctx)->users)) __aioctx_put(kioctx); } while (0) + +#endif /*__KERNEL__*/ + +#endif /* __AIO_H__ */ + diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/brlock.h aio-v2.4.9-ac14.diff/include/linux/brlock.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/brlock.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/brlock.h Mon Sep 24 21:55:50 2001 @@ -34,6 +34,7 @@ enum brlock_indices { BR_GLOBALIRQ_LOCK, BR_NETPROTO_LOCK, + BR_AIO_LOCK, __BR_END }; diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/compiler.h aio-v2.4.9-ac14.diff/include/linux/compiler.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/compiler.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/compiler.h Mon Sep 24 02:16:05 2001 @@ -0,0 +1,16 @@ +#ifndef __LINUX_COMPILER_H +#define __LINUX_COMPILER_H + +/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented + a mechanism by which the user can annotate likely branch directions and + expect the blocks to be reordered appropriately. Define __builtin_expect + to nothing for earlier compilers. 
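With the __NR___io_* numbers from unistd.h and the structures from linux/aio.h, the interface can be driven directly from userspace. A minimal sketch, not part of the patch: it assumes linux/aio.h is usable from userspace and uses raw syscall(2) since no library wrapper exists yet; error handling is kept to a minimum.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio.h>

int main(void)
{
	aio_context_t ctx = 0;			/* must be zero before __io_setup */
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	struct timespec ts = { 5, 0 };		/* wait up to five seconds */
	char buf[4096];
	int fd = open("/etc/hosts", O_RDONLY);

	if (fd < 0 || syscall(__NR___io_setup, 64, &ctx))
		return 1;			/* room for 64 in-flight requests */

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes     = fd;
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_buf        = (unsigned long)buf;
	cb.aio_nbytes     = sizeof(buf);
	cb.aio_offset     = 0;
	cb.aio_data       = 0x1234;		/* echoed back in ev.data */

	if (syscall(__NR___io_submit, ctx, 1, cbs) != 1)
		return 1;

	/* note: with a NULL timeout this version returns immediately when the
	 * ring is empty, so pass a timespec in order to actually sleep */
	if (syscall(__NR___io_getevents, ctx, 1, &ev, &ts) == 1)
		printf("read %lld bytes, cookie %llx\n",
		       (long long)ev.res, (unsigned long long)ev.data);

	syscall(__NR___io_destroy, ctx);
	return 0;
}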
*/ + +#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 +#define __builtin_expect(x, expected_value) (x) +#endif + +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#endif /* __LINUX_COMPILER_H */ diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/event.h aio-v2.4.9-ac14.diff/include/linux/event.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/event.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/event.h Mon Sep 24 19:09:13 2001 @@ -0,0 +1,21 @@ +#ifndef _LINUX_KEVENTQ_H +#define _LINUX_KEVENTQ_H + +typedef struct file *keventq_t; + +keventq_t keventq_get(int qid); +#define keventq_put(evq) fput(evq) + +keventq_t keventq_get(int qid) +{ + struct file *filp = fget(qid); + if (filp) { + if (&keventq_fops == filp->f_op) + return filp; + fput(filp); + } + return NULL; +} + + +#endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/fs.h aio-v2.4.9-ac14.diff/include/linux/fs.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/fs.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/fs.h Tue Sep 25 14:06:25 2001 @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -803,7 +802,21 @@ * NOTE: * read, write, poll, fsync, readv, writev can be called * without the big kernel lock held in all filesystems. - */ + * + * rw_kiovec returns the number of bytes that will actually + * be transferred into the kiovec, or an error that occurred + * during queueing. + */ +struct iocb; +struct kioctx; +struct kiocb; +struct kiobuf; +#include /* FIXME */ +#include + +#define F_ATOMIC 0x0001 +#define F_OFFSETOK 0x0002 + struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); @@ -823,6 +836,20 @@ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + + + /* this will replace read/write ops above in 2.5 */ + ssize_t (*new_read) (struct file *, char *, size_t, loff_t *, int); + ssize_t (*new_write) (struct file *, char *, size_t, loff_t *, int); + + ssize_t (*aio_read)(struct file *, struct kiocb *, struct iocb); + ssize_t (*aio_readx)(struct file *, struct kiocb *, struct iocb); + ssize_t (*aio_write)(struct file *, struct kiocb *, struct iocb); + ssize_t (*aio_fsync)(struct file *, struct kiocb *, struct iocb); + + /* in-kernel async api */ + int (*kvec_read)(struct file *, kvec_cb_t, size_t, loff_t); + int (*kvec_write)(struct file *, kvec_cb_t, size_t, loff_t); }; struct inode_operations { @@ -1401,6 +1428,12 @@ unsigned long *); extern int block_sync_page(struct page *); +extern int generic_aio_read(struct file *, struct kiocb *, struct iocb, size_t); +extern int generic_aio_write(struct file *, struct kiocb *, struct iocb, size_t); +extern int generic_file_aio_read(struct file *, struct kiocb *, struct iocb); +extern int generic_file_aio_write(struct file *, struct kiocb *, struct iocb); +extern int generic_file_kvec_read(struct file *, kvec_cb_t, size_t, loff_t); +extern int generic_file_kvec_write(struct file *, kvec_cb_t, size_t, loff_t); int generic_block_bmap(struct address_space *, long, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); @@ -1411,6 +1444,7 @@ extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t 
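A filesystem opts into the new file_operations hooks the same way the ext2 hunk above does: point aio_read/aio_write at the generic helpers and supply kvec_read/kvec_write for the actual transfer. Schematically, for a hypothetical filesystem "foo":

static struct file_operations foo_file_operations = {
	read:		generic_file_read,
	write:		generic_file_write,
	mmap:		generic_file_mmap,
	open:		generic_file_open,
	aio_read:	generic_file_aio_read,
	aio_write:	generic_file_aio_write,
	kvec_read:	generic_file_kvec_read,
	kvec_write:	generic_file_kvec_write,
};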
generic_file_write(struct file *, const char *, size_t, loff_t *); extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern int generic_file_rw_kiovec(struct file *filp, int rw, int nr, struct kiobuf **kiovec, int flags, size_t size, loff_t pos); extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); extern loff_t generic_file_llseek(struct file *, loff_t, int); diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/iobuf.h aio-v2.4.9-ac14.diff/include/linux/iobuf.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/iobuf.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/iobuf.h Tue Sep 25 14:09:31 2001 @@ -53,8 +53,10 @@ /* Dynamic state for IO completion: */ atomic_t io_count; /* IOs still in progress */ + int transferred; /* Number of bytes of completed IO at the beginning of the buffer */ int errno; /* Status of completed IO */ void (*end_io) (struct kiobuf *); /* Completion callback */ + void *end_io_data; wait_queue_head_t wait_queue; }; @@ -80,6 +82,8 @@ /* fs/buffer.c */ +int brw_kiovec_async(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, int nr_blocks, unsigned long b[], int size); int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, unsigned long b[], int size); diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/kiovec.h aio-v2.4.9-ac14.diff/include/linux/kiovec.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/kiovec.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/kiovec.h Mon Sep 24 19:14:01 2001 @@ -0,0 +1,36 @@ +#ifndef __LINUX__IOBUF_H +#define __LINUX__IOBUF_H + +struct page; + +struct kveclet { + struct page *page; + unsigned offset; + unsigned length; +}; + +struct kvec { + unsigned max_nr; + unsigned nr; + struct kveclet veclet[0]; +}; + +struct kvec_cb { + struct kvec *vec; + void (*fn)(void *data, struct kvec *vec, ssize_t res); + void *data; +}; + +#ifndef _LINUX_TYPES_H +#include +#endif +#ifndef _LINUX_KDEV_T_H +#include +#endif + +extern struct kvec *map_user_kvec(int rw, unsigned long va, size_t len); +extern void unmap_kvec(struct kvec *); +extern int brw_kvec_async(int rw, kvec_cb_t cb, kdev_t dev, unsigned count, + unsigned long blknr, int sector_shift); + +#endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/lib_lio.h aio-v2.4.9-ac14.diff/include/linux/lib_lio.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/lib_lio.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/lib_lio.h Mon Sep 24 19:09:13 2001 @@ -0,0 +1,108 @@ +#ifndef __LIB_LIO_H +#define __LIB_LIO_H + +struct timespec; +struct sockaddr; +struct iovec; + + +typedef enum lio_iocb_cmd { + + LIO_CMD_PREAD, + LIO_CMD_PWRITE, + LIO_CMD_ACCEPT, + LIO_CMD_CONNECT, + LIO_CMD_SENDTO, + LIO_CMD_RECVFROM, + + LIO_CMD_POLL, +} lio_iocb_cmd_t; + +struct lio_iocb_sendto { + void *msg; + int len; + int flags; + struct sockaddr *addr; +}; + +struct lio_iocb_poll { + int events; +}; /* result code is the set of result flags or -'ve errno */ + +struct lio_iocb_sockaddr { + struct sockaddr *addr; + int len; +}; /* result code is the length of the sockaddr, or -'ve errno */ + +struct lio_iocb_common { + void *buf; + long nbytes; + long long offset; +}; /* result code is the amount read or -'ve errno */ + +struct lio_iocb_vector { + const struct iovec *vec; + int nr; + long long offset; +}; /* result code is the amount read or -'ve errno */ + +typedef struct lio_iocb { + long key; /* For use in identifying io requests */ + void *data; /* Return in the io completion event */ + int 
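A kvec_cb couples a pinned-page vector with a completion callback: the submitter fills in vec, fn and data, and whoever finishes the transfer calls fn exactly once with the result, as generic_aio_read()/generic_aio_complete() do above. The contract in miniature (sketch; which side unmaps the vec is not spelled out in this hunk and is left out here):

static void my_kvec_done(void *data, struct kvec *vec, ssize_t res)
{
	struct kiocb *iocb = data;

	aio_complete(iocb, res, 0);		/* post the completion event */
}

	/* submission side, inside an aio_read-style method: */
	kvec_cb_t cb;

	cb.vec  = map_user_kvec(READ, buf, size);	/* pin the user buffer */
	cb.fn   = my_kvec_done;
	cb.data = req;
	if (!IS_ERR(cb.vec))
		return file->f_op->kvec_read(file, cb, size, pos);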
aio_fildes; + short aio_reqprio; + short aio_lio_opcode; + + union { + struct lio_iocb_common c; + struct lio_iocb_vector v; + struct lio_iocb_poll poll; + struct lio_iocb_sockaddr saddr; + } u; +} lio_iocb_t; + +typedef void (*lio_callback_t)(int qid, lio_iocb_t *iocb, long result); + +extern int lio_queue_init(int maxevents); +extern int lio_queue_grow(int qid, int new_maxevents); +extern int lio_queue_release(int qid); +extern int lio_queue_wait(int qid, struct timespec *timeout); +extern int lio_queue_run(int qid); +extern int lio_submit(int qid, int nr, lio_iocb_t *ios[]); + +static inline void lio_prep_accept(lio_iocb_t *iocb, int s, struct sockaddr *addr, int addrlen) +{ + iocb->aio_fildes = s; + iocb->aio_lio_opcode = LIO_CMD_ACCEPT; + iocb->aio_reqprio = 0; + iocb->u.c.buf = addr; + iocb->u.c.nbytes = addrlen; + iocb->u.c.offset = 0; +} + +static inline void lio_prep_pread(lio_iocb_t *iocb, int fd, void *buf, long count, long long offset) +{ + iocb->aio_fildes = fd; + iocb->aio_lio_opcode = LIO_CMD_PREAD; + iocb->aio_reqprio = 0; + iocb->u.c.buf = buf; + iocb->u.c.nbytes = count; + iocb->u.c.offset = offset; +} + +static inline void lio_prep_poll(lio_iocb_t *iocb, lio_callback_t *cb, int fd, int events) +{ + iocb->data = cb; + iocb->aio_fildes = fd; + iocb->aio_lio_opcode = LIO_CMD_POLL; + iocb->aio_reqprio = 0; + iocb->u.poll.events = events; +} + +static inline int lio_poll(int qid, lio_iocb_t *iocb, lio_callback_t *cb, int fd, int events) +{ + lio_prep_poll(iocb, cb, fd, events); + return lio_submit(qid, 1, &iocb); +} + +#endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/mm.h aio-v2.4.9-ac14.diff/include/linux/mm.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/mm.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/mm.h Tue Sep 25 14:09:29 2001 @@ -322,8 +322,7 @@ smp_mb__before_clear_bit(); \ if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); \ smp_mb__after_clear_bit(); \ - if (waitqueue_active(&(page)->wait)) \ - wake_up(&(page)->wait); \ + wake_up(&(page)->wait); \ } while (0) #define PageError(page) test_bit(PG_error, &(page)->flags) #define SetPageError(page) set_bit(PG_error, &(page)->flags) diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/net.h aio-v2.4.9-ac14.diff/include/linux/net.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/net.h Mon Sep 24 21:54:50 2001 +++ aio-v2.4.9-ac14.diff/include/linux/net.h Mon Sep 24 21:54:55 2001 @@ -83,6 +83,9 @@ struct scm_cookie; struct vm_area_struct; struct page; +struct iocb; +struct kioctx; +#include /* shut gcc up */ struct proto_ops { int family; @@ -110,6 +113,7 @@ int (*recvmsg) (struct socket *sock, struct msghdr *m, int total_len, int flags, struct scm_cookie *scm); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, int offset, size_t size, int flags); + int (*begin_read) (struct socket *sock, struct kioctx *ctx, struct iocb iocb, struct iocb *iocbptr); }; struct net_proto_family diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/poll.h aio-v2.4.9-ac14.diff/include/linux/poll.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/poll.h Mon Sep 24 21:54:56 2001 +++ aio-v2.4.9-ac14.diff/include/linux/poll.h Tue Sep 25 14:09:31 2001 @@ -7,14 +7,25 @@ #include #include +#ifndef __LINUX__MM_H #include +#endif #include +#ifndef __LINUX__WORKTODO_H +#include +#endif struct poll_table_page; +struct kiocb; typedef struct poll_table_struct { - int error; - struct poll_table_page * table; + struct worktodo wtd; 
+ int error; + struct poll_table_page *table; + struct kiocb *iocb; /* iocb for async poll */ + int events; /* event mask for async poll */ + int wake; + long sync; } poll_table; extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); @@ -29,7 +40,9 @@ { pt->error = 0; pt->table = NULL; + pt->iocb = NULL; } + extern void poll_freewait(poll_table* pt); diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/sched.h aio-v2.4.9-ac14.diff/include/linux/sched.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/sched.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/sched.h Tue Sep 25 14:09:20 2001 @@ -770,6 +770,7 @@ extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); +extern void FASTCALL(add_wait_queue_exclusive_lifo(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); #define __wait_event(wq, condition) \ diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/tqueue.h aio-v2.4.9-ac14.diff/include/linux/tqueue.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/tqueue.h Mon Sep 24 21:54:51 2001 +++ aio-v2.4.9-ac14.diff/include/linux/tqueue.h Mon Sep 24 21:54:55 2001 @@ -67,6 +67,7 @@ #define TQ_ACTIVE(q) (!list_empty(&q)) extern task_queue tq_timer, tq_immediate, tq_disk; +extern struct tq_struct run_disk_tq; /* * To implement your own list of active bottom halfs, use the following diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/types.h aio-v2.4.9-ac14.diff/include/linux/types.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/types.h Mon Sep 24 21:54:50 2001 +++ aio-v2.4.9-ac14.diff/include/linux/types.h Mon Sep 24 19:14:01 2001 @@ -127,4 +127,9 @@ char f_fpack[6]; }; +/* kernel typedefs -- they belong here. */ +#ifdef __KERNEL__ +typedef struct kvec_cb kvec_cb_t; +#endif /* __KERNEL__ */ + #endif /* _LINUX_TYPES_H */ diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/wait.h aio-v2.4.9-ac14.diff/include/linux/wait.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/wait.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/linux/wait.h Mon Sep 24 21:54:55 2001 @@ -28,17 +28,20 @@ #define WAITQUEUE_DEBUG 0 #endif +typedef struct __wait_queue wait_queue_t; +typedef void (*wait_queue_func_t)(wait_queue_t *wait); + struct __wait_queue { unsigned int flags; #define WQ_FLAG_EXCLUSIVE 0x01 struct task_struct * task; struct list_head task_list; + wait_queue_func_t func; #if WAITQUEUE_DEBUG long __magic; long __waker; #endif }; -typedef struct __wait_queue wait_queue_t; /* * 'dual' spinlock architecture. 
Can be switched between spinlock_t and @@ -137,6 +140,7 @@ #endif #define __WAITQUEUE_INITIALIZER(name, tsk) { \ + func: NULL, \ task: tsk, \ task_list: { NULL, NULL }, \ __WAITQUEUE_DEBUG_INIT(name)} @@ -174,6 +178,22 @@ #endif q->flags = 0; q->task = p; + q->func = NULL; +#if WAITQUEUE_DEBUG + q->__magic = (long)&q->__magic; +#endif +} + +static inline void init_waitqueue_func_entry(wait_queue_t *q, + wait_queue_func_t func) +{ +#if WAITQUEUE_DEBUG + if (!q) + WQ_BUG(); +#endif + q->flags = 0; + q->task = NULL; + q->func = func; #if WAITQUEUE_DEBUG q->__magic = (long)&q->__magic; #endif @@ -231,6 +251,19 @@ list_del(&old->task_list); } +#define add_wait_queue_cond(q, wait, cond, fail) \ + do { \ + unsigned long flags; \ + wq_write_lock_irqsave(&(q)->lock, flags); \ + (wait)->flags = 0; \ + if (cond) \ + __add_wait_queue((q), (wait)); \ + else { \ + fail; \ + } \ + wq_write_unlock_irqrestore(&(q)->lock, flags); \ + } while (0) + #endif /* __KERNEL__ */ #endif diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/linux/worktodo.h aio-v2.4.9-ac14.diff/include/linux/worktodo.h --- /md0/kernels/2.4/v2.4.9-ac14/include/linux/worktodo.h Wed Dec 31 19:00:00 1969 +++ aio-v2.4.9-ac14.diff/include/linux/worktodo.h Mon Sep 24 21:54:56 2001 @@ -0,0 +1,39 @@ +#ifndef __LINUX__WORKTODO_H +#define __LINUX__WORKTODO_H + +#ifndef _LINUX_WAIT_H +#include +#endif +#ifndef _LINUX_TQUEUE_H +#include +#endif + +struct worktodo { + wait_queue_t wait; + struct tq_struct tq; + + void *data; /* for use by the wtd_ primitives */ +}; + +/* FIXME NOTE: factor from kernel/context.c */ +#define wtd_queue(wtd) schedule_task(&(wtd)->tq) + +#define wtd_set_action(wtd, action, wtddata) \ + do { \ + (wtd)->tq.routine = (action); \ + (wtd)->tq.data = (wtddata); \ + } while (0) + +struct page; +extern void wtd_wait_page(struct worktodo *wtd, struct page *page); +extern void wtd_lock_page(struct worktodo *wtd, struct page *page); +struct buffer_head; +extern void wtd_wait_on_buffer(struct worktodo *wtd, struct buffer_head *bh); + +#if 0 /* not implemented yet */ +extern void wtd_down(struct worktodo *wtd, struct semaphore *sem); +extern void wtd_down_write(struct worktodo *wtd, struct rw_semaphore *sem); +extern void wtd_down_read(struct worktodo *wtd, struct rw_semaphore *sem); +#endif + +#endif /* __LINUX__WORKTODO_H */ diff -urN /md0/kernels/2.4/v2.4.9-ac14/include/net/sock.h aio-v2.4.9-ac14.diff/include/net/sock.h --- /md0/kernels/2.4/v2.4.9-ac14/include/net/sock.h Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/include/net/sock.h Tue Sep 25 14:09:31 2001 @@ -692,6 +692,10 @@ (__skb)->next = NULL; \ } while(0) +struct kioctx; +struct iocb; +#include /* FIXME */ + /* IP protocol blocks we attach to sockets. 
* socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -721,6 +725,8 @@ int (*recvmsg)(struct sock *sk, struct msghdr *msg, int len, int noblock, int flags, int *addr_len); + int (*begin_read)(struct sock *, struct kioctx *, + struct iocb, struct iocb *); int (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len); diff -urN /md0/kernels/2.4/v2.4.9-ac14/kernel/context.c aio-v2.4.9-ac14.diff/kernel/context.c --- /md0/kernels/2.4/v2.4.9-ac14/kernel/context.c Fri May 25 22:48:10 2001 +++ aio-v2.4.9-ac14.diff/kernel/context.c Mon Sep 24 19:09:13 2001 @@ -91,12 +91,18 @@ */ for (;;) { set_task_state(curtask, TASK_INTERRUPTIBLE); - add_wait_queue(&context_task_wq, &wait); - if (TQ_ACTIVE(tq_context)) + add_wait_queue_exclusive_lifo(&context_task_wq, &wait); + if (spin_is_locked(&tqueue_lock) || TQ_ACTIVE(tq_context)) set_task_state(curtask, TASK_RUNNING); - schedule(); + else + schedule(); remove_wait_queue(&context_task_wq, &wait); run_task_queue(&tq_context); + while (TQ_ACTIVE(tq_context)) { + if (current->need_resched) + schedule(); + run_task_queue(&tq_context); + } wake_up(&context_task_done); if (signal_pending(curtask)) { while (waitpid(-1, (unsigned int *)0, __WALL|WNOHANG) > 0) diff -urN /md0/kernels/2.4/v2.4.9-ac14/kernel/fork.c aio-v2.4.9-ac14.diff/kernel/fork.c --- /md0/kernels/2.4/v2.4.9-ac14/kernel/fork.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/kernel/fork.c Mon Sep 24 19:09:13 2001 @@ -46,6 +46,16 @@ wq_write_unlock_irqrestore(&q->lock, flags); } +void add_wait_queue_exclusive_lifo(wait_queue_head_t *q, wait_queue_t * wait) +{ + unsigned long flags; + + wq_write_lock_irqsave(&q->lock, flags); + wait->flags = WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wait); + wq_write_unlock_irqrestore(&q->lock, flags); +} + void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait) { unsigned long flags; diff -urN /md0/kernels/2.4/v2.4.9-ac14/kernel/sched.c aio-v2.4.9-ac14.diff/kernel/sched.c --- /md0/kernels/2.4/v2.4.9-ac14/kernel/sched.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/kernel/sched.c Mon Sep 24 19:09:13 2001 @@ -714,13 +714,13 @@ } /* - * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything - * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the - * non-exclusive tasks and one exclusive task. + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just + * wake everything up. If it's an exclusive wakeup (nr_exclusive == small + * +ve number) then we wake all the non-exclusive tasks and one exclusive task. * * There are circumstances in which we can try to wake a task which has already - * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero - * in this (rare) case, and we handle it by contonuing to scan the queue. + * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns + * zero in this (rare) case, and we handle it by contonuing to scan the queue. 
*/ static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, int nr_exclusive, const int sync) @@ -733,14 +733,25 @@ list_for_each(tmp,&q->task_list) { unsigned int state; - wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + wait_queue_func_t func; CHECK_MAGIC(curr->__magic); + func = curr->func; + if (func) { + unsigned flags = curr->flags; + func(curr); + if ((flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + break; + continue; + } p = curr->task; state = p->state; if (state & mode) { WQ_NOTE_WAKER(curr); - if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + if (try_to_wake_up(p, sync) && + (curr->flags & WQ_FLAG_EXCLUSIVE) && + !--nr_exclusive) break; } } diff -urN /md0/kernels/2.4/v2.4.9-ac14/kernel/softirq.c aio-v2.4.9-ac14.diff/kernel/softirq.c --- /md0/kernels/2.4/v2.4.9-ac14/kernel/softirq.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/kernel/softirq.c Mon Sep 24 19:09:13 2001 @@ -354,6 +354,7 @@ data = p->data; wmb(); p->sync = 0; + smp_mb(); if (f) f(data); } Binary files /md0/kernels/2.4/v2.4.9-ac14/mm/.filemap.c.swp and aio-v2.4.9-ac14.diff/mm/.filemap.c.swp differ diff -urN /md0/kernels/2.4/v2.4.9-ac14/mm/filemap.c aio-v2.4.9-ac14.diff/mm/filemap.c --- /md0/kernels/2.4/v2.4.9-ac14/mm/filemap.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/mm/filemap.c Tue Sep 25 21:59:56 2001 @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #include +#include /* * Shared mappings implemented 30.11.1994. It's not fully working yet, @@ -2512,7 +2514,6 @@ */ struct page *grab_cache_page_nowait(struct address_space *mapping, unsigned long index) { - struct page *cached_page = NULL; struct page *page, **hash; hash = page_hash(mapping, index); @@ -2799,3 +2800,713 @@ panic("Failed to allocate page hash table\n"); memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *)); } + +/* address_space_map + * Maps a series of pages from the page cache into the given array. + */ +static int address_space_map(struct address_space *as, unsigned long index, + int nr, struct page **pages, + int *nr_newp, struct page **new_pages) +{ + struct page *cached_page = NULL; + int nr_new = 0; + int ret; + + ret = -EINVAL; + if (nr <= 0) + goto out; + + ret = 0; + + spin_lock(&pagecache_lock); + + while (nr > 0) { + struct page **hash = page_hash(as, index); + struct page *page; + + page = __find_page_nolock(as, index, *hash); + if (page) { + page_cache_get(page); +got_page: + pages[ret++] = page; + index++; + nr--; + continue; + } + + if (cached_page) { + __add_to_page_cache(cached_page, as, index, hash); + nr_new++; + *new_pages++ = page = cached_page; + cached_page = NULL; + goto got_page; + } + spin_unlock(&pagecache_lock); + + cached_page = page_cache_alloc(as); + if (!cached_page) + goto out; + + /* Okay, we now have an allocated page. Retry + * the search and add. */ + spin_lock(&pagecache_lock); + } + + spin_unlock(&pagecache_lock); + +out: + if (cached_page) + page_cache_free(cached_page); + + *nr_newp = nr_new; + return ret ? 
ret : -ENOMEM; +} + +struct iodesc { + struct worktodo wtd; + + struct page *good_page; /* the highest Uptodate page */ + int good_idx; + int err; + int did_read; + int rw; + + struct page **pages; + struct page **new_pages; + struct page **cur_pagep; + int nr_pages; + int nr_new_pages; + + struct address_space *as; + struct file *file; + kvec_cb_t cb; + + size_t size; + unsigned long transferred; + unsigned offset; + struct kveclet *veclet; + + int sync; + +#define READDESC_NR_DEF 3 + struct page *def_pages[READDESC_NR_DEF]; + struct page *def_new_pages[READDESC_NR_DEF]; +}; + +static void __iodesc_free(struct iodesc *io, int unlock) +{ + kvec_cb_t cb; + ssize_t res; + + if (unlock) { + unsigned i; + for (i=0; i<io->nr_pages; i++) { + struct page *page = io->pages[i]; + UnlockPage(page); + deactivate_page(page); + page_cache_release(page); + } + } else { + unsigned i; + for (i=0; i<io->nr_pages; i++) + page_cache_release(io->pages[i]); + } + + if (io->new_pages != io->def_new_pages) + kfree(io->new_pages); + if (io->pages != io->def_pages) + kfree(io->pages); + + cb = io->cb; + res = io->transferred ? io->transferred : io->err; + kfree(io); + + cb.fn(cb.data, cb.vec, res); +} + +/* By the time this function is called, all of the pages prior to + * the current good_idx have been released appropriately. The remaining + * duties are to release any remaining pages and to honour O_SYNC. + */ +static void __iodesc_finish_write(struct iodesc *io) +{ + pr_debug("__iodesc_finish_write(%p)\n", io); + + __iodesc_free(io, WRITE == io->rw); +} + +/* This is mostly ripped from generic_file_write */ +static int __iodesc_write_page(struct iodesc *io, struct page *page) +{ + unsigned long bytes; + unsigned long offset, src_offset; + struct page *src_page; + long status; + char *kaddr; + int src_bytes; + char *src; + int done = 0; + unsigned left; + + src_page = io->veclet->page; + src_bytes = io->veclet->length; + src_offset = io->veclet->offset; + src = kmap(src_page) + src_offset; + + offset = io->offset; + kaddr = kmap(page); + kaddr += offset; + + bytes = PAGE_CACHE_SIZE - offset; + if (io->size < bytes) + bytes = io->size; + + pr_debug("__iodesc_write_page(%p (%lu), %lu %lu %lu)\n", page, page->index, offset, bytes, src_offset); + + io->err = io->as->a_ops->prepare_write(io->file, page, + offset, offset + bytes); + if (io->err) { +printk("prepare_write: %d\n", io->err); + goto unlock; + } + + left = bytes; + for (;;) { + unsigned this = src_bytes; + if (left < this) + this = left; + + memcpy(kaddr, src, this); + kaddr += this; + src += this; + left -= this; + src_bytes -= this; + src_offset += this; + + if (left <= 0) + break; + + if (!src_bytes) { + io->veclet++; + kunmap(src_page); + src_page = io->veclet->page; + src_bytes = io->veclet->length; + src_offset = io->veclet->offset; + src = kmap(src_page) + src_offset; + } + } + flush_dcache_page(page); + status = io->as->a_ops->commit_write(io->file, page, + offset, offset+bytes); + + /* We don't handle short writes */ + if (status > 0 && status != bytes) + done = 1; + + if (!status) + status = bytes; + else + printk("commit_write: %ld\n", status); + + if (status > 0) { + io->transferred += status; + io->size -= status; + io->offset = (offset + status) & (PAGE_CACHE_SIZE - 1); + + if (io->offset) + done = 1; + + src_offset += status; + src_offset &= PAGE_CACHE_SIZE - 1; + } else { + io->err = status; + done = 1; + } + +unlock: + kunmap(page); + kunmap(src_page); + + //UnlockPage(page); + //deactivate_page(page); + //page_cache_release(page); + + return done; 
+} + +void __iodesc_sync_wait_page(void *data) +{ + struct iodesc *io = data; + + do { + struct buffer_head *bh, *head = io->pages[io->good_idx]->buffers; + + if (!head) + continue; + + bh = head; + do { + if (buffer_locked(bh)) { + pr_debug("waiting on bh=%pi io=%p\n", bh, io); + wtd_wait_on_buffer(&io->wtd, bh); + return; + } + if (buffer_req(bh) && !buffer_uptodate(bh)) { + pr_debug("io err bh=%p (%p)\n", bh, io); + io->err = -EIO; + break; + } + } while ((bh = bh->b_this_page) != head); + } while (!io->err && ++io->good_idx < io->nr_pages) ; + + pr_debug("finish_write(%p)\n", io); + __iodesc_finish_write(io); +} + +static void __iodesc_do_write(void *data) +{ + struct iodesc *io = data; + unsigned i; + + up(&io->file->f_dentry->d_inode->i_sem); + + for (i=0; i<io->nr_pages; i++) + if (__iodesc_write_page(io, io->pages[i])) + break; + + if (io->sync) { + io->good_idx = 0; + + pr_debug("writing out pages(%p)\n", io); + for (i=0; i<io->nr_pages; i++) { + if (io->pages[i]->buffers) + writeout_one_page(io->pages[i]); + } + + pr_debug("calling __iodesc_sync_wait_page(%p)\n", io); + wtd_set_action(&io->wtd, __iodesc_sync_wait_page, io); + __iodesc_sync_wait_page(io); + return; + } + + __iodesc_finish_write(io); +} + +static void __iodesc_write_lock_next_page(void *data) +{ + struct iodesc *io = data; + pr_debug("__iodesc_write_next_page(%p)\n", io); + + while (io->good_idx < io->nr_pages) { + io->good_page = io->pages[io->good_idx++]; + if (io->good_page == *io->cur_pagep) + io->cur_pagep++; + else { + wtd_lock_page(&io->wtd, io->good_page); + return; + } + } + + //Is this faster? __iodesc_do_write(io); + wtd_set_action(&io->wtd, __iodesc_do_write, io); + wtd_queue(&io->wtd); +} + +static void __generic_file_write_iodesc(struct iodesc *io) +{ + struct inode *inode = io->file->f_dentry->d_inode; + time_t now = CURRENT_TIME; + + remove_suid(inode); + if (inode->i_ctime != now || inode->i_mtime != now) { + inode->i_ctime = inode->i_mtime = now; + mark_inode_dirty_sync(inode); + } + + wtd_set_action(&io->wtd, __iodesc_write_lock_next_page, io); + io->sync = !!(io->file->f_flags & O_SYNC); + io->good_idx = 0; + io->cur_pagep = io->new_pages; + __iodesc_write_lock_next_page(io); +} + +static void __iodesc_read_finish(struct iodesc *io) +{ + struct page **src_pagep; + char *dst_addr, *src_addr; + int src_off; + size_t size; + size_t valid; + + struct kveclet *veclet = io->veclet; + struct page *dst_page = veclet->page; + int dst_len = veclet->length; + int dst_off = veclet->offset; + + + pr_debug("__iodesc_read_finish: good_idx = %d\n", io->good_idx); + if (io->good_idx <= 0) + goto no_data; + + size = io->size; + src_off = io->offset; + src_pagep = io->pages; + src_addr = kmap(*src_pagep); + + valid = (size_t)io->good_idx << PAGE_CACHE_SHIFT; + valid -= src_off; + pr_debug("size=%d valid=%d src_off=%d\n", size, valid, src_off); + + if (valid < size) + size = valid; + + dst_addr = kmap(veclet->page); + + while (size > 0) { + int this = PAGE_CACHE_SIZE - src_off; + if ((PAGE_SIZE - dst_off) < this) + this = PAGE_SIZE - dst_off; + if (size < this) + this = size; + pr_debug("this=%d src_off=%d dst_off=%d dst_len=%d\n", + this, src_off, dst_off, dst_len); + memcpy(dst_addr + dst_off, src_addr + src_off, this); + + src_off += this; + dst_off += this; + dst_len -= this; + size -= this; + io->transferred += this; + pr_debug("read_finish: this=%d transferred=%d\n", + this, io->transferred); + + if (size <= 0) + break; + + if (dst_len <= 0) { + kunmap(dst_page); + veclet++; + dst_page = veclet->page; + dst_off = 
veclet->offset; + dst_len = veclet->length; + dst_addr = kmap(dst_page); + } + + if (src_off >= PAGE_SIZE) { /* FIXME: PAGE_CACHE_SIZE */ + kunmap(*src_pagep); + pr_debug("page(%lu)->count = %d\n", + (*src_pagep)->index, + atomic_read(&(*src_pagep)->count)); + src_pagep++; + src_addr = kmap(*src_pagep); + src_off = 0; + } + } + kunmap(dst_page); + kunmap(*src_pagep); +no_data: + __iodesc_free(io, 0); +} + +static void __iodesc_make_uptodate(void *data) +{ + struct iodesc *io = data; + struct page *page = io->good_page; + int locked = 1; + + pr_debug("__iodesc_make_uptodate: io=%p index=%lu\n", io, page->index); + while (Page_Uptodate(page)) { +again: + pr_debug("page index %lu uptodate\n", page->index); + if (locked) { + UnlockPage(page); + locked = 0; + } + io->did_read = 0; + io->good_idx++; + if (io->good_idx >= io->nr_pages) { + __iodesc_read_finish(io); + return; + } + page = io->good_page = io->pages[io->good_idx]; + pr_debug("__iodesc_make_uptodate: index=%lu\n", page->index); + } + + if (!locked) { + wtd_lock_page(&io->wtd, page); + return; + } + + if (!io->did_read) { + /* We haven't tried reading this page before, give it a go. */ + printk("attempting to read %lu\n", page->index); + io->did_read = 1; + io->err = page->mapping->a_ops->readpage(io->file, page); + if (!io->err) { + if (Page_Uptodate(page)) + goto again; + wtd_lock_page(&io->wtd, page); + return; + } + } + + if (locked) + UnlockPage(page); + + /* We've already read this page before. Set err to EIO and quit. */ + if (!io->err) + io->err = -EIO; + __iodesc_read_finish(io); +} + +static void __wtdgeneric_file_read_iodesc(void *data); + +static void __generic_file_read_iodesc(struct iodesc *io, int mayblock) +{ + int (*readpage)(struct file *, struct page *); + int i; + + wtd_set_action(&io->wtd, __iodesc_make_uptodate, io); + readpage = io->as->a_ops->readpage; + for (i=0; i<io->nr_new_pages; i++) { + int ret; + if (!mayblock) { + static int zoo; if (zoo++ < 5) printk("read sleep\n"); + wtd_set_action(&io->wtd, __wtdgeneric_file_read_iodesc, io); + wtd_queue(&io->wtd); + } + ret = readpage(io->file, io->new_pages[i]); + if (ret) + printk(KERN_DEBUG "__generic_file_read_kiovec: readpage(%lu) = %d\n", io->new_pages[i]->index, ret); + } + + for (i=0; i<io->nr_pages; i++) { + struct page *page = io->pages[i]; + if (Page_Uptodate(page)) { + pr_debug("__generic_file_read_iodesc: %lu is uptodate\n", page->index); + continue; + } + + if (!mayblock) { + static int zoo; if (zoo++ < 5) printk("read sleep\n"); + wtd_set_action(&io->wtd, __wtdgeneric_file_read_iodesc, io); + wtd_queue(&io->wtd); + } + if (!TryLockPage(page)) { + int ret = readpage(io->file, page); + if (ret) + printk(KERN_DEBUG "__generic_file_read_iodesc: readpage(%lu): %d\n", page->index, ret); + } + + if (!Page_Uptodate(page) && io->good_idx == -1) { + pr_debug("first good_idx=%d (%lu)\n", i, page->index); + io->good_idx = i; + io->good_page = page; + } + } + + /* Whee, all the pages are uptodate! 
*/ + if (!io->good_page) { + static int zoo; if (!mayblock && zoo++ < 5) printk("all uptodate\n"); + pr_debug("all pages uptodate!\n"); + io->good_idx = io->nr_pages; + __iodesc_read_finish(io); + return; + } + + pr_debug("locking good_page\n"); + wtd_lock_page(&io->wtd, io->good_page); + return; +} + +static void __wtdgeneric_file_read_iodesc(void *data) +{ + struct iodesc *io = data; + __generic_file_read_iodesc(io, 1); +} + +static int generic_file_rw_kvec(struct file *file, int rw, kvec_cb_t cb, + size_t size, loff_t pos); + +int generic_file_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos) +{ + return generic_file_rw_kvec(file, READ, cb, size, pos); +} + +int generic_file_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos) +{ + return generic_file_rw_kvec(file, WRITE, cb, size, pos); +} + +int generic_file_rw_kvec(struct file *file, int rw, kvec_cb_t cb, + size_t size, loff_t pos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct address_space *as = inode->i_mapping; + unsigned long index; + unsigned long eindex; + unsigned long nr_pages; + struct iodesc *io = NULL; + int ret; + + ret = -EINVAL; + if (rw != READ && rw != WRITE) + goto out; + + ret = -ENOMEM; + io = kmalloc(sizeof(*io), GFP_KERNEL); + if (!io) + goto out; + + memset(io, 0, sizeof(*io)); + io->size = size; + + if (READ == rw) { + pr_debug("pos=%Ld i_size=%Ld\n", pos, inode->i_size); + + if (pos > inode->i_size) + size = 0; + else if ((pos + size) > inode->i_size) + size = inode->i_size - pos; + + if (io->size < size) + size = io->size; + else if (size < io->size) + io->size = size; + + pr_debug("io->size=%d size=%d\n", io->size, size); + } + + index = pos >> PAGE_CACHE_SHIFT; + eindex = (pos + size - 1) >> PAGE_CACHE_SHIFT; + nr_pages = eindex - index + 1; + + pr_debug("nr_pages: %lu\n", nr_pages); + + io->good_idx = -1; + io->good_page = NULL; + io->did_read = 0; + io->err = 0; + io->rw = rw; + io->as = as; + io->offset = (unsigned long)pos & (PAGE_CACHE_SIZE - 1); + io->file = file; + io->cb = cb; + io->veclet = cb.vec->veclet; + if (nr_pages < READDESC_NR_DEF) { + io->pages = io->def_pages; + io->new_pages = io->def_new_pages; + } else { + io->pages = kmalloc(sizeof(*io->pages) * (nr_pages + 1), GFP_KERNEL); + if (!io->pages) + goto out_io; + + io->new_pages = kmalloc(sizeof(*io->new_pages) * (nr_pages + 1), GFP_KERNEL); + if (!io->new_pages) + goto out_pages; + } + + /* FIXME: make the down a WTD_op */ + if (rw == WRITE) + down(&io->file->f_dentry->d_inode->i_sem); + + ret = address_space_map(as, index, nr_pages, io->pages, + &io->nr_new_pages, io->new_pages); + pr_debug("as_map: %d (%d new)\n", ret, io->nr_new_pages); + if (ret <= 0) + goto out_new_pages; + + io->nr_pages = ret; + io->pages[io->nr_pages] = NULL; + io->new_pages[io->nr_new_pages] = NULL; + + if (rw == READ) + __generic_file_read_iodesc(io, 0); + else if (rw == WRITE) + __generic_file_write_iodesc(io); + + return 0; + +out_new_pages: + if (io->new_pages != io->def_new_pages) + kfree(io->new_pages); +out_pages: + if (io->pages != io->def_pages) + kfree(io->pages); +out_io: + kfree(io); +out: + return ret; +} + +static void __wtd_lock_page_waiter(wait_queue_t *wait) +{ + struct worktodo *wtd = (struct worktodo *)wait; + struct page *page = (struct page *)wtd->data; + + if (!TryLockPage(page)) { + __remove_wait_queue(&page->wait, &wtd->wait); + wtd_queue(wtd); + } else { + schedule_task(&run_disk_tq); + } +} + +void wtd_lock_page(struct worktodo *wtd, struct page *page) +{ + if (TryLockPage(page)) { + int 
raced = 0; + wtd->data = page; + init_waitqueue_func_entry(&wtd->wait, __wtd_lock_page_waiter); + add_wait_queue_cond(&page->wait, &wtd->wait, TryLockPage(page), raced = 1); + + if (!raced) { + run_task_queue(&tq_disk); + return; + } + } + + wtd->tq.routine(wtd->tq.data); +} + +static void __wtd_bh_waiter(wait_queue_t *wait) +{ + struct worktodo *wtd = (struct worktodo *)wait; + struct buffer_head *bh = (struct buffer_head *)wtd->data; + + if (!buffer_locked(bh)) { + __remove_wait_queue(&bh->b_wait, &wtd->wait); + wtd_queue(wtd); + } else { + schedule_task(&run_disk_tq); + } +} + +void wtd_wait_on_buffer(struct worktodo *wtd, struct buffer_head *bh) +{ + int raced = 0; + + if (!buffer_locked(bh)) { + wtd->tq.routine(wtd->tq.data); + return; + } + wtd->data = bh; + init_waitqueue_func_entry(&wtd->wait, __wtd_bh_waiter); + add_wait_queue_cond(&bh->b_wait, &wtd->wait, buffer_locked(bh), raced = 1); + + if (raced) + wtd->tq.routine(wtd->tq.data); + else + run_task_queue(&tq_disk); +} + +void do_run_tq_disk(void *data) +{ + run_task_queue(&tq_disk); +} + +struct tq_struct run_disk_tq = { + routine: do_run_tq_disk, + data: NULL +}; + diff -urN /md0/kernels/2.4/v2.4.9-ac14/mm/memory.c aio-v2.4.9-ac14.diff/mm/memory.c --- /md0/kernels/2.4/v2.4.9-ac14/mm/memory.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/mm/memory.c Mon Sep 24 21:36:43 2001 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -1469,3 +1470,135 @@ } while (addr < end); return 0; } + +/* + * Force in an entire range of pages from the current process's user VA, + * and pin them in physical memory. + */ + +#define dprintk(x...) +struct kvec *map_user_kvec(int rw, unsigned long ptr, size_t len) +{ + struct kvec *vec; + struct kveclet *veclet; + unsigned long end; + int err; + struct mm_struct * mm; + struct vm_area_struct * vma = 0; + int i; + int datain = (rw == READ); + unsigned nr_pages; + + end = ptr + len; + if (end < ptr) { + printk(KERN_DEBUG "map_user_kvec: end < ptr\n"); + return ERR_PTR(-EINVAL); + } + + nr_pages = (ptr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + nr_pages -= ptr >> PAGE_SHIFT; + vec = kmalloc(sizeof(struct kvec) + nr_pages * sizeof(struct kveclet), + GFP_KERNEL); + if (!vec) + return ERR_PTR(-ENOMEM); + vec->nr = 0; + vec->max_nr = nr_pages; + veclet = vec->veclet; + + /* Make sure the iobuf is not already mapped somewhere. 
*/ + mm = current->mm; + dprintk ("map_user_kiobuf: begin\n"); + + down_read(&mm->mmap_sem); + + err = -EFAULT; + + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + struct page *map; + veclet->offset = ptr & ~PAGE_MASK; + veclet->length = PAGE_SIZE - veclet->offset; + if (len < veclet->length) + veclet->length = len; + ptr &= PAGE_MASK; + + if (!vma || ptr >= vma->vm_end) { + vma = find_vma(current->mm, ptr); + if (!vma) + goto out_unlock; + if (vma->vm_start > ptr) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_unlock; + if (expand_stack(vma, ptr)) + goto out_unlock; + } + if (((datain) && (!(vma->vm_flags & VM_WRITE))) || + (!(vma->vm_flags & VM_READ))) { + err = -EACCES; + goto out_unlock; + } + } + spin_lock(&mm->page_table_lock); + while (!(map = follow_page(ptr, datain))) { + int ret; + + spin_unlock(&mm->page_table_lock); + ret = handle_mm_fault(current->mm, vma, ptr, datain); + if (ret <= 0) { + if (!ret) + goto out_unlock; + else { + err = -ENOMEM; + goto out_unlock; + } + } + spin_lock(&mm->page_table_lock); + } + map = get_page_map(map); + if (map) { + flush_dcache_page(map); + atomic_inc(&map->count); + } else + printk (KERN_INFO "Mapped page missing [%d]\n", i); + spin_unlock(&mm->page_table_lock); + veclet->page = map; + veclet++; + + ptr += PAGE_SIZE; + len -= PAGE_SIZE; + vec->nr = ++i; + } + + up_read(&mm->mmap_sem); + dprintk ("map_user_kiobuf: end OK\n"); + return vec; + + out_unlock: + up_read(&mm->mmap_sem); + unmap_kvec(vec); + printk(KERN_DEBUG "map_user_kvec: err(%d)\n", err); + kfree(vec); + return ERR_PTR(err); +} + +/* + * Unmap all of the pages referenced by a kiobuf. We release the pages, + * and unlock them if they were locked. + */ + +void unmap_kvec (struct kvec *vec) +{ + struct kveclet *veclet, *end = vec->veclet + vec->nr; + + for (veclet=vec->veclet; veclet<end; veclet++) { + struct page *map = veclet->page; + if (map) + __free_page(map); + } + + vec->nr = 0; +} diff -urN /md0/kernels/2.4/v2.4.9-ac14/net/ipv4/af_inet.c aio-v2.4.9-ac14.diff/net/ipv4/af_inet.c --- /md0/kernels/2.4/v2.4.9-ac14/net/ipv4/af_inet.c Mon Aug 13 15:12:09 2001 +++ aio-v2.4.9-ac14.diff/net/ipv4/af_inet.c Mon Sep 24 19:09:13 2001 @@ -732,6 +732,13 @@ } +int inet_begin_read(struct socket *sock, struct kioctx *ctx, + struct iocb iocb, struct iocb *iocbptr) +{ + struct sock *sk = sock->sk; + + return sk->prot->begin_read(sk, ctx, iocb, iocbptr); +} int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) @@ -973,6 +980,7 @@ recvmsg: inet_recvmsg, mmap: sock_no_mmap, sendpage: sock_no_sendpage, + begin_read: inet_begin_read, }; struct net_proto_family inet_family_ops = { diff -urN /md0/kernels/2.4/v2.4.9-ac14/net/ipv4/udp.c aio-v2.4.9-ac14.diff/net/ipv4/udp.c --- /md0/kernels/2.4/v2.4.9-ac14/net/ipv4/udp.c Mon Sep 24 02:14:16 2001 +++ aio-v2.4.9-ac14.diff/net/ipv4/udp.c Mon Sep 24 19:09:13 2001 @@ -619,6 +619,15 @@ __udp_checksum_complete(skb); } + +static int udp_begin_read(struct sock *sk, struct kioctx *ctx, + struct iocb iocb, struct iocb *iocbptr) +{ + struct sk_buff *skb; + printk("udp_begin_read\n"); +} + + /* * This should be easy, if there is something there we * return it, otherwise we block. 
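
For readers trying to follow the flow, here is a rough sketch of how a submitter is expected to drive the new in-kernel async interface: pin the user buffer with map_user_kvec(), attach a completion callback through a kvec_cb_t, and hand the request to the file's kvec_read operation (generic_file_kvec_read for page-cache files). This sketch is not part of the patch; the my_request/my_read_done/my_submit_read names are invented for illustration, and the assumption that the submitter unmaps and kfree()s the kvec once the callback has run (mirroring the error path of map_user_kvec above) is the editor's, not the author's.

#include <linux/fs.h>
#include <linux/kiovec.h>
#include <linux/slab.h>

struct my_request {
	struct file *file;	/* whatever the caller needs to complete the request */
};

/* Completion callback: res is the number of bytes transferred or a
 * negative errno, per the rw_kiovec/kvec_cb convention. */
static void my_read_done(void *data, struct kvec *vec, ssize_t res)
{
	struct my_request *req = data;

	unmap_kvec(vec);	/* drop the page references taken by map_user_kvec() */
	kfree(vec);		/* assumption: the submitter owns the kvec */
	/* ... report res back to whoever issued req ... */
}

static int my_submit_read(struct my_request *req, struct file *file,
			  void *ubuf, size_t len, loff_t pos)
{
	struct kvec *vec;
	kvec_cb_t cb;
	int err;

	/* READ: the user pages will be written to, so VM_WRITE is required */
	vec = map_user_kvec(READ, (unsigned long)ubuf, len);
	if (IS_ERR(vec))
		return PTR_ERR(vec);

	cb.vec = vec;
	cb.fn = my_read_done;
	cb.data = req;

	err = file->f_op->kvec_read(file, cb, len, pos);
	if (err) {
		unmap_kvec(vec);
		kfree(vec);
	}
	return err;
}

The same pattern applies to kvec_write; the begin_read hooks being added to the socket layer here are the analogous, still skeletal, entry points on the network side.
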
@@ -1016,6 +1025,7 @@ getsockopt: ip_getsockopt, sendmsg: udp_sendmsg, recvmsg: udp_recvmsg, + begin_read: udp_begin_read, backlog_rcv: udp_queue_rcv_skb, hash: udp_v4_hash, unhash: udp_v4_unhash, diff -urN /md0/kernels/2.4/v2.4.9-ac14/net/socket.c aio-v2.4.9-ac14.diff/net/socket.c --- /md0/kernels/2.4/v2.4.9-ac14/net/socket.c Mon Sep 24 02:14:17 2001 +++ aio-v2.4.9-ac14.diff/net/socket.c Mon Sep 24 19:09:13 2001 @@ -107,6 +107,8 @@ unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +static int sock_begin_read(struct file *file, struct kioctx *ctx, + struct iocb iocb, struct iocb *iocbptr); /* @@ -126,6 +128,7 @@ fasync: sock_fasync, readv: sock_readv, writev: sock_writev, + //begin_read: sock_begin_read, sendpage: sock_sendpage }; @@ -623,6 +626,16 @@ return sock->ops->sendpage(sock, page, offset, size, flags); } +static int sock_begin_read(struct file *file, struct kioctx *ctx, + struct iocb iocb, struct iocb *iocbptr) +{ + struct socket *sock; + sock = socki_lookup(file->f_dentry->d_inode); + if (sock->ops->begin_read) + return sock->ops->begin_read(sock, ctx, iocb, iocbptr); + return -EINVAL; +} + int sock_readv_writev(int type, struct inode * inode, struct file * file, const struct iovec * iov, long count, long size) {