diff -urNp x-ref/drivers/char/raw.c xx/drivers/char/raw.c --- x-ref/drivers/char/raw.c 2003-02-14 07:28:49.000000000 +0100 +++ xx/drivers/char/raw.c 2003-02-14 07:29:00.000000000 +0100 @@ -40,12 +40,16 @@ int raw_ctl_ioctl(struct inode *, struct int raw_ioctl(struct inode *, struct file *, unsigned int, unsigned long); int raw_kvec_read(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos); int raw_kvec_write(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos); +ssize_t raw_readv(struct file *, const struct iovec *, unsigned long, loff_t *); +ssize_t raw_writev(struct file *, const struct iovec *, unsigned long, loff_t *); static struct file_operations raw_fops = { read: raw_read, write: raw_write, open: raw_open, release: raw_release, + readv: raw_readv, + writev: raw_writev, ioctl: raw_ioctl, aio_read: generic_file_aio_read, aio_write: generic_file_aio_write, @@ -58,6 +62,8 @@ static struct file_operations raw_ctl_fo open: raw_open, }; +extern int map_user_kiobuf_iovecs(int, struct kiobuf *, const struct iovec *, int); + static int __init raw_init(void) { int i; @@ -490,3 +496,106 @@ out: return err; } +/* + * Process the readv/writev request by coalescing the iovecs into a single kiobuf. + * Some restrictions apply: each iov_base must be page aligned and each iov_len a multiple + * of the page size, and the total io size must be small enough (at most max_sectors blocks + * and within the device limit) to go to brw_kiovec in one shot. If these restrictions are + * not met, a return of -ENOSYS tells the caller to use the default readv/writev method. + * Returns 0 for an empty request, -EINVAL for an offset or total length that is not a multiple of the sector size, -ENXIO for a start at or past the device limit, an alloc_kiovec or map_user_kiobuf_iovecs error, else the brw_kiovec result (added to *offp if positive). 
+ */ +ssize_t rwvec_raw_dev(int rw, struct file * filp, const struct iovec *iov, + ulong iov_count, loff_t *offp) +{ + kdev_t dev; + struct kiobuf *iobuf; + ulong blocknr, blocks, limit; + int i, minor, err; + int sector_size, sector_bits, sector_mask, max_sectors; + ssize_t tot_len; + + for (i = 0, tot_len = 0; i < iov_count; i++) { + ssize_t tmp = tot_len; + ssize_t len = (ssize_t) iov[i].iov_len; + ulong base = (ulong) iov[i].iov_base; + if ((len & ~PAGE_MASK) || (base & ~PAGE_MASK)) + return -ENOSYS; + if (len < 0) /* iov_len (a size_t) does not fit in a ssize_t */ + return -EINVAL; + tot_len += len; + if (tot_len < tmp) /* running total overflowed the ssize_t */ + return -EINVAL; + } + + minor = MINOR(filp->f_dentry->d_inode->i_rdev); + + err = alloc_kiovec(1, &iobuf); + if (err) + return err; + iobuf->varyio = raw_devices[minor].varyio; + + dev = to_kdev_t(raw_devices[minor].binding->bd_dev); + sector_size = raw_devices[minor].sector_size; + sector_bits = raw_devices[minor].sector_bits; + sector_mask = sector_size- 1; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); + + if (blk_size[MAJOR(dev)]) + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; + else + limit = INT_MAX; + + dprintk("rwvec_raw_dev: dev %d:%d (+%d)\n", + MAJOR(dev), MINOR(dev), limit); + + err = -EINVAL; + if ((*offp & sector_mask) || (tot_len & sector_mask)) + goto out; + err = 0; + if (tot_len) + err = -ENXIO; + if ((*offp >> sector_bits) >= limit) + goto out; + + blocknr = *offp >> sector_bits; + blocks = tot_len >> sector_bits; + + if (!blocks) + goto out; + + err = -ENOSYS; + if ((blocks > max_sectors) || (blocks > limit - blocknr)) + goto out; + + err = map_user_kiobuf_iovecs(rw, iobuf, iov, iov_count); + if (err) + goto out; + + for (i=0; i < blocks; i++) + iobuf->blocks[i] = blocknr++; + + err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + + if (rw == READ && err > 0) + mark_dirty_kiobuf(iobuf, err); + + unmap_kiobuf(iobuf); + + if (err > 0) + 
*offp += err; +out: + free_kiovec(1, &iobuf); + return err; +} + +ssize_t raw_readv(struct file *filp, const struct iovec *iov, + unsigned long nr, loff_t *offp) +{ + return rwvec_raw_dev(READ, filp, iov, nr, offp); +} + +ssize_t raw_writev(struct file *filp, const struct iovec *iov, + unsigned long nr, loff_t *offp) +{ + return rwvec_raw_dev(WRITE, filp, iov, nr, offp); +} diff -urNp x-ref/fs/read_write.c xx/fs/read_write.c --- x-ref/fs/read_write.c 2003-02-14 07:28:45.000000000 +0100 +++ xx/fs/read_write.c 2003-02-14 07:28:50.000000000 +0100 @@ -210,7 +210,6 @@ asmlinkage ssize_t sys_write(unsigned in return ret; } - static ssize_t do_readv_writev(int type, struct file *file, const struct iovec * vector, unsigned long count) @@ -285,7 +284,8 @@ static ssize_t do_readv_writev(int type, fnv = (type == VERIFY_WRITE ? file->f_op->readv : file->f_op->writev); if (fnv) { ret = fnv(file, iov, count, &file->f_pos); - goto out; + if (ret != -ENOSYS) + goto out; } /* VERIFY_WRITE actually means a read, as we write to user space */ diff -urNp x-ref/mm/memory.c xx/mm/memory.c --- x-ref/mm/memory.c 2003-02-14 07:28:45.000000000 +0100 +++ xx/mm/memory.c 2003-02-14 07:28:50.000000000 +0100 @@ -1890,3 +1890,73 @@ int copy_user_to_kvec(struct kvec *to, s return ret; } + +/* + * Map the user pages described by the iovecs into a single kiobuf. Returns 0 on success, -EINVAL if the kiobuf already has pages mapped, a negative get_user_pages error, or -ENOMEM when an iovec could only be partially mapped. + * Note iov_base and iov_len must be aligned to or a multiple of page size. NOTE(review): both `for (i=0, pgcount=0; ...` loop headers below are cut short in this copy of the patch, so the code between them cannot be documented here - restore it from the original patch before applying. + */ +int map_user_kiobuf_iovecs(int rw, struct kiobuf *iobuf, const struct iovec *iov, int iov_count) +{ + size_t len; + unsigned long va; + struct mm_struct *mm; + int i, err, iovpages, pgcount; + + /* Make sure the iobuf is not already mapped somewhere. 
*/ + if (iobuf->nr_pages) + return -EINVAL; + + mm = current->mm; + dprintk ("map_user_kiobuf_iovecs: begin\n"); + + for (i=0, pgcount=0; ilocked = 0; + iobuf->offset = 0; + iobuf->length = 0; + + for (i=0, pgcount=0; ilength += len; + + iovpages = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; + + /* Try to fault in all of the necessary pages */ + down_read(&mm->mmap_sem); + /* rw==READ means read from disk, write into memory area */ + err = get_user_pages(current, mm, va, iovpages, + (rw==READ), 0, &iobuf->maplist[pgcount], NULL); + up_read(&mm->mmap_sem); + + /* return on error or in the case of a partially-mapped iovec */ + if ((err < 0) || (err < iovpages)) { + iobuf->nr_pages = pgcount + (err > 0 ? err : 0); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf_iovecs: end %d\n", err); + return err < 0 ? err : -ENOMEM; + } + pgcount += iovpages; + } + + iobuf->nr_pages = pgcount; + while (pgcount--) { + /* FIXME: flush superfluous for rw==READ, + * probably wrong function for rw==WRITE + */ + flush_dcache_page(iobuf->maplist[pgcount]); + } + dprintk ("map_user_kiobuf_iovecs: end OK\n"); + return 0; +}