diff -urN rawioref/drivers/char/mem.c rawio-6/drivers/char/mem.c
--- rawioref/drivers/char/mem.c	Thu Apr 26 02:04:21 2001
+++ rawio-6/drivers/char/mem.c	Thu Apr 26 04:35:52 2001
@@ -616,7 +616,6 @@
 		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
 	memory_devfs_register();
 	rand_initialize();
-	raw_init();
 #ifdef CONFIG_I2C
 	i2c_init_all();
 #endif
diff -urN rawioref/drivers/char/raw.c rawio-6/drivers/char/raw.c
--- rawioref/drivers/char/raw.c	Thu Nov 16 15:37:28 2000
+++ rawio-6/drivers/char/raw.c	Thu Apr 26 04:35:52 2001
@@ -19,10 +19,15 @@
 
 #define dprintk(x...) 
 
-static struct block_device *raw_device_bindings[256];
-static int raw_device_inuse[256];
-static int raw_device_sector_size[256];
-static int raw_device_sector_bits[256];
+typedef struct raw_device_data_s {
+	struct kiobuf * iobuf;
+	long iobuf_lock;
+	struct block_device *binding;
+	int inuse, sector_size, sector_bits;
+	struct semaphore mutex;
+} raw_device_data_t;
+
+static raw_device_data_t raw_devices[256];
 
 static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
@@ -45,11 +50,19 @@
 	open:		raw_open,
 };
 
-void __init raw_init(void)
+static int __init raw_init(void)
 {
+	int i;
 	register_chrdev(RAW_MAJOR, "raw", &raw_fops);
+
+	for (i = 0; i < 256; i++)
+		init_MUTEX(&raw_devices[i].mutex);
+
+	return 0;
 }
 
+__initcall(raw_init);
+
 /*
  * Open/close code for raw IO.
  */
@@ -74,28 +87,43 @@
 		return 0;
 	}
 
+	down(&raw_devices[minor].mutex);
 	/*
 	 * No, it is a normal raw device.  All we need to do on open is
 	 * to check that the device is bound, and force the underlying
 	 * block device to a sector-size blocksize.
 	 */
 
-	bdev = raw_device_bindings[minor];
+	bdev = raw_devices[minor].binding;
+	err = -ENODEV;
 	if (!bdev)
-		return -ENODEV;
+		goto out;
 
 	rdev = to_kdev_t(bdev->bd_dev);
 	err = blkdev_get(bdev, filp->f_mode, 0, BDEV_RAW);
 	if (err)
-		return err;
+		goto out;
 
 	/*
 	 * Don't change the blocksize if we already have users using
 	 * this device
 	 */
 
-	if (raw_device_inuse[minor]++)
-		return 0;
+	if (raw_devices[minor].inuse++)
+		goto out;
+
+	/*
+	 * We'll just use one kiobuf
+	 */
+
+	err = alloc_kiovec(1, &raw_devices[minor].iobuf);
+	if (err) {
+		raw_devices[minor].inuse--;
+		up(&raw_devices[minor].mutex);
+		blkdev_put(bdev, BDEV_RAW);
+		return err;
+	}
+
 
 	/*
 	 * Don't interfere with mounted devices: we cannot safely set
@@ -112,13 +140,16 @@
 	}
 
 	set_blocksize(rdev, sector_size);
-	raw_device_sector_size[minor] = sector_size;
+	raw_devices[minor].sector_size = sector_size;
 
 	for (sector_bits = 0; !(sector_size & 1); )
 		sector_size>>=1, sector_bits++;
-	raw_device_sector_bits[minor] = sector_bits;
+	raw_devices[minor].sector_bits = sector_bits;
+
+ out:
+	up(&raw_devices[minor].mutex);
 
-	return 0;
+	return err;
 }
 
 int raw_release(struct inode *inode, struct file *filp)
@@ -127,11 +158,12 @@
 	struct block_device *bdev;
 
 	minor = MINOR(inode->i_rdev);
-	lock_kernel();
-	bdev = raw_device_bindings[minor];
+	down(&raw_devices[minor].mutex);
+	bdev = raw_devices[minor].binding;
+	if (!--raw_devices[minor].inuse)
+		free_kiovec(1, &raw_devices[minor].iobuf);
+	up(&raw_devices[minor].mutex);
 	blkdev_put(bdev, BDEV_RAW);
-	raw_device_inuse[minor]--;
-	unlock_kernel();
 	return 0;
 }
@@ -184,26 +216,30 @@
 		 * major/minor numbers make sense.
 		 */
 
-		if (rq.block_major == NODEV || 
+		if ((rq.block_major == NODEV &&
+		     rq.block_minor != NODEV) ||
 		    rq.block_major > MAX_BLKDEV ||
 		    rq.block_minor > MINORMASK) {
 			err = -EINVAL;
 			break;
 		}
 
-		if (raw_device_inuse[minor]) {
+		down(&raw_devices[minor].mutex);
+		if (raw_devices[minor].inuse) {
+			up(&raw_devices[minor].mutex);
 			err = -EBUSY;
 			break;
 		}
-		if (raw_device_bindings[minor])
-			bdput(raw_device_bindings[minor]);
-		raw_device_bindings[minor] =
+		if (raw_devices[minor].binding)
+			bdput(raw_devices[minor].binding);
+		raw_devices[minor].binding =
 			bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
+		up(&raw_devices[minor].mutex);
 	} else {
 		struct block_device *bdev;
 		kdev_t dev;
 
-		bdev = raw_device_bindings[minor];
+		bdev = raw_devices[minor].binding;
 		if (bdev) {
 			dev = to_kdev_t(bdev->bd_dev);
 			rq.block_major = MAJOR(dev);
@@ -244,9 +280,9 @@
 		   size_t size, loff_t *offp)
 {
 	struct kiobuf * iobuf;
-	int		err;
+	int		new_iobuf;
+	int		err = 0;
 	unsigned long	blocknr, blocks;
-	unsigned long	b[KIO_MAX_SECTORS];
 
 	size_t		transferred;
 	int		iosize;
 	int		i;
@@ -262,9 +298,23 @@
 	 */
 
 	minor = MINOR(filp->f_dentry->d_inode->i_rdev);
-	dev = to_kdev_t(raw_device_bindings[minor]->bd_dev);
-	sector_size = raw_device_sector_size[minor];
-	sector_bits = raw_device_sector_bits[minor];
+
+	new_iobuf = 0;
+	iobuf = raw_devices[minor].iobuf;
+	if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) {
+		/*
+		 * A parallel read/write is using the preallocated iobuf
+		 * so just run slow and allocate a new one.
+		 */
+		err = alloc_kiovec(1, &iobuf);
+		if (err)
+			goto out;
+		new_iobuf = 1;
+	}
+
+	dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
+	sector_size = raw_devices[minor].sector_size;
+	sector_bits = raw_devices[minor].sector_bits;
 	sector_mask = sector_size- 1;
 	max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
@@ -275,18 +325,14 @@
 	dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
 		 MAJOR(dev), MINOR(dev), limit);
 
+	err = -EINVAL;
 	if ((*offp & sector_mask) || (size & sector_mask))
-		return -EINVAL;
-	if ((*offp >> sector_bits) > limit)
-		return 0;
-
-	/*
-	 * We'll just use one kiobuf
-	 */
-
-	err = alloc_kiovec(1, &iobuf);
-	if (err)
-		return err;
+		goto out_free;
+	err = 0;
+	if (size)
+		err = -ENXIO;
+	if ((*offp >> sector_bits) >= limit)
+		goto out_free;
 
 	/*
 	 * Split the IO into KIO_MAX_SECTORS chunks, mapping and
@@ -310,35 +356,37 @@
 		err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
 		if (err)
 			break;
-#if 0
-		err = lock_kiovec(1, &iobuf, 1);
-		if (err) 
-			break;
-#endif
-
+
 		for (i=0; i < blocks; i++)
-			b[i] = blocknr++;
+			iobuf->blocks[i] = blocknr++;
 
-		err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size);
+		err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size);
+
+		if (rw == READ && err > 0)
+			mark_dirty_kiobuf(iobuf, err);
+
 		if (err >= 0) {
 			transferred += err;
 			size -= err;
 			buf += err;
 		}
 
-		unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */
+		unmap_kiobuf(iobuf);
 
 		if (err != iosize)
 			break;
 	}
 
-	free_kiovec(1, &iobuf);
-
 	if (transferred) {
 		*offp += transferred;
-		return transferred;
+		err = transferred;
 	}
-
+
+ out_free:
+	if (!new_iobuf)
+		clear_bit(0, &raw_devices[minor].iobuf_lock);
+	else
+		free_kiovec(1, &iobuf);
+ out:
 	return err;
 }
diff -urN rawioref/drivers/md/lvm-snap.c rawio-6/drivers/md/lvm-snap.c
--- rawioref/drivers/md/lvm-snap.c	Thu Apr 26 04:35:42 2001
+++ rawio-6/drivers/md/lvm-snap.c	Thu Apr 26 04:35:52 2001
@@ -65,14 +65,6 @@
 			       const char **reason);
 static void _disable_snapshot(vg_t *vg, lv_t *lv);
 
-
-static inline int __brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
-			       kdev_t dev, unsigned long b[], int size,
-			       lv_t *lv) {
-	return brw_kiovec(rw, nr, iovec, dev, b, size);
-}
-
-
 static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn) {
 	uint p;
 	for(p = 0; p < vg->pv_max; p++) {
@@ -320,7 +312,6 @@
 	unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
 	int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
 	struct kiobuf * iobuf;
-	unsigned long blocks[KIO_MAX_SECTORS];
 	int blksize_snap, blksize_org, min_blksize, max_blksize;
 	int max_sectors, nr_sectors;
@@ -368,16 +359,14 @@
 
 		iobuf->length = nr_sectors << 9;
 
-		lvm_snapshot_prepare_blocks(blocks, org_start,
+		lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
 					    nr_sectors, blksize_org);
-		if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
-				 blksize_org, lv_snap) != (nr_sectors<<9))
+		if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, iobuf->blocks, blksize_org) != (nr_sectors<<9))
 			goto fail_raw_read;
 
-		lvm_snapshot_prepare_blocks(blocks, snap_start,
+		lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
 					    nr_sectors, blksize_snap);
-		if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
-				 blksize_snap, lv_snap) != (nr_sectors<<9))
+		if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, iobuf->blocks, blksize_snap) != (nr_sectors<<9))
 			goto fail_raw_write;
 	}
@@ -573,7 +562,6 @@
 	uint pvn;
 	ulong snap_pe_start, COW_table_sector_offset, COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
-	ulong blocks[1];
 	kdev_t snap_phys_dev;
 	lv_block_exception_t *be;
 	struct kiobuf *COW_table_iobuf = lv_snap->lv_COW_table_iobuf;
@@ -598,7 +586,7 @@
 	COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
 
 	/* COW table block to write next */
-	blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
+	COW_table_iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
 
 	/* store new COW_table entry */
 	be = lv_snap->lv_block_exception + idx;
@@ -618,8 +606,8 @@
 	COW_table_iobuf->length = blksize_snap;
 	/* COW_table_iobuf->nr_pages = 1; */
 
-	if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
-			 blocks, blksize_snap, lv_snap) != blksize_snap)
+	if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
+		       COW_table_iobuf->blocks, blksize_snap) != blksize_snap)
 		goto fail_raw_write;
 
 	/* initialization of next COW exception table block with zeroes */
@@ -637,12 +625,11 @@
 			snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 			snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
 			blksize_snap = lvm_get_blksize(snap_phys_dev);
-			blocks[0] = snap_pe_start >> (blksize_snap >> 10);
-		} else blocks[0]++;
+			COW_table_iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
+		} else COW_table_iobuf->blocks[0]++;
 
-		if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
-				 blocks, blksize_snap, lv_snap) !=
-		    blksize_snap)
+		if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
+			       COW_table_iobuf->blocks, blksize_snap) != blksize_snap)
 			goto fail_raw_write;
 	}
diff -urN rawioref/fs/buffer.c rawio-6/fs/buffer.c
--- rawioref/fs/buffer.c	Thu Apr 26 04:35:42 2001
+++ rawio-6/fs/buffer.c	Thu Apr 26 04:35:52 2001
@@ -1187,10 +1187,10 @@
 		kmem_cache_free(bh_cachep, bh);
 	} else {
 		bh->b_blocknr = -1;
-		init_waitqueue_head(&bh->b_wait);
+		bh->b_this_page = NULL;
+
 		nr_unused_buffer_heads++;
 		bh->b_next_free = unused_list;
-		bh->b_this_page = NULL;
 		unused_list = bh;
 	}
 }
@@ -1219,8 +1219,8 @@
 	 * more buffer-heads itself.  Thus SLAB_BUFFER.
 	 */
 	if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
-		memset(bh, 0, sizeof(*bh));
-		init_waitqueue_head(&bh->b_wait);
+		bh->b_blocknr = -1;
+		bh->b_this_page = NULL;
 		return bh;
 	}
@@ -1983,7 +1983,6 @@
 	end_kio_request(kiobuf, uptodate);
 }
 
-
 /*
  * For brw_kiovec: submit a set of buffer_head temporary IOs and wait
  * for them to complete.  Clean up the buffer_heads afterwards.
@@ -1991,21 +1990,18 @@
 
 static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size)
 {
-	int iosize;
+	int iosize, err;
 	int i;
 	struct buffer_head *tmp;
 
-	iosize = 0;
-	spin_lock(&unused_list_lock);
+	iosize = err = 0;
 
 	for (i = nr; --i >= 0; ) {
 		iosize += size;
 		tmp = bh[i];
 		if (buffer_locked(tmp)) {
-			spin_unlock(&unused_list_lock);
 			wait_on_buffer(tmp);
-			spin_lock(&unused_list_lock);
 		}
 
 		if (!buffer_uptodate(tmp)) {
@@ -2013,13 +2009,13 @@
 			   clearing iosize on error calculates the amount of IO
 			   before the first error. */
 			iosize = 0;
+			err = -EIO;
 		}
-		__put_unused_buffer_head(tmp);
 	}
-	spin_unlock(&unused_list_lock);
-
-	return iosize;
+	if (iosize)
+		return iosize;
+	return err;
 }
 
 /*
@@ -2048,7 +2044,7 @@
 	unsigned long	blocknr;
 	struct kiobuf *	iobuf = NULL;
 	struct page *	map;
-	struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
+	struct buffer_head *tmp, **bhs = NULL;
 
 	if (!nr)
 		return 0;
@@ -2074,22 +2070,20 @@
 		offset = iobuf->offset;
 		length = iobuf->length;
 		iobuf->errno = 0;
+		if (!bhs)
+			bhs = iobuf->bh;
 
 		for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
 			map  = iobuf->maplist[pageind];
 			if (!map) {
 				err = -EFAULT;
-				goto error;
+				goto finished;
 			}
 
 			while (length > 0) {
 				blocknr = b[bufind++];
-				tmp = get_unused_buffer_head(0);
-				if (!tmp) {
-					err = -ENOMEM;
-					goto error;
-				}
-
+				tmp = bhs[bhind++];
+				tmp->b_dev = B_FREE;
 				tmp->b_size = size;
 				set_bh_page(tmp, map, offset);
@@ -2103,9 +2097,9 @@
 				if (rw == WRITE) {
 					set_bit(BH_Uptodate, &tmp->b_state);
 					clear_bit(BH_Dirty, &tmp->b_state);
-				}
+				} else
+					set_bit(BH_Uptodate, &tmp->b_state);
 
-				bh[bhind++] = tmp;
 				length -= size;
 				offset += size;
@@ -2116,7 +2110,8 @@
 				 * Wait for IO if we have got too much
 				 */
 				if (bhind >= KIO_MAX_SECTORS) {
-					err = wait_kio(rw, bhind, bh, size);
+					kiobuf_wait_for_io(iobuf); /* wake-one */
+					err = wait_kio(rw, bhind, bhs, size);
 					if (err >= 0)
 						transferred += err;
 					else
@@ -2134,7 +2129,8 @@
 
 	/* Is there any IO still left to submit? */
 	if (bhind) {
-		err = wait_kio(rw, bhind, bh, size);
+		kiobuf_wait_for_io(iobuf); /* wake-one */
+		err = wait_kio(rw, bhind, bhs, size);
 		if (err >= 0)
 			transferred += err;
 		else
@@ -2145,16 +2141,6 @@
 	if (transferred)
 		return transferred;
 	return err;
-
- error:
-	/* We got an error allocating the bh'es.  Just free the current
-	   buffer_heads and exit.
-	*/
-	spin_lock(&unused_list_lock);
-	for (i = bhind; --i >= 0; ) {
-		__put_unused_buffer_head(bh[i]);
-	}
-	spin_unlock(&unused_list_lock);
-	goto finished;
 }
 
 /*
diff -urN rawioref/fs/dcache.c rawio-6/fs/dcache.c
--- rawioref/fs/dcache.c	Thu Apr 26 04:35:41 2001
+++ rawio-6/fs/dcache.c	Thu Apr 26 04:35:52 2001
@@ -1227,6 +1227,18 @@
 	} while (i);
 }
 
+static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+	{
+		struct buffer_head * bh = (struct buffer_head *) foo;
+
+		memset(bh, 0, sizeof(*bh));
+		init_waitqueue_head(&bh->b_wait);
+	}
+}
+
 /* SLAB cache for __getname() consumers */
 kmem_cache_t *names_cachep;
@@ -1244,7 +1256,7 @@
 {
 	bh_cachep = kmem_cache_create("buffer_head",
 				      sizeof(struct buffer_head), 0,
-				      SLAB_HWCACHE_ALIGN, NULL, NULL);
+				      SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
 	if(!bh_cachep)
 		panic("Cannot create buffer head SLAB cache");
diff -urN rawioref/fs/iobuf.c rawio-6/fs/iobuf.c
--- rawioref/fs/iobuf.c	Sun Apr  1 01:17:30 2001
+++ rawio-6/fs/iobuf.c	Thu Apr 26 04:35:52 2001
@@ -8,9 +8,7 @@
 
 #include <linux/iobuf.h>
 #include <linux/slab.h>
-
-static kmem_cache_t *kiobuf_cachep;
-
+#include <linux/vmalloc.h>
 
 void end_kio_request(struct kiobuf *kiobuf, int uptodate)
 {
@@ -24,18 +22,7 @@
 	}
 }
 
-
-void __init kiobuf_setup(void)
-{
-	kiobuf_cachep = kmem_cache_create("kiobuf",
-					  sizeof(struct kiobuf),
-					  0,
-					  SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if(!kiobuf_cachep)
-		panic("Cannot create kernel iobuf cache\n");
-}
-
-void kiobuf_init(struct kiobuf *iobuf)
+static void kiobuf_init(struct kiobuf *iobuf)
 {
 	memset(iobuf, 0, sizeof(*iobuf));
 	init_waitqueue_head(&iobuf->wait_queue);
@@ -43,18 +30,48 @@
 	iobuf->maplist   = iobuf->map_array;
 }
 
+int alloc_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+	int i;
+
+	for (i = 0; i < KIO_MAX_SECTORS; i++)
+		if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) {
+			while (i--) {
+				kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+				kiobuf->bh[i] = NULL;
+			}
+			return -ENOMEM;
+		}
+	return 0;
+}
+
+void free_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+	int i;
+
+	for (i = 0; i < KIO_MAX_SECTORS; i++) {
+		kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+		kiobuf->bh[i] = NULL;
+	}
+}
+
 int alloc_kiovec(int nr, struct kiobuf **bufp)
 {
 	int i;
 	struct kiobuf *iobuf;
 
 	for (i = 0; i < nr; i++) {
-		iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+		iobuf = vmalloc(sizeof(struct kiobuf));
 		if (!iobuf) {
 			free_kiovec(i, bufp);
 			return -ENOMEM;
 		}
 		kiobuf_init(iobuf);
+		if (alloc_kiobuf_bhs(iobuf)) {
+			vfree(iobuf);
+			free_kiovec(i, bufp);
+			return -ENOMEM;
+		}
 		bufp[i] = iobuf;
 	}
@@ -72,7 +89,8 @@
 			unlock_kiovec(1, &iobuf);
 		if (iobuf->array_len > KIO_STATIC_PAGES)
 			kfree (iobuf->maplist);
-		kmem_cache_free(kiobuf_cachep, bufp[i]);
+		free_kiobuf_bhs(iobuf);
+		vfree(bufp[i]);
 	}
 }
@@ -115,11 +133,12 @@
 	add_wait_queue(&kiobuf->wait_queue, &wait);
 repeat:
-	run_task_queue(&tq_disk);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	if (atomic_read(&kiobuf->io_count) != 0) {
+		run_task_queue(&tq_disk);
 		schedule();
-		goto repeat;
+		if (atomic_read(&kiobuf->io_count) != 0)
+			goto repeat;
 	}
 	tsk->state = TASK_RUNNING;
 	remove_wait_queue(&kiobuf->wait_queue, &wait);
diff -urN rawioref/include/linux/iobuf.h rawio-6/include/linux/iobuf.h
--- rawioref/include/linux/iobuf.h	Tue Apr 24 06:15:48 2001
+++ rawio-6/include/linux/iobuf.h	Thu Apr 26 04:35:52 2001
@@ -24,8 +24,7 @@
  * entire iovec.
 */
 
-#define KIO_MAX_ATOMIC_IO	64 /* in kb */
-#define KIO_MAX_ATOMIC_BYTES	(64 * 1024)
+#define KIO_MAX_ATOMIC_IO	512 /* in kb */
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
 #define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
@@ -47,8 +46,10 @@
 
 	unsigned int	locked : 1;	/* If set, pages has been locked */
 
-	/* Always embed enough struct pages for 64k of IO */
+	/* Always embed enough struct pages for atomic IO */
 	struct page *	map_array[KIO_STATIC_PAGES];
+	struct buffer_head * bh[KIO_MAX_SECTORS];
+	unsigned long blocks[KIO_MAX_SECTORS];
 
 	/* Dynamic state for IO completion: */
 	atomic_t	io_count;	/* IOs still in progress */
@@ -64,17 +65,18 @@
 void	unmap_kiobuf(struct kiobuf *iobuf);
 int	lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
 int	unlock_kiovec(int nr, struct kiobuf *iovec[]);
+void	mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes);
 
 /* fs/iobuf.c */
 
-void __init kiobuf_setup(void);
-void	kiobuf_init(struct kiobuf *);
 void	end_kio_request(struct kiobuf *, int);
 void	simple_wakeup_kiobuf(struct kiobuf *);
 int	alloc_kiovec(int nr, struct kiobuf **);
 void	free_kiovec(int nr, struct kiobuf **);
 int	expand_kiobuf(struct kiobuf *, int);
 void	kiobuf_wait_for_io(struct kiobuf *);
+extern int alloc_kiobuf_bhs(struct kiobuf *);
+extern void free_kiobuf_bhs(struct kiobuf *);
 
 /* fs/buffer.c */
diff -urN rawioref/include/linux/raw.h rawio-6/include/linux/raw.h
--- rawioref/include/linux/raw.h	Tue Apr 24 06:15:57 2001
+++ rawio-6/include/linux/raw.h	Thu Apr 26 04:35:52 2001
@@ -13,11 +13,4 @@
 	__u64 block_minor;
 };
 
-#ifdef __KERNEL__
-
-/* drivers/char/raw.c */
-extern void raw_init(void);
-
-#endif /* __KERNEL__ */
-
 #endif /* __LINUX_RAW_H */
diff -urN rawioref/init/main.c rawio-6/init/main.c
--- rawioref/init/main.c	Thu Apr 26 02:04:44 2001
+++ rawio-6/init/main.c	Thu Apr 26 04:35:52 2001
@@ -583,7 +583,6 @@
 	vfs_caches_init(mempages);
 	buffer_init(mempages);
 	page_cache_init(mempages);
-	kiobuf_setup();
 	signals_init();
 	bdev_init();
 	inode_init(mempages);
diff -urN rawioref/kernel/ksyms.c rawio-6/kernel/ksyms.c
--- rawioref/kernel/ksyms.c	Thu Apr 26 02:04:44 2001
+++ rawio-6/kernel/ksyms.c	Thu Apr 26 04:35:52 2001
@@ -386,8 +386,6 @@
 #endif
 
 /* Kiobufs */
-EXPORT_SYMBOL(kiobuf_init);
-
 EXPORT_SYMBOL(alloc_kiovec);
 EXPORT_SYMBOL(free_kiovec);
 EXPORT_SYMBOL(expand_kiobuf);
diff -urN rawioref/mm/highmem.c rawio-6/mm/highmem.c
--- rawioref/mm/highmem.c	Thu Apr 26 04:35:42 2001
+++ rawio-6/mm/highmem.c	Thu Apr 26 04:35:54 2001
@@ -236,9 +236,13 @@
 		nr_emergency_pages++;
 	}
 
-	if (nr_emergency_bhs >= POOL_SIZE)
+	if (nr_emergency_bhs >= POOL_SIZE) {
+#ifdef HIGHMEM_DEBUG
+		/* Don't clobber the constructed slab cache */
+		init_waitqueue_head(&bh->b_wait);
+#endif
 		kmem_cache_free(bh_cachep, bh);
-	else {
+	} else {
 		/*
 		 * Ditto in the bh case, here we abuse b_inode_buffers:
 		 */
@@ -395,12 +399,14 @@
 	bh->b_count = bh_orig->b_count;
 	bh->b_rdev = bh_orig->b_rdev;
 	bh->b_state = bh_orig->b_state;
+#ifdef HIGHMEM_DEBUG
 	bh->b_flushtime = jiffies;
 	bh->b_next_free = NULL;
 	bh->b_prev_free = NULL;
 	/* bh->b_this_page */
 	bh->b_reqnext = NULL;
 	bh->b_pprev = NULL;
+#endif
 	/* bh->b_page */
 	if (rw == WRITE) {
 		bh->b_end_io = bounce_end_io_write;
@@ -409,7 +415,9 @@
 		bh->b_end_io = bounce_end_io_read;
 	bh->b_private = (void *)bh_orig;
 	bh->b_rsector = bh_orig->b_rsector;
+#ifdef HIGHMEM_DEBUG
 	memset(&bh->b_wait, -1, sizeof(bh->b_wait));
+#endif
 
 	return bh;
 }
diff -urN rawioref/mm/memory.c rawio-6/mm/memory.c
--- rawioref/mm/memory.c	Thu Apr 26 02:04:45 2001
+++ rawio-6/mm/memory.c	Thu Apr 26 04:35:52 2001
@@ -389,20 +389,33 @@
 /*
  * Do a quick page-table lookup for a single page.
  */
-static struct page * follow_page(unsigned long address)
+static struct page * follow_page(unsigned long address, int write)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
+	pte_t *ptep, pte;
 
 	pgd = pgd_offset(current->mm, address);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+
 	pmd = pmd_offset(pgd, address);
-	if (pmd) {
-		pte_t * pte = pte_offset(pmd, address);
-		if (pte && pte_present(*pte))
-			return pte_page(*pte);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+
+	ptep = pte_offset(pmd, address);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (pte_present(pte)) {
+		if (!write ||
+		    (pte_write(pte) && pte_dirty(pte)))
+			return pte_page(pte);
 	}
-
-	return NULL;
+
+out:
+	return 0;
 }
 
 /*
@@ -476,15 +489,22 @@
 			goto out_unlock;
 		}
 	}
-	if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
-		goto out_unlock;
 	spin_lock(&mm->page_table_lock);
-	map = follow_page(ptr);
-	if (!map) {
+	while (!(map = follow_page(ptr, datain))) {
+		int ret;
+
 		spin_unlock(&mm->page_table_lock);
-		dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
-		goto out_unlock;
-	}
+		ret = handle_mm_fault(current->mm, vma, ptr, datain);
+		if (ret <= 0) {
+			if (!ret)
+				goto out_unlock;
+			else {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&mm->page_table_lock);
+	}
 	map = get_page_map(map);
 	if (map) {
 		flush_dcache_page(map);
@@ -509,6 +529,37 @@
 	return err;
 }
 
+/*
+ * Mark all of the pages in a kiobuf as dirty
+ *
+ * We need to be able to deal with short reads from disk: if an IO error
+ * occurs, the number of bytes read into memory may be less than the
+ * size of the kiobuf, so we have to stop marking pages dirty once the
+ * requested byte count has been reached.
+ */
+
+void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
+{
+	int index, offset, remaining;
+	struct page *page;
+
+	index = iobuf->offset >> PAGE_SHIFT;
+	offset = iobuf->offset & ~PAGE_MASK;
+	remaining = bytes;
+	if (remaining > iobuf->length)
+		remaining = iobuf->length;
+
+	while (remaining > 0 && index < iobuf->nr_pages) {
+		page = iobuf->maplist[index];
+
+		if (!PageReserved(page))
+			SetPageDirty(page);
+
+		remaining -= (PAGE_SIZE - offset);
+		offset = 0;
+		index++;
+	}
+}
+
 /*
  * Unmap all of the pages referenced by a kiobuf.  We release the pages,
@@ -559,7 +610,6 @@
 
 		if (iobuf->locked)
 			continue;
-		iobuf->locked = 1;
 
 		ppage = iobuf->maplist;
 		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
@@ -567,9 +617,16 @@
 			if (!page)
 				continue;
 
-			if (TryLockPage(page))
+			if (TryLockPage(page)) {
+				while (j--) {
+					page = *(--ppage);
+					if (page)
+						UnlockPage(page);
+				}
 				goto retry;
+			}
 		}
+		iobuf->locked = 1;
 	}
 
 	return 0;
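
Usage sketch (illustration only, not part of the patch): with the patch applied, userspace drives a raw device by binding a raw minor to a block device through /dev/rawctl and then issuing sector-aligned IO on the raw node. The sketch below assumes the stock 2.4 raw ABI from <linux/raw.h> (RAW_SETBIND and struct raw_config_request) and the conventional /dev/rawctl and /dev/raw/raw1 device nodes; /dev/sda1 stands in for whatever block device you actually bind.

/*
 * Hypothetical example: bind raw minor 1 to a block device, then do one
 * sector-aligned read through it.  Device names are illustrative.
 */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sysmacros.h>	/* major(), minor() */
#include <linux/raw.h>		/* RAW_SETBIND, struct raw_config_request */

int main(void)
{
	struct raw_config_request rq;
	struct stat st;
	char buf[4096];
	int ctl, fd;

	/* Find the major/minor of the block device we want to bind. */
	if (stat("/dev/sda1", &st) < 0)
		exit(1);

	memset(&rq, 0, sizeof(rq));
	rq.raw_minor = 1;
	rq.block_major = major(st.st_rdev);
	rq.block_minor = minor(st.st_rdev);

	/* Bind via the raw control device. */
	ctl = open("/dev/rawctl", O_RDWR);
	if (ctl < 0 || ioctl(ctl, RAW_SETBIND, &rq) < 0)
		exit(1);
	close(ctl);

	/* Raw IO requires the file offset and the transfer size to be
	 * multiples of the underlying sector size (512 bytes here);
	 * rw_raw_dev() rejects unaligned requests with -EINVAL. */
	fd = open("/dev/raw/raw1", O_RDWR);
	if (fd < 0 || read(fd, buf, sizeof(buf)) < 0)
		exit(1);
	close(fd);
	return 0;
}

The point of the patch shows up on this read path: the common case reuses the kiobuf (with its preallocated buffer_heads) set up at raw_open() time, guarded by iobuf_lock, so a read like the one above no longer pays for an alloc_kiovec() per call; only concurrent IO against the same raw minor falls back to allocating a fresh kiobuf.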