diff -urN dontuse/fs/block_dev.c backout/fs/block_dev.c
--- dontuse/fs/block_dev.c	Fri Sep 28 04:00:20 2001
+++ backout/fs/block_dev.c	Fri Sep 28 05:30:50 2001
@@ -859,6 +859,7 @@
 	sync_page: block_sync_page,
 	prepare_write: blkdev_prepare_write,
 	commit_write: blkdev_commit_write,
+	direct_IO: blkdev_direct_IO,
 };
 
 struct file_operations def_blk_fops = {
diff -urN dontuse/fs/buffer.c backout/fs/buffer.c
--- dontuse/fs/buffer.c	Fri Sep 28 04:00:20 2001
+++ backout/fs/buffer.c	Fri Sep 28 05:30:50 2001
@@ -1970,6 +1970,47 @@
 	return tmp.b_blocknr;
 }
 
+int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
+{
+	int i, nr_blocks, retval;
+	unsigned long * blocks = iobuf->blocks;
+
+	nr_blocks = iobuf->length / blocksize;
+	/* build the blocklist */
+	for (i = 0; i < nr_blocks; i++, blocknr++) {
+		struct buffer_head bh;
+
+		bh.b_state = 0;
+		bh.b_dev = inode->i_dev;
+		bh.b_size = blocksize;
+
+		retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
+		if (retval)
+			goto out;
+
+		if (rw == READ) {
+			if (buffer_new(&bh))
+				BUG();
+			if (!buffer_mapped(&bh)) {
+				/* there was a hole in the filesystem */
+				blocks[i] = -1UL;
+				continue;
+			}
+		} else {
+			if (buffer_new(&bh))
+				unmap_underlying_metadata(&bh);
+			if (!buffer_mapped(&bh))
+				BUG();
+		}
+		blocks[i] = bh.b_blocknr;
+	}
+
+	retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
+
+ out:
+	return retval;
+}
+
 /*
  * IO completion routine for a buffer_head being used for kiobuf IO: we
  * can't dispatch the kiobuf callback until io_count reaches 0.
@@ -2364,7 +2405,7 @@
 		ll_rw_block(WRITE, 1, &p);
 		tryagain = 0;
 	} else if (buffer_locked(p)) {
-		if (gfp_mask & __GFP_WAITBUF) {
+		if (gfp_mask & __GFP_WAIT) {
 			wait_on_buffer(p);
 			tryagain = 1;
 		} else
diff -urN dontuse/fs/ext2/inode.c backout/fs/ext2/inode.c
--- dontuse/fs/ext2/inode.c	Fri Sep 28 04:00:21 2001
+++ backout/fs/ext2/inode.c	Fri Sep 28 05:30:50 2001
@@ -586,6 +586,10 @@
 {
 	return generic_block_bmap(mapping,block,ext2_get_block);
 }
+static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
+{
+	return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
+}
 struct address_space_operations ext2_aops = {
 	readpage: ext2_readpage,
 	writepage: ext2_writepage,
@@ -593,6 +597,7 @@
 	prepare_write: ext2_prepare_write,
 	commit_write: generic_commit_write,
 	bmap: ext2_bmap,
+	direct_IO: ext2_direct_IO,
 };
 
 /*
diff -urN dontuse/include/linux/fs.h backout/include/linux/fs.h
--- dontuse/include/linux/fs.h	Fri Sep 28 04:00:21 2001
+++ backout/include/linux/fs.h	Fri Sep 28 05:31:08 2001
@@ -1353,6 +1353,7 @@
 int generic_block_bmap(struct address_space *, long, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
+extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
 
 extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
 extern int waitfor_one_page(struct page*);
diff -urN dontuse/include/linux/mm.h backout/include/linux/mm.h
--- dontuse/include/linux/mm.h	Fri Sep 28 04:00:21 2001
+++ backout/include/linux/mm.h	Fri Sep 28 05:30:50 2001
@@ -550,17 +550,16 @@
 #define __GFP_IO	0x40	/* Can start low memory physical IO? */
 #define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
 #define __GFP_FS	0x100	/* Can call down to low-level FS? */
-#define __GFP_WAITBUF	0x200	/* Can we wait for buffers to complete? */
 
 #define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
 #define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
-#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF)
+#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
 #define GFP_ATOMIC	(__GFP_HIGH)
-#define GFP_USER	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF | __GFP_FS)
-#define GFP_HIGHUSER	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF | __GFP_FS | __GFP_HIGHMEM)
-#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF | __GFP_FS)
-#define GFP_NFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF | __GFP_FS)
-#define GFP_KSWAPD	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_WAITBUF | __GFP_FS)
+#define GFP_USER	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_HIGHUSER	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
+#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_NFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KSWAPD	( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
diff -urN dontuse/include/linux/slab.h backout/include/linux/slab.h
--- dontuse/include/linux/slab.h	Fri Sep 28 04:00:21 2001
+++ backout/include/linux/slab.h	Fri Sep 28 05:30:50 2001
@@ -24,7 +24,7 @@
 #define	SLAB_NFS		GFP_NFS
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_WAITBUF|__GFP_FS)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().
diff -urN dontuse/mm/filemap.c backout/mm/filemap.c
--- dontuse/mm/filemap.c	Fri Sep 28 04:00:21 2001
+++ backout/mm/filemap.c	Fri Sep 28 05:30:50 2001
@@ -23,7 +23,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -1348,6 +1347,92 @@
 	UPDATE_ATIME(inode);
 }
 
+static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
+{
+	ssize_t retval;
+	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
+	struct kiobuf * iobuf;
+	struct inode * inode = filp->f_dentry->d_inode;
+	struct address_space * mapping = inode->i_mapping;
+
+	new_iobuf = 0;
+	iobuf = filp->f_iobuf;
+	if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
+		/*
+		 * A parallel read/write is using the preallocated iobuf
+		 * so just run slow and allocate a new one.
+		 */
+		retval = alloc_kiovec(1, &iobuf);
+		if (retval)
+			goto out;
+		new_iobuf = 1;
+	}
+
+	if (!S_ISBLK(inode->i_mode)) {
+		blocksize = inode->i_sb->s_blocksize;
+		blocksize_bits = inode->i_sb->s_blocksize_bits;
+	} else {
+		blocksize = BUFFERED_BLOCKSIZE;
+		blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
+	}
+	blocksize_mask = blocksize - 1;
+	chunk_size = KIO_MAX_ATOMIC_IO << 10;
+
+	retval = -EINVAL;
+	if ((offset & blocksize_mask) || (count & blocksize_mask))
+		goto out_free;
+	if (!mapping->a_ops->direct_IO)
+		goto out_free;
+
+	/*
+	 * Flush to disk exclusively the _data_; metadata must remain
+	 * completely asynchronous or performance will go to /dev/null.
+	 */
+	filemap_fdatasync(mapping);
+	retval = fsync_inode_data_buffers(inode);
+	filemap_fdatawait(mapping);
+	if (retval < 0)
+		goto out_free;
+
+	progress = retval = 0;
+	while (count > 0) {
+		iosize = count;
+		if (iosize > chunk_size)
+			iosize = chunk_size;
+
+		retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
+		if (retval)
+			break;
+
+		retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
+
+		if (rw == READ && retval > 0)
+			mark_dirty_kiobuf(iobuf, retval);
+
+		if (retval >= 0) {
+			count -= retval;
+			buf += retval;
+			progress += retval;
+		}
+
+		unmap_kiobuf(iobuf);
+
+		if (retval != iosize)
+			break;
+	}
+
+	if (progress)
+		retval = progress;
+
+ out_free:
+	if (!new_iobuf)
+		clear_bit(0, &filp->f_iobuf_lock);
+	else
+		free_kiovec(1, &iobuf);
+ out:
+	return retval;
+}
+
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
 	char *kaddr;
@@ -1381,6 +1466,9 @@
 	if ((ssize_t) count < 0)
 		return -EINVAL;
 
+	if (filp->f_flags & O_DIRECT)
+		goto o_direct;
+
 	retval = -EFAULT;
 	if (access_ok(VERIFY_WRITE, buf, count)) {
 		retval = 0;
@@ -1399,7 +1487,28 @@
 			retval = desc.error;
 		}
 	}
+ out:
 	return retval;
+
+ o_direct:
+	{
+		loff_t pos = *ppos, size;
+		struct inode * inode = filp->f_dentry->d_inode;
+
+		retval = 0;
+		if (!count)
+			goto out; /* skip atime */
+		size = calc_rsize(inode);
+		if (pos < size) {
+			if (pos + count > size)
+				count = size - pos;
+			retval = generic_file_direct_IO(READ, filp, buf, count, pos);
+			if (retval > 0)
+				*ppos = pos + retval;
+		}
+		UPDATE_ATIME(filp->f_dentry->d_inode);
+		goto out;
+	}
 }
 
 static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -2601,7 +2710,8 @@
 
 	written = 0;
 
-	if (file->f_flags & O_APPEND)
+	/* FIXME: this is for backwards compatibility with 2.4 */
+	if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
 		pos = inode->i_size;
 
 	/*
@@ -2681,6 +2791,9 @@
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	mark_inode_dirty_sync(inode);
 
+	if (file->f_flags & O_DIRECT)
+		goto o_direct;
+
 	do {
 		unsigned long index, offset;
 		long page_fault;
@@ -2755,6 +2868,7 @@
 	if ((status >= 0) && (file->f_flags & O_SYNC))
 		status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
 
+out_status:
 	err = written ? written : status;
 
 out:
@@ -2763,6 +2877,25 @@
 fail_write:
 	status = -EFAULT;
 	goto unlock;
+
+o_direct:
+	written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
+	if (written > 0) {
+		loff_t end = pos + written;
+		if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
+			inode->i_size = end;
+			mark_inode_dirty(inode);
+		}
+		*ppos = end;
+		invalidate_inode_pages2(mapping);
+	}
+	/*
+	 * Sync the fs metadata but not the minor inode changes and
+	 * of course not the data as we did direct DMA for the IO.
+	 */
+	if (written >= 0 && file->f_flags & O_SYNC)
+		status = generic_osync_inode(inode, OSYNC_METADATA);
+	goto out_status;
 }
 
 void __init page_cache_init(unsigned long mempages)
diff -urN dontuse/mm/memory.c backout/mm/memory.c
--- dontuse/mm/memory.c	Fri Sep 28 04:00:21 2001
+++ backout/mm/memory.c	Fri Sep 28 05:30:50 2001
@@ -1103,10 +1103,6 @@
 	return;
 }
 
-/* Swap 80% full? Release the pages as they are paged in.. */
-#define vm_swap_full() \
-	(swapper_space.nrpages*5 > total_swap_pages*4)
-
 /*
  * We hold the mm semaphore and the page_table_lock on entry and exit.
 */
@@ -1164,12 +1160,10 @@
 	swap_free(entry);
 	mark_page_accessed(page);
 	if (exclusive_swap_page(page)) {
-		if (write_access || vm_swap_full()) {
-			pte = pte_mkdirty(pte);
-			if (vma->vm_flags & VM_WRITE)
-				pte = pte_mkwrite(pte);
-			delete_from_swap_cache(page);
-		}
+		if (vma->vm_flags & VM_WRITE)
+			pte = pte_mkwrite(pte);
+		pte = pte_mkdirty(pte);
+		delete_from_swap_cache(page);
 	}
 	UnlockPage(page);
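
For reference, this is what the new semantics look like from userspace. generic_file_direct_IO() above returns -EINVAL unless both the file offset and the transfer size are multiples of the filesystem blocksize (BUFFERED_BLOCKSIZE for block devices), and the kiobuf path also wants the user buffer itself block aligned. Below is a minimal sketch of a compliant O_DIRECT read; the file name "testfile" and the 4096-byte size/alignment (assumed to be a multiple of the fs blocksize) are illustrative assumptions, not part of the patch:

	#define _GNU_SOURCE		/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		const size_t len = 4096;	/* assumed multiple of the fs blocksize */
		void *buf;
		int fd;
		ssize_t n;

		/* the user buffer must be block aligned for the kiobuf path */
		if (posix_memalign(&buf, 4096, len))
			return 1;

		fd = open("testfile", O_RDONLY | O_DIRECT);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* a misaligned offset or count would fail with EINVAL */
		n = read(fd, buf, len);
		if (n < 0)
			perror("read");
		else
			printf("read %zd bytes around the page cache\n", n);

		close(fd);
		free(buf);
		return 0;
	}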
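
On the write side the patch extends i_size for regular files, advances *ppos and drops any stale cached pages with invalidate_inode_pages2(); with O_SYNC only the metadata is synced (OSYNC_METADATA), since the data itself already went to disk by direct DMA. A hedged write-side sketch under the same alignment assumptions as above:

	#define _GNU_SOURCE		/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const size_t len = 4096;	/* assumed multiple of the fs blocksize */
		void *buf;
		int fd;
		ssize_t n;

		if (posix_memalign(&buf, 4096, len))
			return 1;
		memset(buf, 'x', len);

		/*
		 * O_SYNC on top of O_DIRECT only costs a metadata sync here
		 * (OSYNC_METADATA): the payload goes out by direct DMA.
		 */
		fd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, 0644);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		n = write(fd, buf, len);	/* offset and count block aligned */
		if (n < 0)
			perror("write");

		close(fd);
		free(buf);
		return 0;
	}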