diff -urN 2.4.13pre2/fs/block_dev.c o_direct/fs/block_dev.c --- 2.4.13pre2/fs/block_dev.c Sat Oct 13 04:07:09 2001 +++ o_direct/fs/block_dev.c Sat Oct 13 19:41:03 2001 @@ -113,6 +113,11 @@ return 0; } +static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) +{ + return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block); +} + static int blkdev_writepage(struct page * page) { return block_write_full_page(page, blkdev_get_block); @@ -632,6 +637,7 @@ sync_page: block_sync_page, prepare_write: blkdev_prepare_write, commit_write: blkdev_commit_write, + direct_IO: blkdev_direct_IO, }; struct file_operations def_blk_fops = { diff -urN 2.4.13pre2/fs/buffer.c o_direct/fs/buffer.c --- 2.4.13pre2/fs/buffer.c Sat Oct 13 04:07:09 2001 +++ o_direct/fs/buffer.c Sat Oct 13 19:41:03 2001 @@ -1937,6 +1937,47 @@ return tmp.b_blocknr; } +int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block) +{ + int i, nr_blocks, retval; + unsigned long * blocks = iobuf->blocks; + + nr_blocks = iobuf->length / blocksize; + /* build the blocklist */ + for (i = 0; i < nr_blocks; i++, blocknr++) { + struct buffer_head bh; + + bh.b_state = 0; + bh.b_dev = inode->i_dev; + bh.b_size = blocksize; + + retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1); + if (retval) + goto out; + + if (rw == READ) { + if (buffer_new(&bh)) + BUG(); + if (!buffer_mapped(&bh)) { + /* there was an hole in the filesystem */ + blocks[i] = -1UL; + continue; + } + } else { + if (buffer_new(&bh)) + unmap_underlying_metadata(&bh); + if (!buffer_mapped(&bh)) + BUG(); + } + blocks[i] = bh.b_blocknr; + } + + retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize); + + out: + return retval; +} + /* * IO completion routine for a buffer_head being used for kiobuf IO: we * can't dispatch the kiobuf callback until io_count reaches 0. diff -urN 2.4.13pre2/fs/ext2/inode.c o_direct/fs/ext2/inode.c --- 2.4.13pre2/fs/ext2/inode.c Sat Oct 13 04:07:09 2001 +++ o_direct/fs/ext2/inode.c Sat Oct 13 19:41:03 2001 @@ -592,13 +592,18 @@ { return generic_block_bmap(mapping,block,ext2_get_block); } +static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) +{ + return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block); +} struct address_space_operations ext2_aops = { readpage: ext2_readpage, writepage: ext2_writepage, sync_page: block_sync_page, prepare_write: ext2_prepare_write, commit_write: generic_commit_write, - bmap: ext2_bmap + bmap: ext2_bmap, + direct_IO: ext2_direct_IO, }; /* diff -urN 2.4.13pre2/include/linux/fs.h o_direct/include/linux/fs.h --- 2.4.13pre2/include/linux/fs.h Sat Oct 13 04:07:12 2001 +++ o_direct/include/linux/fs.h Sat Oct 13 19:41:03 2001 @@ -1368,6 +1368,7 @@ int generic_block_bmap(struct address_space *, long, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); +extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); extern void create_empty_buffers(struct page *, kdev_t, unsigned long); extern int waitfor_one_page(struct page*); diff -urN 2.4.13pre2/mm/filemap.c o_direct/mm/filemap.c --- 2.4.13pre2/mm/filemap.c Sat Oct 13 04:07:12 2001 +++ o_direct/mm/filemap.c Sat Oct 13 19:41:14 2001 @@ -1356,6 +1356,87 @@ UPDATE_ATIME(inode); } +static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset) +{ + ssize_t retval; + int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress; + struct kiobuf * iobuf; + struct inode * inode = filp->f_dentry->d_inode; + struct address_space * mapping = inode->i_mapping; + + new_iobuf = 0; + iobuf = filp->f_iobuf; + if (test_and_set_bit(0, &filp->f_iobuf_lock)) { + /* + * A parallel read/write is using the preallocated iobuf + * so just run slow and allocate a new one. + */ + retval = alloc_kiovec(1, &iobuf); + if (retval) + goto out; + new_iobuf = 1; + } + + blocksize = 1 << inode->i_blkbits; + blocksize_bits = inode->i_blkbits; + blocksize_mask = blocksize - 1; + chunk_size = KIO_MAX_ATOMIC_IO << 10; + + retval = -EINVAL; + if ((offset & blocksize_mask) || (count & blocksize_mask)) + goto out_free; + if (!mapping->a_ops->direct_IO) + goto out_free; + + /* + * Flush to disk exlusively the _data_, metadata must remains + * completly asynchronous or performance will go to /dev/null. + */ + filemap_fdatasync(mapping); + retval = fsync_inode_data_buffers(inode); + filemap_fdatawait(mapping); + if (retval < 0) + goto out_free; + + progress = retval = 0; + while (count > 0) { + iosize = count; + if (iosize > chunk_size) + iosize = chunk_size; + + retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); + if (retval) + break; + + retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize); + + if (rw == READ && retval > 0) + mark_dirty_kiobuf(iobuf, retval); + + if (retval >= 0) { + count -= retval; + buf += retval; + progress += retval; + } + + unmap_kiobuf(iobuf); + + if (retval != iosize) + break; + } + + if (progress) + retval = progress; + + out_free: + if (!new_iobuf) + clear_bit(0, &filp->f_iobuf_lock); + else + free_kiovec(1, &iobuf); + out: + return retval; +} + int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { char *kaddr; @@ -1389,6 +1470,9 @@ if ((ssize_t) count < 0) return -EINVAL; + if (filp->f_flags & O_DIRECT) + goto o_direct; + retval = -EFAULT; if (access_ok(VERIFY_WRITE, buf, count)) { retval = 0; @@ -1407,7 +1491,29 @@ retval = desc.error; } } + out: return retval; + + o_direct: + { + loff_t pos = *ppos, size; + struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + + retval = 0; + if (!count) + goto out; /* skip atime */ + size = inode->i_size; + if (pos < size) { + if (pos + count > size) + count = size - pos; + retval = generic_file_direct_IO(READ, filp, buf, count, pos); + if (retval > 0) + *ppos = pos + retval; + } + UPDATE_ATIME(filp->f_dentry->d_inode); + goto out; + } } static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size) @@ -2608,7 +2714,8 @@ written = 0; - if (file->f_flags & O_APPEND) + /* FIXME: this is for backwards compatibility with 2.4 */ + if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) pos = inode->i_size; /* @@ -2688,6 +2795,9 @@ inode->i_ctime = inode->i_mtime = CURRENT_TIME; mark_inode_dirty_sync(inode); + if (file->f_flags & O_DIRECT) + goto o_direct; + do { unsigned long index, offset; long page_fault; @@ -2762,6 +2872,7 @@ if ((status >= 0) && (file->f_flags & O_SYNC)) status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA); +out_status: err = written ? written : status; out: @@ -2770,6 +2881,25 @@ fail_write: status = -EFAULT; goto unlock; + +o_direct: + written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos); + if (written > 0) { + loff_t end = pos + written; + if (end > inode->i_size && !S_ISBLK(inode->i_mode)) { + inode->i_size = end; + mark_inode_dirty(inode); + } + *ppos = end; + invalidate_inode_pages2(mapping); + } + /* + * Sync the fs metadata but not the minor inode changes and + * of course not the data as we did direct DMA for the IO. + */ + if (written >= 0 && file->f_flags & O_SYNC) + status = generic_osync_inode(inode, OSYNC_METADATA); + goto out_status; } void __init page_cache_init(unsigned long mempages)