XFS deadlocks, and doesn't need the fancy locking anyway. So provide a separate direct-IO path for XFS, which avoids all the extra locking. 25-akpm/fs/direct-io.c | 30 +++++++++++++++++------------- 25-akpm/fs/xfs/linux/xfs_aops.c | 3 ++- 25-akpm/include/linux/fs.h | 29 ++++++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 17 deletions(-) diff -puN fs/direct-io.c~O_DIRECT-race-fixes-rework-XFS-fix fs/direct-io.c --- 25/fs/direct-io.c~O_DIRECT-race-fixes-rework-XFS-fix Wed Oct 22 12:42:50 2003 +++ 25-akpm/fs/direct-io.c Wed Oct 22 12:42:50 2003 @@ -52,6 +52,10 @@ * * If blkfactor is zero then the user's request was aligned to the filesystem's * blocksize. + * + * needs_locking is set for regular files on direct-IO-naive filesystems. It + * determines whether we need to do the fancy locking which prevents direct-IO + * from being able to read uninitialised disk blocks. */ struct dio { @@ -59,6 +63,7 @@ struct dio { struct bio *bio; /* bio under assembly */ struct inode *inode; int rw; + int needs_locking; /* doesn't change */ unsigned blkbits; /* doesn't change */ unsigned blkfactor; /* When we're using an alignment which is finer than the filesystem's soft @@ -122,11 +127,6 @@ struct dio { int result; /* IO result */ }; -static inline int dio_is_reg(struct dio *dio) -{ - return S_ISREG(dio->inode->i_mode); -} - /* * How many pages are in the queue? */ @@ -211,7 +211,7 @@ static void dio_complete(struct dio *dio { if (dio->end_io) dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); - if (dio_is_reg(dio)) + if (dio->needs_locking) up_read(&dio->inode->i_alloc_sem); } @@ -474,7 +474,7 @@ static int get_more_blocks(struct dio *d if (dio_count & blkmask) fs_count++; - if (dio_is_reg(dio)) { + if (dio->needs_locking) { if (dio->block_in_file >= (i_size_read(dio->inode) >> dio->blkbits)) beyond_eof = 1; @@ -972,7 +972,7 @@ direct_io_worker(int rw, struct kiocb *i * All new block allocations have been performed. We can let i_sem * go now. */ - if (dio_is_reg(dio)) + if (dio->needs_locking) up(&dio->inode->i_sem); /* @@ -1017,9 +1017,10 @@ direct_io_worker(int rw, struct kiocb *i * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken. */ int -blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io) + unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, + int needs_special_locking) { int seg; size_t size; @@ -1029,6 +1030,7 @@ blockdev_direct_IO(int rw, struct kiocb unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; struct dio *dio; + int needs_locking; if (bdev) bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); @@ -1064,7 +1066,9 @@ blockdev_direct_IO(int rw, struct kiocb * readers need to grab i_sem and i_alloc_sem * writers need to grab i_alloc_sem only (i_sem is already held) */ - if (S_ISREG(inode->i_mode)) { + needs_locking = 0; + if (S_ISREG(inode->i_mode) && needs_special_locking) { + needs_locking = 1; if (rw == READ) { down(&inode->i_sem); retval = filemap_write_and_wait(inode->i_mapping); @@ -1076,13 +1080,13 @@ blockdev_direct_IO(int rw, struct kiocb } down_read(&inode->i_alloc_sem); } + dio->needs_locking = needs_locking; retval = direct_io_worker(rw, iocb, inode, iov, offset, nr_segs, blkbits, get_blocks, end_io, dio); - if (S_ISREG(inode->i_mode) && rw == WRITE) + if (needs_locking && rw == WRITE) down(&inode->i_sem); out: return retval; } - EXPORT_SYMBOL(blockdev_direct_IO); diff -puN fs/xfs/linux/xfs_aops.c~O_DIRECT-race-fixes-rework-XFS-fix fs/xfs/linux/xfs_aops.c --- 25/fs/xfs/linux/xfs_aops.c~O_DIRECT-race-fixes-rework-XFS-fix Wed Oct 22 12:42:50 2003 +++ 25-akpm/fs/xfs/linux/xfs_aops.c Wed Oct 22 12:42:50 2003 @@ -984,7 +984,8 @@ linvfs_direct_IO( if (error) return -error; - return blockdev_direct_IO(rw, iocb, inode, pbmap.pbm_target->pbr_bdev, + return blockdev_direct_IO_no_locking(rw, iocb, inode, + pbmap.pbm_target->pbr_bdev, iov, offset, nr_segs, linvfs_get_blocks_direct, linvfs_unwritten_convert_direct); diff -puN include/linux/fs.h~O_DIRECT-race-fixes-rework-XFS-fix include/linux/fs.h --- 25/include/linux/fs.h~O_DIRECT-race-fixes-rework-XFS-fix Wed Oct 22 12:42:50 2003 +++ 25-akpm/include/linux/fs.h Wed Oct 22 12:42:50 2003 @@ -1318,9 +1318,6 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs); -extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos); ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, @@ -1342,6 +1339,32 @@ static inline void do_generic_file_read( actor); } +int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, + int needs_special_locking); + +/* + * For filesystems which need locking between buffered and direct access + */ +static inline int blockdev_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, struct block_device *bdev, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + dio_iodone_t end_io) +{ + return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_blocks, end_io, 1); +} + +static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, + struct inode *inode, struct block_device *bdev, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + dio_iodone_t end_io) +{ + return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_blocks, end_io, 0); +} + extern struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) _