
XFS deadlocks, and doesn't need the fancy locking anyway.  So provide a
separate direct-IO path for XFS, which avoids all the extra locking.


 fs/direct-io.c          |   30 ++++++++++++++++++------------
 fs/xfs/linux/xfs_aops.c |    2 +-
 include/linux/fs.h      |   29 ++++++++++++++++++++++++++---
 kernel/ksyms.c          |    1 -
 4 files changed, 45 insertions(+), 17 deletions(-)

diff -puN fs/direct-io.c~O_DIRECT-race-fixes-rework-XFS-fix fs/direct-io.c
--- 25/fs/direct-io.c~O_DIRECT-race-fixes-rework-XFS-fix	2003-10-03 22:17:19.000000000 -0700
+++ 25-akpm/fs/direct-io.c	2003-10-03 22:17:19.000000000 -0700
@@ -51,6 +51,10 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
+ *
+ * needs_locking is set for regular files on direct-IO-naive filesystems. It
+ * determines whether we need to do the fancy locking which prevents direct-IO
+ * from being able to read uninitialised disk blocks.
  */
 
 struct dio {
@@ -58,6 +62,7 @@ struct dio {
 	struct bio *bio;		/* bio under assembly */
 	struct inode *inode;
 	int rw;
+	int needs_locking;		/* doesn't change */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -121,11 +126,6 @@ struct dio {
 	int result;			/* IO result */
 };
 
-static inline int dio_is_reg(struct dio *dio)
-{
-	return S_ISREG(dio->inode->i_mode);
-}
-
 /*
  * How many pages are in the queue?
  */
@@ -210,7 +210,7 @@ static void dio_complete(struct dio *dio
 {
 	if (dio->end_io)
 		dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
-	if (dio_is_reg(dio))
+	if (dio->needs_locking)
 		up_read(&dio->inode->i_alloc_sem);
 }
 
@@ -473,7 +473,7 @@ static int get_more_blocks(struct dio *d
 		if (dio_count & blkmask)
 			fs_count++;
 
-		if (dio_is_reg(dio)) {
+		if (dio->needs_locking) {
 			if (dio->block_in_file >=
 				(i_size_read(dio->inode) >> dio->blkbits))
 				beyond_eof = 1;
@@ -971,7 +971,7 @@ direct_io_worker(int rw, struct kiocb *i
 	 * All new block allocations have been performed. We can let i_sem
 	 * go now.
 	 */
-	if (dio_is_reg(dio))
+	if (dio->needs_locking)
 		up(&dio->inode->i_sem);
 
 	/*
@@ -1016,9 +1016,10 @@ direct_io_worker(int rw, struct kiocb *i
  * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken.
  */
 int
-blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io)
+	unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
+	int needs_special_locking)
 {
 	int seg;
 	size_t size;
@@ -1028,6 +1029,7 @@ blockdev_direct_IO(int rw, struct kiocb 
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
 	struct dio *dio;
+	int needs_locking;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1063,7 +1065,9 @@ blockdev_direct_IO(int rw, struct kiocb 
 	 *	readers need to grab i_sem and i_alloc_sem
 	 *	writers need to grab i_alloc_sem only (i_sem is already held)
 	 */
-	if (S_ISREG(inode->i_mode)) {
+	needs_locking = 0;
+	if (S_ISREG(inode->i_mode) && needs_special_locking) {
+		needs_locking = 1;
 		if (rw == READ) {
 			down(&inode->i_sem);
 			retval = filemap_write_and_wait(inode->i_mapping);
@@ -1075,11 +1079,13 @@ blockdev_direct_IO(int rw, struct kiocb 
 		}
 		down_read(&inode->i_alloc_sem);
 	}
+	dio->needs_locking = needs_locking;
 
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
 				nr_segs, blkbits, get_blocks, end_io, dio);
-	if (S_ISREG(inode->i_mode) && rw == WRITE)
+	if (needs_locking && rw == WRITE)
 		down(&inode->i_sem);
 out:
 	return retval;
 }
+EXPORT_SYMBOL(__blockdev_direct_IO);
diff -puN include/linux/fs.h~O_DIRECT-race-fixes-rework-XFS-fix include/linux/fs.h
--- 25/include/linux/fs.h~O_DIRECT-race-fixes-rework-XFS-fix	2003-10-03 22:17:19.000000000 -0700
+++ 25-akpm/include/linux/fs.h	2003-10-03 22:17:19.000000000 -0700
@@ -1316,9 +1316,6 @@ extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs);
-extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io);
 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
 	unsigned long nr_segs, loff_t *ppos);
 ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
@@ -1340,6 +1337,32 @@ static inline void do_generic_file_read(
 				actor);
 }
 
+int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct iovec *iov, loff_t offset,
+	unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
+	int needs_special_locking);
+
+/*
+ * For filesystems which need locking between buffered and direct access
+ */
+static inline int blockdev_direct_IO(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+	loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
+	dio_iodone_t end_io)
+{
+	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				nr_segs, get_blocks, end_io, 1);
+}
+
+static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+	loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
+	dio_iodone_t end_io)
+{
+	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				nr_segs, get_blocks, end_io, 0);
+}
+
 extern struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
diff -puN fs/xfs/linux/xfs_aops.c~O_DIRECT-race-fixes-rework-XFS-fix fs/xfs/linux/xfs_aops.c
--- 25/fs/xfs/linux/xfs_aops.c~O_DIRECT-race-fixes-rework-XFS-fix	2003-10-03 22:17:19.000000000 -0700
+++ 25-akpm/fs/xfs/linux/xfs_aops.c	2003-10-03 22:17:19.000000000 -0700
@@ -984,7 +984,7 @@ linvfs_direct_IO(
 	if (error)
 		return -error;
 
-	return blockdev_direct_IO(rw, iocb, inode,
+	return blockdev_direct_IO_no_locking(rw, iocb, inode,
 		pbmap.pbm_target->pbr_bdev,
 		iov, offset, nr_segs,
 		linvfs_get_blocks_direct,
diff -puN kernel/ksyms.c~O_DIRECT-race-fixes-rework-XFS-fix kernel/ksyms.c
--- 25/kernel/ksyms.c~O_DIRECT-race-fixes-rework-XFS-fix	2003-10-03 22:17:19.000000000 -0700
+++ 25-akpm/kernel/ksyms.c	2003-10-03 22:17:19.000000000 -0700
@@ -157,7 +157,6 @@ EXPORT_SYMBOL(inode_setattr);
 EXPORT_SYMBOL(inode_change_ok);
 EXPORT_SYMBOL(write_inode_now);
 EXPORT_SYMBOL(notify_change);
-EXPORT_SYMBOL(blockdev_direct_IO);
 EXPORT_SYMBOL(file_ra_state_init);
 EXPORT_SYMBOL(generic_ro_fops);
 EXPORT_SYMBOL(get_unused_fd);

_