From: Chuck Lever

O_DIRECT|O_APPEND cannot possibly work on NFS, so NFS needs some way of preventing the user from setting this combination. We felt that the best way of implementing this restriction is to allow the filesystem to implement its own fcntl() handler. This patch does that, and provides the appropriate handler for NFS.

Additional details from Chuck:

Forgetting O_DIRECT for a moment, O_APPEND writes on NFS don't work in any case when multiple clients are writing to a file, since an NFS client can never guarantee it knows where the true end of file is 100% of the time. It works as expected iff only one client writes to an O_APPEND file at a time.

Multi-client O_APPEND writing doesn't seem to be a problem for any application I'm aware of. Since it can be made to behave in the multi-client case with careful application logic or by using file locking, I don't think we should disallow it.

I want to drop the inode semaphore when doing NFS direct I/O because it is synchronous; holding the i_sem means we reduce direct I/O concurrency to one I/O per file at a time.

The important thing sct was worried about was the case where a single client is writing with O_APPEND and O_DIRECT, and we don't hold the i_sem during the write. We must at least hold the i_sem when determining where the end of file is to do the O_APPEND write. In 2.6, I believe that is handled correctly in the VFS layer, so this is not an issue for 2.6, right?
---

 25-akpm/fs/fcntl.c         |   12 +++++++++++-
 25-akpm/fs/nfs/file.c      |   28 ++++++++++++++++++++++++++++
 25-akpm/include/linux/fs.h |    4 ++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff -puN fs/fcntl.c~file-operations-fcntl fs/fcntl.c
--- 25/fs/fcntl.c~file-operations-fcntl	2004-03-21 21:56:20.498799192 -0800
+++ 25-akpm/fs/fcntl.c	2004-03-21 21:56:20.504798280 -0800
@@ -282,7 +282,7 @@ void f_delown(struct file *filp)
 
 EXPORT_SYMBOL(f_delown);
 
-static long do_fcntl(unsigned int fd, unsigned int cmd,
+long generic_file_fcntl(unsigned int fd, unsigned int cmd,
 		     unsigned long arg, struct file * filp)
 {
 	long err = -EINVAL;
@@ -353,6 +353,16 @@ static long do_fcntl(unsigned int fd, un
 	return err;
 }
 
+EXPORT_SYMBOL(generic_file_fcntl);
+
+static long do_fcntl(unsigned int fd, unsigned int cmd,
+			unsigned long arg, struct file *filp)
+{
+	if (filp->f_op && filp->f_op->fcntl)
+		return filp->f_op->fcntl(fd, cmd, arg, filp);
+	return generic_file_fcntl(fd, cmd, arg, filp);
+}
+
 asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	struct file * filp;
diff -puN fs/nfs/file.c~file-operations-fcntl fs/nfs/file.c
--- 25/fs/nfs/file.c~file-operations-fcntl	2004-03-21 21:56:20.500798888 -0800
+++ 25-akpm/fs/nfs/file.c	2004-03-21 21:56:20.505798128 -0800
@@ -33,6 +33,8 @@
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
+static long nfs_file_fcntl(unsigned int, unsigned int, unsigned long,
+					struct file *);
 static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
@@ -55,6 +57,7 @@ struct file_operations nfs_file_operatio
 	.fsync		= nfs_fsync,
 	.lock		= nfs_lock,
 	.sendfile	= nfs_file_sendfile,
+	.fcntl		= nfs_file_fcntl,
 };
 
 struct inode_operations nfs_file_inode_operations = {
@@ -68,6 +71,28 @@ struct inode_operations nfs_file_inode_o
 # define IS_SWAPFILE(inode)	(0)
 #endif
 
+#define nfs_invalid_flags	(O_APPEND | O_DIRECT)
+
+/*
+ * Refuse an F_SETFL request for a flag combination NFS can't support
+ * (tested on the requested flags in arg); pass the rest to generic fcntl.
+ */
+static long
+nfs_file_fcntl(unsigned int fd, unsigned int cmd,
+		unsigned long arg, struct file *filp)
+{
+	switch (cmd) {
+	case F_SETFL:
+		if ((arg & nfs_invalid_flags) == nfs_invalid_flags)
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+
+	return generic_file_fcntl(fd, cmd, arg, filp);
+}
+
 /*
  * Open file
  */
@@ -78,6 +103,9 @@ nfs_file_open(struct inode *inode, struc
 	int (*open)(struct inode *, struct file *);
 	int res = 0;
 
+	if ((filp->f_flags & nfs_invalid_flags) == nfs_invalid_flags)
+		return -EINVAL;
+
 	lock_kernel();
 	/* Do NFSv4 open() call */
 	if ((open = server->rpc_ops->file_open) != NULL)
diff -puN include/linux/fs.h~file-operations-fcntl include/linux/fs.h
--- 25/include/linux/fs.h~file-operations-fcntl	2004-03-21 21:56:20.502798584 -0800
+++ 25-akpm/include/linux/fs.h	2004-03-21 21:56:20.746761496 -0800
@@ -625,6 +625,9 @@ extern struct list_head file_lock_list;
 
 #include <linux/fcntl.h>
 
+extern long generic_file_fcntl(unsigned int, unsigned int, unsigned long,
+				struct file *);
+
 extern int fcntl_getlk(struct file *, struct flock __user *);
 extern int fcntl_setlk(struct file *, unsigned int, struct flock __user *);
 
@@ -835,6 +838,7 @@ struct file_operations {
 	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void __user *);
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	long (*fcntl)(unsigned int, unsigned int, unsigned long, struct file *);
 };
 
 struct inode_operations {

_