From: Trond Myklebust NFSv4: Atomic open(). Fixes races w.r.t. opening files. --- fs/nfs/dir.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/inode.c | 54 +++++++++++++++++- fs/nfs/nfs3proc.c | 2 fs/nfs/nfs4proc.c | 134 +++++++++++++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 2 fs/nfs/proc.c | 2 include/linux/nfs_fs.h | 6 ++ include/linux/nfs_xdr.h | 2 8 files changed, 295 insertions(+), 45 deletions(-) diff -puN fs/nfs/dir.c~nfs-27-atomic_open fs/nfs/dir.c --- 25/fs/nfs/dir.c~nfs-27-atomic_open 2004-01-14 02:09:59.000000000 -0800 +++ 25-akpm/fs/nfs/dir.c 2004-01-14 02:10:00.000000000 -0800 @@ -72,6 +72,26 @@ struct inode_operations nfs_dir_inode_op .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* * Open file */ @@ -670,7 +690,7 @@ static struct dentry *nfs_lookup(struct goto out; error = -ENOMEM; - dentry->d_op = &nfs_dentry_operations; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); @@ -702,6 +722,119 @@ out: return ERR_PTR(error); } +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + int error = 0; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + error = -ENAMETOOLONG; + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + goto out; + } + } +no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); +out: + BUG_ON(error > 0); + return ERR_PTR(error); +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); + if (!is_atomic_open(parent->d_inode, nd)) + goto no_open; + openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) + goto out; + /* If this is exclusive open, just revalidate */ + if (openflags & O_EXCL) + goto no_open; + } + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + lock_kernel(); + ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFSV4 */ + static inline int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) { @@ -1306,6 +1439,9 @@ nfs_permission(struct inode *inode, int /* We only need to check permissions on file open() and access() */ if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) return 0; + /* NFSv4 has atomic_open... */ + if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) + return 0; } lock_kernel(); diff -puN fs/nfs/inode.c~nfs-27-atomic_open fs/nfs/inode.c --- 25/fs/nfs/inode.c~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/fs/nfs/inode.c 2004-01-14 02:10:00.000000000 -0800 @@ -303,7 +303,6 @@ nfs_sb_init(struct super_block *sb, rpc_ server = NFS_SB(sb); sb->s_magic = NFS_SUPER_MAGIC; - sb->s_op = &nfs_sops; /* Did getting the root inode fail? */ if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) @@ -312,7 +311,7 @@ nfs_sb_init(struct super_block *sb, rpc_ if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &nfs_dentry_operations; + sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { @@ -513,6 +512,7 @@ nfs_fill_super(struct super_block *sb, s goto out_shutdown; } + sb->s_op = &nfs_sops; err = nfs_sb_init(sb, authflavor); if (err != 0) goto out_noinit; @@ -745,7 +745,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -837,7 +837,12 @@ printk("nfs_setattr: revalidate failed, filemap_fdatawait(inode->i_mapping); if (error) goto out; + /* Optimize away unnecessary truncates */ + if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) + attr->ia_valid &= ~ATTR_SIZE; } + if (!attr->ia_valid) + goto out; error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error) @@ -1357,6 +1362,48 @@ static struct file_system_type nfs_fs_ty #ifdef CONFIG_NFS_V4 +static void nfs4_clear_inode(struct inode *); + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + list_del(&state->inode_states); + nfs4_put_open_state(state); + } + /* Now call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + + static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; @@ -1481,6 +1528,7 @@ static int nfs4_fill_super(struct super_ if ((server->idmap = nfs_idmap_new(server)) == NULL) printk(KERN_WARNING "NFS: couldn't start IDmap\n"); + sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; diff -puN fs/nfs/nfs3proc.c~nfs-27-atomic_open fs/nfs/nfs3proc.c --- 25/fs/nfs/nfs3proc.c~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/fs/nfs/nfs3proc.c 2004-01-14 02:10:00.000000000 -0800 @@ -898,6 +898,8 @@ nfs3_request_compatible(struct nfs_page struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, diff -puN fs/nfs/nfs4proc.c~nfs-27-atomic_open fs/nfs/nfs4proc.c --- 25/fs/nfs/nfs4proc.c~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/fs/nfs/nfs4proc.c 2004-01-14 02:10:00.000000000 -0800 @@ -45,6 +45,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC @@ -509,6 +510,9 @@ nfs4_open_reclaim(struct nfs4_state_owne return status; } +/* + * Returns an nfs4_state + an referenced inode + */ struct nfs4_state * nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -617,19 +621,23 @@ retry: up(&sp->so_sema); nfs4_put_state_owner(sp); - iput(inode); return state; out_up: up(&sp->so_sema); nfs4_put_state_owner(sp); - if (state) + if (state) { nfs4_put_open_state(state); - if (inode) + state = NULL; + } + if (inode) { iput(inode); + inode = NULL; + } status = nfs4_handle_error(server, status); if (!status) goto retry; + BUG_ON(status < -1000 || status > 0); out: return ERR_PTR(status); } @@ -718,6 +726,56 @@ nfs4_do_close(struct inode *inode, struc return status; } +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_put_open_state(state); + iput(inode); + return 0; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) @@ -808,28 +866,39 @@ nfs4_proc_setattr(struct dentry *dentry, struct inode * inode = dentry->d_inode; int size_change = sattr->ia_valid & ATTR_SIZE; struct nfs4_state *state = NULL; - int status; + int need_iput = 0; + int status; fattr->valid = 0; if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dentry->d_parent->d_inode, + state = nfs4_find_state_bypid(inode, current->pid); + + if (!state) { + struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); - put_rpccred(cred); + put_rpccred(cred); + need_iput = 1; + } if (IS_ERR(state)) return PTR_ERR(state); if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n"); - nfs4_put_open_state(state); - return -EIO; + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; } } status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); - if (state) +out: + if (state) { + inode = state->inode; nfs4_put_open_state(state); + if (need_iput) + iput(inode); + } return status; } @@ -1085,18 +1154,18 @@ nfs4_proc_create(struct inode *dir, stru state = nfs4_do_open(dir, name, flags, sattr, cred); put_rpccred(cred); if (!IS_ERR(state)) { - inode = igrab(state->inode); + inode = state->inode; if (flags & O_EXCL) { struct nfs_fattr fattr; int status; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { + nfs4_put_open_state(state); iput(inode); inode = ERR_PTR(status); } } - nfs4_put_open_state(state); } else inode = (struct inode *)state; return inode; @@ -1672,43 +1741,28 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct inode *dir = dentry->d_parent->d_inode; - struct rpc_cred *cred; struct nfs4_state *state; - int flags = filp->f_flags; - int status = 0; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, (int)dentry->d_name.len, dentry->d_name.name); - if ((flags + 1) & O_ACCMODE) - flags++; - - lock_kernel(); -/* -* We have already opened the file "O_EXCL" in nfs4_proc_create!! -* This ugliness will go away with lookup-intent... -*/ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred); - if (IS_ERR(state)) { - status = PTR_ERR(state); - state = NULL; - } else if (filp->f_mode & FMODE_WRITE) - nfs_set_mmcred(inode, cred); - if (inode != filp->f_dentry->d_inode) { + /* Find our open stateid */ + state = nfs4_find_state_bypid(inode, current->pid); + if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - status = -EIO; /* ERACE actually */ - nfs4_put_open_state(state); - state = NULL; + return -EIO; /* ERACE actually */ + } + nfs4_put_open_state(state); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); + unlock_kernel(); } filp->private_data = state; - put_rpccred(cred); - unlock_kernel(); - return status; + return 0; } /* @@ -1922,6 +1976,8 @@ nfs4_proc_setclientid_confirm(struct nfs struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, diff -puN fs/nfs/nfs4state.c~nfs-27-atomic_open fs/nfs/nfs4state.c --- 25/fs/nfs/nfs4state.c~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/fs/nfs/nfs4state.c 2004-01-14 02:10:00.000000000 -0800 @@ -349,7 +349,6 @@ nfs4_get_open_state(struct inode *inode, atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = inode; - atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -384,7 +383,6 @@ nfs4_put_open_state(struct nfs4_state *s } while (!status); } up(&owner->so_sema); - iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } diff -puN fs/nfs/proc.c~nfs-27-atomic_open fs/nfs/proc.c --- 25/fs/nfs/proc.c~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/fs/nfs/proc.c 2004-01-14 02:10:00.000000000 -0800 @@ -656,6 +656,8 @@ nfs_request_compatible(struct nfs_page * struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, diff -puN include/linux/nfs_fs.h~nfs-27-atomic_open include/linux/nfs_fs.h --- 25/include/linux/nfs_fs.h~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/include/linux/nfs_fs.h 2004-01-14 02:10:00.000000000 -0800 @@ -558,6 +558,9 @@ struct nfs4_state { }; +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); @@ -566,6 +569,8 @@ extern int nfs4_proc_async_renew(struct extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); @@ -581,6 +586,7 @@ extern struct nfs4_state_owner * nfs4_ge extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); +extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); diff -puN include/linux/nfs_xdr.h~nfs-27-atomic_open include/linux/nfs_xdr.h --- 25/include/linux/nfs_xdr.h~nfs-27-atomic_open 2004-01-14 02:10:00.000000000 -0800 +++ 25-akpm/include/linux/nfs_xdr.h 2004-01-14 02:10:00.000000000 -0800 @@ -637,6 +637,8 @@ struct nfs_page; */ struct nfs_rpc_ops { int version; /* Protocol version */ + struct dentry_operations *dentry_ops; + struct inode_operations *dir_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); _