From: Trond Myklebust NFSv4: Add support for POSIX file locking. --- fs/nfs/file.c | 23 +-- fs/nfs/nfs3proc.c | 8 + fs/nfs/nfs4proc.c | 290 +++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4state.c | 174 ++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 309 ++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/proc.c | 8 + include/linux/nfs4.h | 3 include/linux/nfs_fs.h | 37 +++++ include/linux/nfs_page.h | 1 include/linux/nfs_xdr.h | 65 +++++++++ 10 files changed, 878 insertions(+), 40 deletions(-) diff -puN fs/nfs/file.c~nfs-30-lock fs/nfs/file.c --- 25/fs/nfs/file.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/file.c 2004-01-14 02:10:06.000000000 -0800 @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -278,21 +277,17 @@ nfs_lock(struct file *filp, int cmd, str if (!inode) return -EINVAL; - /* This will be in a forthcoming patch. */ - if (NFS_PROTO(inode)->version == 4) { - printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n"); - return -EIO; - } - /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + if (NFS_PROTO(inode)->version != 4) { + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { + if (IS_GETLK(cmd)) + status = LOCK_USE_CLNT; + goto out_ok; + } } /* @@ -302,7 +297,7 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. 
*/ - if (!fl->fl_owner || (fl->fl_flags & FL_POSIX) != FL_POSIX) + if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; /* @@ -322,7 +317,7 @@ nfs_lock(struct file *filp, int cmd, str return status; lock_kernel(); - status = nlmclnt_proc(inode, cmd, fl); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); unlock_kernel(); if (status < 0) return status; diff -puN fs/nfs/nfs3proc.c~nfs-30-lock fs/nfs/nfs3proc.c --- 25/fs/nfs/nfs3proc.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/nfs3proc.c 2004-01-14 02:10:06.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -896,6 +897,12 @@ nfs3_request_compatible(struct nfs_page return 1; } +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -931,4 +938,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_release = nfs_release, .request_init = nfs3_request_init, .request_compatible = nfs3_request_compatible, + .lock = nfs3_proc_lock, }; diff -puN fs/nfs/nfs4proc.c~nfs-30-lock fs/nfs/nfs4proc.c --- 25/fs/nfs/nfs4proc.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/nfs4proc.c 2004-01-14 02:10:06.000000000 -0800 @@ -598,9 +598,7 @@ retry: .fh = &o_res.fh, .seqid = sp->so_seqid, }; - struct nfs_open_confirmres oc_res = { - .status = 0, - }; + struct nfs_open_confirmres oc_res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], .rpc_argp = &oc_arg, @@ -692,7 +690,7 @@ retry: fattr->valid = 0; if (state) - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + nfs4_copy_stateid(&arg.stateid, state, 0); else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); @@ -724,9 +722,7 @@ nfs4_do_close(struct inode *inode, struc struct nfs_closeargs arg = { .fh = NFS_FH(inode), 
}; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &arg, @@ -758,9 +754,7 @@ nfs4_do_downgrade(struct inode *inode, s .seqid = sp->so_seqid, .share_access = mode, }; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], .rpc_argp = &arg, @@ -1085,7 +1079,7 @@ nfs4_proc_read(struct nfs_read_data *rda if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&rdata->args.stateid, &state->stateid, sizeof(rdata->args.stateid)); + nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); @@ -1127,7 +1121,7 @@ nfs4_proc_write(struct nfs_write_data *w if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&wdata->args.stateid, &state->stateid, sizeof(wdata->args.stateid)); + nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); @@ -1163,7 +1157,7 @@ nfs4_proc_commit(struct nfs_write_data * if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&cdata->args.stateid, &state->stateid, sizeof(cdata->args.stateid)); + nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); @@ -1513,7 +1507,7 @@ nfs4_restart_read(struct rpc_task *task) rpc_restart_call(task); req = nfs_list_entry(data->pages.next); if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, 
req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } @@ -1564,8 +1558,9 @@ nfs4_proc_read_setup(struct nfs_read_dat data->res.eof = 0; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1605,7 +1600,7 @@ nfs4_restart_write(struct rpc_task *task rpc_restart_call(task); req = nfs_list_entry(data->pages.next); if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } @@ -1661,8 +1656,9 @@ nfs4_proc_write_setup(struct nfs_write_d data->res.verf = &data->verf; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1846,6 +1842,7 @@ nfs4_request_init(struct nfs_page *req, state = (struct nfs4_state *)filp->private_data; req->wb_state = state; req->wb_cred = get_rpccred(state->owner->so_cred); + req->wb_lockowner = current->files; } static int @@ -1975,6 +1972,8 @@ nfs4_request_compatible(struct nfs_page state = (struct nfs4_state *)filp->private_data; if (req->wb_state != state) return 0; + if (req->wb_lockowner != current->files) + return 0; cred = state->owner->so_cred; if (req->wb_cred != cred) return 0; @@ -2032,6 +2031,262 @@ nfs4_proc_setclientid_confirm(struct nfs return status; } +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * 
sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +int +nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + 
len = res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + return status; +} + +int +nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(inode, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = 
{ + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .new_lock_owner = 0, + }; + int status; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp == NULL) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner.clientid = server->nfs4_state->cl_clientid, + }; + status = -ENOMEM; + lsp = nfs4_alloc_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, 0); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = &el; + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, 0); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. 
*/ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + nfs4_notify_setlk(inode, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + state = (struct nfs4_state *)filp->private_data; + BUG_ON(!state); + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, @@ -2067,6 +2322,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_release = nfs4_proc_file_release, .request_init = nfs4_request_init, .request_compatible = nfs4_request_compatible, + .lock = nfs4_proc_lock, }; /* diff -puN fs/nfs/nfs4state.c~nfs-30-lock fs/nfs/nfs4state.c --- 25/fs/nfs/nfs4state.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/nfs4state.c 2004-01-14 02:10:06.000000000 -0800 @@ -43,6 +43,7 @@ #include #include #include +#include #define OPENOWNER_POOL_SIZE 8 @@ -168,7 +169,7 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } -static inline u32 +u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { return clp->cl_lockowner_id ++; @@ -304,8 +305,12 @@ nfs4_alloc_open_state(void) state->state = 0; state->nreaders = 0; 
state->nwriters = 0; + state->flags = 0; memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); return state; } @@ -453,7 +458,7 @@ nfs4_close_state(struct nfs4_state *stat list_del_init(&state->inode_states); spin_unlock(&inode->i_lock); do { - newstate = 0; + newstate = 0; if (state->state == 0) break; if (state->nreaders) @@ -479,6 +484,171 @@ nfs4_close_state(struct nfs4_state *stat } /* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. 
+ * + * The caller must be holding state->lock_sema + */ +struct nfs4_lock_state * +nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + lsp->ls_parent = state; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema held. +*/ +void +nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) effects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not effected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. 
+ */ +void +nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + + if (!list_empty(&lsp->ls_locks)) + return; + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. + * + */ +void +nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void +nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + if (!atomic_dec_and_test(&lsp->ls_count)) + return; + if (!list_empty(&lsp->ls_locks)) + return; + kfree(lsp); +} + +/* * Called with sp->so_sema held. 
* * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or diff -puN fs/nfs/nfs4xdr.c~nfs-30-lock fs/nfs/nfs4xdr.c --- 25/fs/nfs/nfs4xdr.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/nfs4xdr.c 2004-01-14 02:10:06.000000000 -0800 @@ -66,6 +66,10 @@ static int nfs_stat_to_errno(int); #define NFS4_MAXTAGLEN 0 #endif +/* lock,open owner id: + * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define owner_id_maxsz 1 + 1 #define compound_encode_hdr_maxsz 3 + (NFS4_MAXTAGLEN >> 2) #define compound_decode_hdr_maxsz 2 + (NFS4_MAXTAGLEN >> 2) #define op_encode_hdr_maxsz 1 @@ -222,6 +226,36 @@ static int nfs_stat_to_errno(int); decode_setclientid_confirm_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz +#define NFS4_enc_lock_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 2 + 2 + \ + 1 + 4 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lock_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + 2 + 2 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_enc_lockt_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 2 + 2 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lockt_sz NFS4_dec_lock_sz +#define NFS4_enc_locku_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2 +#define NFS4_dec_locku_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4 + static struct { @@ -596,6 +630,80 @@ encode_link(struct xdr_stream *xdr, stru return 0; } +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static int +encode_lock(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lock_opargs *opargs = 
arg->u.lock; + + RESERVE_SPACE(32); + WRITE32(OP_LOCK); + WRITE32(arg->type); + WRITE32(opargs->reclaim); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE32(opargs->new_lock_owner); + if (opargs->new_lock_owner){ + struct nfs_open_to_lock *ol = opargs->u.open_lock; + + RESERVE_SPACE(40); + WRITE32(ol->open_seqid); + WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); + WRITE32(ol->lock_seqid); + WRITE64(ol->lock_owner.clientid); + WRITE32(4); + WRITE32(ol->lock_owner.id); + } + else { + struct nfs_exist_lock *el = opargs->u.exist_lock; + + RESERVE_SPACE(20); + WRITEMEM(&el->stateid, sizeof(el->stateid)); + WRITE32(el->seqid); + } + + return 0; +} + +static int +encode_lockt(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lowner *opargs = arg->u.lockt; + + RESERVE_SPACE(40); + WRITE32(OP_LOCKT); + WRITE32(arg->type); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE64(opargs->clientid); + WRITE32(4); + WRITE32(opargs->id); + + return 0; +} + +static int +encode_locku(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_locku_opargs *opargs = arg->u.locku; + + RESERVE_SPACE(44); + WRITE32(OP_LOCKU); + WRITE32(arg->type); + WRITE32(opargs->seqid); + WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITE64(arg->offset); + WRITE64(arg->length); + + return 0; +} + static int encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) { @@ -1176,6 +1284,72 @@ out: } /* + * Encode a LOCK request + */ +static int +nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKT request + */ +static int +nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t 
*p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int +nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} + +/* * Encode a READ request */ static int @@ -1997,6 +2171,66 @@ decode_link(struct xdr_stream *xdr, stru return decode_change_info(xdr, link->ln_cinfo); } +/* + * We create the owner, so we know a proper owner.id length is 4. + */ +static int +decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +{ + uint32_t *p; + uint32_t namelen; + + READ_BUF(32); + READ64(denied->offset); + READ64(denied->length); + READ32(denied->type); + READ64(denied->owner.clientid); + READ32(namelen); + READ_BUF(namelen); + if (namelen == 4) + READ32(denied->owner.id); + return -NFS4ERR_DENIED; +} + +static int +decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } else if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int 
+decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } + return status; +} + static int decode_lookup(struct xdr_stream *xdr) { @@ -2037,10 +2271,11 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) { uint32_t *p; + int status; - res->status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); - if (res->status) - return res->status; + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status) + return status; READ_BUF(sizeof(res->stateid.data)); COPYMEM(res->stateid.data, sizeof(res->stateid.data)); return 0; @@ -2619,6 +2854,71 @@ out: return status; } +/* + * Decode LOCK response + */ +static int +nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int +nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int +nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = 
decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return status; +} /* * Decode Read response @@ -2915,6 +3215,9 @@ struct rpc_procinfo nfs4_procedures[] = PROC(RENEW, enc_renew, dec_renew), PROC(SETCLIENTID, enc_setclientid, dec_setclientid), PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), }; struct rpc_version nfs_version4 = { diff -puN fs/nfs/proc.c~nfs-30-lock fs/nfs/proc.c --- 25/fs/nfs/proc.c~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/fs/nfs/proc.c 2004-01-14 02:10:06.000000000 -0800 @@ -42,6 +42,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -653,6 +654,12 @@ nfs_request_compatible(struct nfs_page * return 1; } +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ @@ -689,4 +696,5 @@ struct nfs_rpc_ops nfs_v2_clientops = { .file_release = nfs_release, .request_init = nfs_request_init, .request_compatible = nfs_request_compatible, + .lock = nfs_proc_lock, }; diff -puN include/linux/nfs4.h~nfs-30-lock include/linux/nfs4.h --- 25/include/linux/nfs4.h~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/include/linux/nfs4.h 2004-01-14 02:10:06.000000000 -0800 @@ -297,6 +297,9 @@ enum { NFSPROC4_CLNT_RENEW, NFSPROC4_CLNT_SETCLIENTID, NFSPROC4_CLNT_SETCLIENTID_CONFIRM, + NFSPROC4_CLNT_LOCK, + NFSPROC4_CLNT_LOCKT, + NFSPROC4_CLNT_LOCKU, }; #endif diff -puN include/linux/nfs_fs.h~nfs-30-lock include/linux/nfs_fs.h --- 25/include/linux/nfs_fs.h~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/include/linux/nfs_fs.h 2004-01-14 02:10:06.000000000 -0800 @@ -542,19 +542,43 @@ struct nfs4_state_owner { /* * struct nfs4_state maintains the client-side state for 
a given - * (state_owner,inode) tuple. + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). * + * OPEN: * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, * we need to know how many files are open for reading or writing on a * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ + struct nfs4_state * ls_parent; /* Parent nfs4_state */ + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, +}; + struct nfs4_state { struct list_head open_states; /* List of states for the same state_owner */ struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ struct nfs4_state_owner *owner; /* Pointer to the open owner */ struct inode *inode; /* Pointer to the inode */ + unsigned long flags; /* Do we hold any locks? 
*/ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ + nfs4_stateid stateid; unsigned int nreaders; @@ -589,6 +613,8 @@ extern void init_nfsv4_state(struct nfs_ extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); @@ -598,6 +624,15 @@ extern struct nfs4_state *nfs4_find_stat extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + + struct nfs4_mount_data; #else diff -puN include/linux/nfs_page.h~nfs-30-lock include/linux/nfs_page.h --- 25/include/linux/nfs_page.h~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/include/linux/nfs_page.h 2004-01-14 02:10:06.000000000 -0800 @@ -26,6 +26,7 @@ struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ struct file *wb_file; + fl_owner_t 
wb_lockowner; struct inode *wb_inode; struct rpc_cred *wb_cred; struct nfs4_state *wb_state; diff -puN include/linux/nfs_xdr.h~nfs-30-lock include/linux/nfs_xdr.h --- 25/include/linux/nfs_xdr.h~nfs-30-lock 2004-01-14 02:10:06.000000000 -0800 +++ 25-akpm/include/linux/nfs_xdr.h 2004-01-14 02:10:06.000000000 -0800 @@ -109,7 +109,6 @@ struct nfs_openargs { }; struct nfs_openres { - __u32 status; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info * cinfo; @@ -129,7 +128,6 @@ struct nfs_open_confirmargs { }; struct nfs_open_confirmres { - __u32 status; nfs4_stateid stateid; }; @@ -157,10 +155,68 @@ struct nfs_closeargs { }; struct nfs_closeres { - __u32 status; nfs4_stateid stateid; }; +/* + * * Arguments to the lock,lockt, and locku call. + * */ +struct nfs_lowner { + __u64 clientid; + u32 id; +}; + +struct nfs_open_to_lock { + __u32 open_seqid; + nfs4_stateid open_stateid; + __u32 lock_seqid; + struct nfs_lowner lock_owner; +}; + +struct nfs_exist_lock { + nfs4_stateid stateid; + __u32 seqid; +}; +struct nfs_lock_opargs { + __u32 reclaim; + __u32 new_lock_owner; + union { + struct nfs_open_to_lock *open_lock; + struct nfs_exist_lock *exist_lock; + } u; +}; + +struct nfs_locku_opargs { + __u32 seqid; + nfs4_stateid stateid; +}; + +struct nfs_lockargs { + struct nfs_fh * fh; + __u32 type; + __u64 offset; + __u64 length; + union { + struct nfs_lock_opargs *lock; /* LOCK */ + struct nfs_lowner *lockt; /* LOCKT */ + struct nfs_locku_opargs *locku; /* LOCKU */ + } u; +}; + +struct nfs_lock_denied { + __u64 offset; + __u64 length; + __u32 type; + struct nfs_lowner owner; +}; + +struct nfs_lockres { + union { + nfs4_stateid stateid;/* LOCK success, LOCKU */ + struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ + } u; + struct nfs_server * server; +}; /* * Arguments to the read call. 
@@ -605,6 +661,7 @@ struct nfs_read_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct page *pagevec[NFS_READ_MAXIOV]; @@ -620,6 +677,7 @@ struct nfs_write_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ @@ -686,6 +744,7 @@ struct nfs_rpc_ops { int (*file_release) (struct inode *, struct file *); void (*request_init)(struct nfs_page *, struct file *); int (*request_compatible)(struct nfs_page *, struct file *, struct page *); + int (*lock)(struct file *, int, struct file_lock *); }; /* _