aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 10:57:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 10:57:06 -0700
commit8bda95577627dc0633c48d581ea3605c27efe829 (patch)
tree6fbf3fb021e9ee67d4f14eb19f3ff2059912e506
parent4529fb1546b9cd3f5dbd8a36595aa4159331c963 (diff)
parent0bcc7ca40bd823193224e9f38bafbd8325aaf566 (diff)
downloadlinux-8bda95577627dc0633c48d581ea3605c27efe829.tar.gz
Merge tag 'nfsd-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever: "New features: - Support for server-side disconnect injection via debugfs - Protocol definitions for new RPC_AUTH_TLS authentication flavor Performance improvements: - Reduce page allocator traffic in the NFSD splice read actor - Reduce CPU utilization in svcrdma's Send completion handler Notable bug fixes: - Stabilize lockd operation when re-exporting NFS mounts - Fix the use of %.*s in NFSD tracepoints - Fix /proc/sys/fs/nfs/nsm_use_hostnames" * tag 'nfsd-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (31 commits) nfsd: fix crash on LOCKT on reexported NFSv3 nfs: don't allow reexport reclaims lockd: don't attempt blocking locks on nfs reexports nfs: don't atempt blocking locks on nfs reexports Keep read and write fds with each nlm_file lockd: update nlm_lookup_file reexport comment nlm: minor refactoring nlm: minor nlm_lookup_file argument change lockd: lockd server-side shouldn't set fl_ops SUNRPC: Add documentation for the fail_sunrpc/ directory SUNRPC: Server-side disconnect injection SUNRPC: Move client-side disconnect injection SUNRPC: Add a /sys/kernel/debug/fail_sunrpc/ directory svcrdma: xpt_bc_xprt is already clear in __svc_rdma_free() nfsd4: Fix forced-expiry locking rpc: fix gss_svc_init cleanup on failure SUNRPC: Add RPC_AUTH_TLS protocol numbers lockd: change the proc_handler for nsm_use_hostnames sysctl: introduce new proc handler proc_dobool SUNRPC: Fix a NULL pointer deref in trace_svc_stats_latency() ...
-rw-r--r--Documentation/fault-injection/fault-injection.rst18
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c7
-rw-r--r--fs/lockd/svclock.c82
-rw-r--r--fs/lockd/svcproc.c6
-rw-r--r--fs/lockd/svcsubs.c114
-rw-r--r--fs/nfs/export.c2
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/nfsd/lockd.c8
-rw-r--r--fs/nfsd/nfs4state.c20
-rw-r--r--fs/nfsd/nfsproc.c1
-rw-r--r--fs/nfsd/trace.h17
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--include/linux/errno.h1
-rw-r--r--include/linux/exportfs.h2
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/lockd/bind.h3
-rw-r--r--include/linux/lockd/lockd.h11
-rw-r--r--include/linux/sunrpc/msg_prot.h1
-rw-r--r--include/linux/sunrpc/svc.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h7
-rw-r--r--include/linux/sunrpc/xdr.h1
-rw-r--r--include/linux/sunrpc/xprt.h18
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/trace/events/sunrpc.h8
-rw-r--r--include/trace/trace_events.h22
-rw-r--r--include/uapi/linux/nfsd/nfsfh.h1
-rw-r--r--kernel/sysctl.c42
-rw-r--r--lib/Kconfig.debug7
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/debugfs.c73
-rw-r--r--net/sunrpc/fail.h25
-rw-r--r--net/sunrpc/svc.c44
-rw-r--r--net/sunrpc/svc_xprt.c3
-rw-r--r--net/sunrpc/xprt.c14
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c56
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c11
-rw-r--r--samples/trace_events/trace-events-sample.h27
39 files changed, 489 insertions, 240 deletions
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index f47d05ed0d948e..4a25c5eb6f0726 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -24,6 +24,10 @@ Available fault injection capabilities
injects futex deadlock and uaddr fault errors.
+- fail_sunrpc
+
+ injects kernel RPC client and server failures.
+
- fail_make_request
injects disk IO errors on devices permitted by setting
@@ -151,6 +155,20 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
+- /sys/kernel/debug/fail_sunrpc/ignore-client-disconnect:
+
+ Format: { 'Y' | 'N' }
+
+ default is 'N', setting it to 'Y' will disable disconnect
+ injection on the RPC client.
+
+- /sys/kernel/debug/fail_sunrpc/ignore-server-disconnect:
+
+ Format: { 'Y' | 'N' }
+
+ default is 'N', setting it to 'Y' will disable disconnect
+ injection on the RPC server.
+
- /sys/kernel/debug/fail_function/inject:
Format: { 'function-name' | '!function-name' | '' }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2de048f80eb8c3..0ab9756ed2359f 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -584,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = {
.data = &nsm_use_hostnames,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dobool,
},
{
.procname = "nsm_local_state",
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4c10fb5138f101..e10ae2c41279e4 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -40,12 +40,15 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+ int mode = lock_to_openmode(&lock->fl);
+
+ error = nlm_lookup_file(rqstp, &file, lock);
+ if (error)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 61d3cc2283dc84..e9b85d8fd5fe75 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -31,6 +31,7 @@
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
+#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
@@ -395,28 +396,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
-static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
-{
- struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
- new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
-}
-
-static void nlmsvc_locks_release_private(struct file_lock *fl)
-{
- nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
-}
-
-static const struct file_lock_operations nlmsvc_lock_ops = {
- .fl_copy_lock = nlmsvc_locks_copy_lock,
- .fl_release_private = nlmsvc_locks_release_private,
-};
-
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
- if (fl->fl_owner != NULL)
- fl->fl_ops = &nlmsvc_lock_ops;
}
/*
@@ -488,17 +471,24 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_cookie *cookie, int reclaim)
{
struct nlm_block *block = NULL;
+ struct inode *inode = nlmsvc_file_inode(file);
int error;
+ int mode;
+ int async_block = 0;
__be32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ inode->i_sb->s_id, inode->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
+ if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
+ async_block = wait;
+ wait = 0;
+ }
+
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
@@ -542,7 +532,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
if (!wait)
lock->fl.fl_flags &= ~FL_SLEEP;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
@@ -558,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
if (wait)
break;
- ret = nlm_lck_denied;
+ ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
goto out;
case FILE_LOCK_DEFERRED:
if (wait)
@@ -595,12 +586,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
int error;
+ int mode;
__be32 ret;
struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -613,7 +605,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
/* If there's a conflicting lock, remember to clean up the test lock */
test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
- error = vfs_test_lock(file->f_file, &lock->fl);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_test_lock(file->f_file[mode], &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
if (error == FILE_LOCK_DEFERRED)
@@ -634,7 +627,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
+ conflock->svid = lock->fl.fl_pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
@@ -659,11 +652,11 @@ out:
__be32
nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
- int error;
+ int error = 0;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -672,7 +665,12 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
nlmsvc_cancel_blocked(net, file, lock);
lock->fl.fl_type = F_UNLCK;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ if (file->f_file[O_RDONLY])
+ error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
+ &lock->fl, NULL);
+ if (file->f_file[O_WRONLY])
+ error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
+ &lock->fl, NULL);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
@@ -689,10 +687,11 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
{
struct nlm_block *block;
int status = 0;
+ int mode;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -704,7 +703,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
block = nlmsvc_lookup_block(file, lock);
mutex_unlock(&file->f_mutex);
if (block != NULL) {
- vfs_cancel_lock(block->b_file->f_file,
+ mode = lock_to_openmode(&lock->fl);
+ vfs_cancel_lock(block->b_file->f_file[mode],
&block->b_call->a_args.lock.fl);
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
@@ -788,9 +788,21 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
+static fl_owner_t nlmsvc_get_owner(fl_owner_t owner)
+{
+ return nlmsvc_get_lockowner(owner);
+}
+
+static void nlmsvc_put_owner(fl_owner_t owner)
+{
+ nlmsvc_put_lockowner(owner);
+}
+
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
+ .lm_get_owner = nlmsvc_get_owner,
+ .lm_put_owner = nlmsvc_put_owner,
};
/*
@@ -809,6 +821,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
+ int mode;
int error;
loff_t fl_start, fl_end;
@@ -834,7 +847,8 @@ nlmsvc_grant_blocked(struct nlm_block *block)
lock->fl.fl_flags |= FL_SLEEP;
fl_start = lock->fl.fl_start;
fl_end = lock->fl.fl_end;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
lock->fl.fl_start = fl_start;
lock->fl.fl_end = fl_end;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4ae4b63b539253..99696d3f6dd665 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -55,6 +55,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
struct nlm_host *host = NULL;
struct nlm_file *file = NULL;
struct nlm_lock *lock = &argp->lock;
+ int mode;
__be32 error = 0;
/* nfsd callbacks must have been installed for this procedure */
@@ -69,13 +70,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+ error = cast_status(nlm_lookup_file(rqstp, &file, lock));
if (error != 0)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ mode = lock_to_openmode(&lock->fl);
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 028fc152da22fc..cb3a7512c33ec8 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
{
- struct inode *inode = locks_inode(file->f_file);
+ struct inode *inode = nlmsvc_file_inode(file);
dprintk("lockd: %s %s/%ld\n",
msg, inode->i_sb->s_id, inode->i_ino);
@@ -71,56 +71,75 @@ static inline unsigned int file_hash(struct nfs_fh *f)
return tmp & (FILE_NRHASH - 1);
}
+int lock_to_openmode(struct file_lock *lock)
+{
+ return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY;
+}
+
+/*
+ * Open the file. Note that if we're reexporting, for example,
+ * this could block the lockd thread for a while.
+ *
+ * We have to make sure we have the right credential to open
+ * the file.
+ */
+static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
+ struct nlm_file *file, int mode)
+{
+ struct file **fp = &file->f_file[mode];
+ __be32 nfserr;
+
+ if (*fp)
+ return 0;
+ nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
+ if (nfserr)
+ dprintk("lockd: open failed (error %d)\n", nfserr);
+ return nfserr;
+}
+
/*
* Lookup file info. If it doesn't exist, create a file info struct
* and open a (VFS) file for the given inode.
- *
- * FIXME:
- * Note that we open the file O_RDONLY even when creating write locks.
- * This is not quite right, but for now, we assume the client performs
- * the proper R/W checking.
*/
__be32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
- struct nfs_fh *f)
+ struct nlm_lock *lock)
{
struct nlm_file *file;
unsigned int hash;
__be32 nfserr;
+ int mode;
- nlm_debug_print_fh("nlm_lookup_file", f);
+ nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
- hash = file_hash(f);
+ hash = file_hash(&lock->fh);
+ mode = lock_to_openmode(&lock->fl);
/* Lock file table */
mutex_lock(&nlm_file_mutex);
hlist_for_each_entry(file, &nlm_files[hash], f_list)
- if (!nfs_compare_fh(&file->f_handle, f))
+ if (!nfs_compare_fh(&file->f_handle, &lock->fh)) {
+ mutex_lock(&file->f_mutex);
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ mutex_unlock(&file->f_mutex);
goto found;
-
- nlm_debug_print_fh("creating file for", f);
+ }
+ nlm_debug_print_fh("creating file for", &lock->fh);
nfserr = nlm_lck_denied_nolocks;
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file)
- goto out_unlock;
+ goto out_free;
- memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
+ memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh));
mutex_init(&file->f_mutex);
INIT_HLIST_NODE(&file->f_list);
INIT_LIST_HEAD(&file->f_blocks);
- /* Open the file. Note that this must not sleep for too long, else
- * we would lock up lockd:-) So no NFS re-exports, folks.
- *
- * We have to make sure we have the right credential to open
- * the file.
- */
- if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
- dprintk("lockd: open failed (error %d)\n", nfserr);
- goto out_free;
- }
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ if (nfserr)
+ goto out_unlock;
hlist_add_head(&file->f_list, &nlm_files[hash]);
@@ -128,7 +147,6 @@ found:
dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
*result = file;
file->f_count++;
- nfserr = 0;
out_unlock:
mutex_unlock(&nlm_file_mutex);
@@ -148,13 +166,34 @@ nlm_delete_file(struct nlm_file *file)
nlm_debug_print_file("closing file", file);
if (!hlist_unhashed(&file->f_list)) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
kfree(file);
} else {
printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
}
+static int nlm_unlock_files(struct nlm_file *file)
+{
+ struct file_lock lock;
+ struct file *f;
+
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+ for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
+ if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+ pr_warn("lockd: unlock failure in %s:%d\n",
+ __FILE__, __LINE__);
+ return 1;
+ }
+ }
+ return 0;
+}
+
/*
* Loop over all locks on the given file and perform the specified
* action.
@@ -182,17 +221,10 @@ again:
lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
- struct file_lock lock = *fl;
spin_unlock(&flctx->flc_lock);
- lock.fl_type = F_UNLCK;
- lock.fl_start = 0;
- lock.fl_end = OFFSET_MAX;
- if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
- printk("lockd: unlock failure in %s:%d\n",
- __FILE__, __LINE__);
+ if (nlm_unlock_files(file))
return 1;
- }
goto again;
}
}
@@ -246,6 +278,15 @@ nlm_file_inuse(struct nlm_file *file)
return 0;
}
+static void nlm_close_files(struct nlm_file *file)
+{
+ struct file *f;
+
+ for (f = file->f_file[0]; f <= file->f_file[1]; f++)
+ if (f)
+ nlmsvc_ops->fclose(f);
+}
+
/*
* Loop over all files in the file table.
*/
@@ -276,7 +317,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
if (list_empty(&file->f_blocks) && !file->f_locks
&& !file->f_shares && !file->f_count) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ nlm_close_files(file);
kfree(file);
}
}
@@ -410,12 +451,13 @@ nlmsvc_invalidate_all(void)
nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
}
+
static int
nlmsvc_match_sb(void *datap, struct nlm_file *file)
{
struct super_block *sb = datap;
- return sb == locks_inode(file->f_file)->i_sb;
+ return sb == nlmsvc_file_inode(file)->i_sb;
}
/**
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 37a1a88df7717e..d772c20bbfd153 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -180,5 +180,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR,
+ EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 514be5d28d7059..aa353fd5824041 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
+ if (fl->fl_flags & FL_RECLAIM)
+ return -ENOGRACE;
+
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 3f5b3d7b62b714..606fa155c28adf 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,9 +25,11 @@
* Note: we hold the dentry use count while the file is open.
*/
static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
+ int mode)
{
__be32 nfserr;
+ int access;
struct svc_fh fh;
/* must initialize before using! but maxsize doesn't matter */
@@ -36,7 +38,9 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
fh.fh_export = NULL;
- nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
+ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+ access |= NFSD_MAY_LOCK;
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
/* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8313e1dbb5dc49..42356416f0a0e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2687,9 +2687,9 @@ static void force_expire_client(struct nfs4_client *clp)
trace_nfsd_clid_admin_expired(&clp->cl_clientid);
- spin_lock(&clp->cl_lock);
+ spin_lock(&nn->client_lock);
clp->cl_time = 0;
- spin_unlock(&clp->cl_lock);
+ spin_unlock(&nn->client_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
@@ -6821,6 +6821,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
+ struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6842,6 +6843,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
+ sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6887,10 +6889,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;
+ if (lock->lk_reclaim)
+ fl_flags |= FL_RECLAIM;
+
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6902,7 +6908,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -7022,8 +7029,7 @@ out:
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
- * vfs_test_lock. (Arguably perhaps test_lock should be done with an
- * inode operation.)
+ * vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
@@ -7038,7 +7044,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
NFSD_MAY_READ));
if (err)
goto out;
+ lock->fl_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->fl_file = NULL;
out:
fh_unlock(fhp);
nfsd_file_put(nf);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 60d7c59e7935cc..90fcd6178823b4 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -881,6 +881,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
};
int i;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index adaec43548d11f..538520957a8151 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -400,18 +400,16 @@ TRACE_EVENT(nfsd_dirent,
TP_STRUCT__entry(
__field(u32, fh_hash)
__field(u64, ino)
- __field(int, len)
- __dynamic_array(unsigned char, name, namlen)
+ __string_len(name, name, namlen)
),
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
- __entry->len = namlen;
- memcpy(__get_str(name), name, namlen);
+ __assign_str_len(name, name, namlen)
),
- TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
- __entry->fh_hash, __entry->ino,
- __entry->len, __get_str(name))
+ TP_printk("fh_hash=0x%08x ino=%llu name=%s",
+ __entry->fh_hash, __entry->ino, __get_str(name)
+ )
)
#include "state.h"
@@ -608,7 +606,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned long, flavor)
__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
- __dynamic_array(char, name, clp->cl_name.len + 1)
+ __string_len(name, name, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -618,8 +616,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
- memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len);
- __get_str(name)[clp->cl_name.len] = '\0';
+ __assign_str_len(name, clp->cl_name.data, clp->cl_name.len);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 92e77f92268aff..738d564ca4ce34 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -244,7 +244,6 @@ out_nfserr:
* returned. Otherwise the covered directory is returned.
* NOTE: this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
- * NeilBrown <neilb@cse.unsw.edu.au>
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -826,26 +825,16 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page **pp = rqstp->rq_next_page;
struct page *page = buf->page;
- size_t size;
-
- size = sd->len;
if (rqstp->rq_res.page_len == 0) {
- get_page(page);
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
+ svc_rqst_replace_page(rqstp, page);
rqstp->rq_res.page_base = buf->offset;
- rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
- get_page(page);
- if (*rqstp->rq_next_page)
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
- rqstp->rq_res.page_len += size;
- } else
- rqstp->rq_res.page_len += size;
+ svc_rqst_replace_page(rqstp, page);
+ }
+ rqstp->rq_res.page_len += sd->len;
- return size;
+ return sd->len;
}
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
diff --git a/include/linux/errno.h b/include/linux/errno.h
index d73f597a248491..8b0c754bab0256 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -31,5 +31,6 @@
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
#define ERECALLCONFLICT 530 /* conflict with recalled state */
+#define ENOGRACE 531 /* NFS file lock reclaim refused */
#endif
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fe848901fcc3ac..3260fe71484626 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -221,6 +221,8 @@ struct export_operations {
#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
atomic attribute updates
*/
+#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
+ asychronous blocking locks */
unsigned long flags;
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c2bcfb12fef9d..1c01f9f2b5742f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1037,6 +1037,7 @@ static inline struct file *get_file(struct file *f)
#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
#define FL_LAYOUT 2048 /* outstanding pNFS layout */
+#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 0520c0cd73f42e..3bc9f7410e2138 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -27,7 +27,8 @@ struct rpc_task;
struct nlmsvc_binding {
__be32 (*fopen)(struct svc_rqst *,
struct nfs_fh *,
- struct file **);
+ struct file **,
+ int mode);
void (*fclose)(struct file *);
};
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 666f5f310a0419..c4ae6506b8b360 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -10,6 +10,8 @@
#ifndef LINUX_LOCKD_LOCKD_H
#define LINUX_LOCKD_LOCKD_H
+/* XXX: a lot of this should really be under fs/lockd. */
+
#include <linux/in.h>
#include <linux/in6.h>
#include <net/ipv6.h>
@@ -154,7 +156,8 @@ struct nlm_rqst {
struct nlm_file {
struct hlist_node f_list; /* linked list */
struct nfs_fh f_handle; /* NFS file handle */
- struct file * f_file; /* VFS file pointer */
+ struct file * f_file[2]; /* VFS file pointers,
+ indexed by O_ flags */
struct nlm_share * f_shares; /* DOS shares */
struct list_head f_blocks; /* blocked locks */
unsigned int f_locks; /* guesstimate # of locks */
@@ -267,6 +270,7 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
/*
* Server-side lock handling
*/
+int lock_to_openmode(struct file_lock *);
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *, int,
struct nlm_cookie *, int);
@@ -286,7 +290,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
* File handling for the server personality
*/
__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
- struct nfs_fh *);
+ struct nlm_lock *);
void nlm_release_file(struct nlm_file *);
void nlmsvc_release_lockowner(struct nlm_lock *);
void nlmsvc_mark_resources(struct net *);
@@ -301,7 +305,8 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
- return locks_inode(file->f_file);
+ return locks_inode(file->f_file[O_RDONLY] ?
+ file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
}
static inline int __nlm_privileged_request4(const struct sockaddr *sap)
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 938c2bf29db88a..02117ed0fa2ee5 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -20,6 +20,7 @@ enum rpc_auth_flavors {
RPC_AUTH_DES = 3,
RPC_AUTH_KRB = 4,
RPC_AUTH_GSS = 6,
+ RPC_AUTH_TLS = 7,
RPC_AUTH_MAXFLAVOR = 8,
/* pseudoflavors: */
RPC_AUTH_GSS_KRB5 = 390003,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e91d51ea028bb7..f0f846fa396e62 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -19,6 +19,7 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/wait.h>
#include <linux/mm.h>
+#include <linux/pagevec.h>
/* statistics for svc_pool structures */
struct svc_pool_stats {
@@ -256,6 +257,7 @@ struct svc_rqst {
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
+ struct pagevec rq_pvec;
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
@@ -502,6 +504,8 @@ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool, int node);
+void svc_rqst_replace_page(struct svc_rqst *rqstp,
+ struct page *page);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
unsigned int svc_pool_map_get(void);
@@ -523,6 +527,7 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
+const char * svc_proc_name(const struct svc_rqst *rqstp);
int svc_encode_result_payload(struct svc_rqst *rqstp,
unsigned int offset,
unsigned int length);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3184465de3a0bd..24aa159d29a7f8 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -90,9 +90,9 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
spinlock_t sc_send_lock;
- struct list_head sc_send_ctxts;
+ struct llist_head sc_send_ctxts;
spinlock_t sc_rw_ctxt_lock;
- struct list_head sc_rw_ctxts;
+ struct llist_head sc_rw_ctxts;
u32 sc_pending_recvs;
u32 sc_recv_batch;
@@ -150,7 +150,7 @@ struct svc_rdma_recv_ctxt {
};
struct svc_rdma_send_ctxt {
- struct list_head sc_list;
+ struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
struct ib_send_wr sc_send_wr;
@@ -207,6 +207,7 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt,
int status);
+extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a965cbc136addd..b519609af1d020 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -95,6 +95,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT)
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)
+#define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS)
#define rpc_call cpu_to_be32(RPC_CALL)
#define rpc_reply cpu_to_be32(RPC_REPLY)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c8c39f22d3b169..b15c1f07162d09 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -288,7 +288,6 @@ struct rpc_xprt {
const char *address_strings[RPC_DISPLAY_MAX];
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct dentry *debugfs; /* debugfs directory */
- atomic_t inject_disconnect;
#endif
struct rcu_head rcu;
const struct xprt_class *xprt_class;
@@ -502,21 +501,4 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
return test_and_set_bit(XPRT_BINDING, &xprt->state);
}
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-extern unsigned int rpc_inject_disconnect;
-static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
-{
- if (!rpc_inject_disconnect)
- return;
- if (atomic_dec_return(&xprt->inject_disconnect))
- return;
- atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
- xprt->ops->inject_disconnect(xprt);
-}
-#else
-static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
-{
-}
-#endif
-
#endif /* _LINUX_SUNRPC_XPRT_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d99ca99837debe..1fa2b69c6fc3d2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -48,6 +48,8 @@ typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dobool(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos);
int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 861f199896c6aa..d323f5a049c885 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1642,7 +1642,7 @@ TRACE_EVENT(svc_process,
__field(u32, vers)
__field(u32, proc)
__string(service, name)
- __string(procedure, rqst->rq_procinfo->pc_name)
+ __string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)")
),
@@ -1652,7 +1652,7 @@ TRACE_EVENT(svc_process,
__entry->vers = rqst->rq_vers;
__entry->proc = rqst->rq_proc;
__assign_str(service, name);
- __assign_str(procedure, rqst->rq_procinfo->pc_name);
+ __assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)");
),
@@ -1918,7 +1918,7 @@ TRACE_EVENT(svc_stats_latency,
TP_STRUCT__entry(
__field(u32, xid)
__field(unsigned long, execute)
- __string(procedure, rqst->rq_procinfo->pc_name)
+ __string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
@@ -1926,7 +1926,7 @@ TRACE_EVENT(svc_stats_latency,
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_stime));
- __assign_str(procedure, rqst->rq_procinfo->pc_name);
+ __assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index acc17194c16029..08810a4638805d 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -102,6 +102,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
@@ -197,6 +200,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
@@ -459,6 +465,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
@@ -507,6 +516,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#define __string(item, src) __dynamic_array(char, item, \
strlen((src) ? (const char *)(src) : "(null)") + 1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
+
/*
* __bitmask_size_in_bytes_raw is the number of bytes needed to hold
* num_possible_cpus().
@@ -670,10 +682,20 @@ static inline notrace int trace_event_get_offsets_##call( \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __assign_str
#define __assign_str(dst, src) \
strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
+#undef __assign_str_len
+#define __assign_str_len(dst, src, len) \
+ do { \
+ memcpy(__get_str(dst), (src), (len)); \
+ __get_str(dst)[len] = '\0'; \
+ } while(0)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
index 427294dd56a1b8..e29e8accc4f4d2 100644
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ b/include/uapi/linux/nfsd/nfsfh.h
@@ -33,7 +33,6 @@ struct nfs_fhbase_old {
/*
* This is the new flexible, extensible style NFSv2/v3/v4 file handle.
- * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
*
* The file handle starts with a sequence of four-byte words.
* The first word contains a version number (1) and three descriptor bytes
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 272f4a272f8cf8..25e49b4d8049b8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -536,6 +536,21 @@ static void proc_put_char(void **buf, size_t *size, char c)
}
}
+static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ *(bool *)valp = *lvalp;
+ } else {
+ int val = *(bool *)valp;
+
+ *lvalp = (unsigned long)val;
+ *negp = false;
+ }
+ return 0;
+}
+
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
@@ -799,6 +814,26 @@ static int do_proc_douintvec(struct ctl_table *table, int write,
}
/**
+ * proc_dobool - read/write a bool
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dobool(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_dobool_conv, NULL);
+}
+
+/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
@@ -1630,6 +1665,12 @@ int proc_dostring(struct ctl_table *table, int write,
return -ENOSYS;
}
+int proc_dobool(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -3425,6 +3466,7 @@ int __init sysctl_init(void)
* No sense putting this after each symbol definition, twice,
* exception granted :-)
*/
+EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 73604bff3d2ce1..12b805dabbc993 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1945,6 +1945,13 @@ config FAIL_MMC_REQUEST
and to test how the mmc host driver handles retries from
the block device.
+config FAIL_SUNRPC
+ bool "Fault-injection capability for SunRPC"
+ depends on FAULT_INJECTION_DEBUG_FS && SUNRPC_DEBUG
+ help
+ Provide fault-injection capability for SunRPC and
+ its consumers.
+
config FAULT_INJECTION_STACKTRACE_FILTER
bool "stacktrace filter for fault-injection capabilities"
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a81be45f40d9f5..3d685fe328fad0 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1980,7 +1980,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
- destroy_use_gss_proxy_proc_entry(net);
+ rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 56029e3af6ff0b..827bf3a2817888 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -8,14 +8,14 @@
#include <linux/debugfs.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
+
#include "netns.h"
+#include "fail.h"
static struct dentry *topdir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
-unsigned int rpc_inject_disconnect;
-
static int
tasks_show(struct seq_file *f, void *v)
{
@@ -235,8 +235,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
/* make tasks file */
debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
&xprt_info_fops);
-
- atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -246,56 +244,30 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
-static int
-fault_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = kmalloc(128, GFP_KERNEL);
- if (!filp->private_data)
- return -ENOMEM;
- return 0;
-}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+struct fail_sunrpc_attr fail_sunrpc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+};
+EXPORT_SYMBOL_GPL(fail_sunrpc);
-static int
-fault_release(struct inode *inode, struct file *filp)
+static void fail_sunrpc_init(void)
{
- kfree(filp->private_data);
- return 0;
-}
+ struct dentry *dir;
-static ssize_t
-fault_disconnect_read(struct file *filp, char __user *user_buf,
- size_t len, loff_t *offset)
-{
- char *buffer = (char *)filp->private_data;
- size_t size;
+ dir = fault_create_debugfs_attr("fail_sunrpc", NULL,
+ &fail_sunrpc.attr);
- size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
- return simple_read_from_buffer(user_buf, len, offset, buffer, size);
-}
+ debugfs_create_bool("ignore-client-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_client_disconnect);
-static ssize_t
-fault_disconnect_write(struct file *filp, const char __user *user_buf,
- size_t len, loff_t *offset)
+ debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_server_disconnect);
+}
+#else
+static void fail_sunrpc_init(void)
{
- char buffer[16];
-
- if (len >= sizeof(buffer))
- len = sizeof(buffer) - 1;
- if (copy_from_user(buffer, user_buf, len))
- return -EFAULT;
- buffer[len] = '\0';
- if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
- return -EINVAL;
- return len;
}
-
-static const struct file_operations fault_disconnect_fops = {
- .owner = THIS_MODULE,
- .open = fault_open,
- .read = fault_disconnect_read,
- .write = fault_disconnect_write,
- .release = fault_release,
-};
+#endif
void __exit
sunrpc_debugfs_exit(void)
@@ -309,16 +281,11 @@ sunrpc_debugfs_exit(void)
void __init
sunrpc_debugfs_init(void)
{
- struct dentry *rpc_fault_dir;
-
topdir = debugfs_create_dir("sunrpc", NULL);
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
- rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
-
- debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
- &fault_disconnect_fops);
+ fail_sunrpc_init();
}
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
new file mode 100644
index 00000000000000..69dc30cc44b844
--- /dev/null
+++ b/net/sunrpc/fail.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Oracle. All rights reserved.
+ */
+
+#ifndef _NET_SUNRPC_FAIL_H_
+#define _NET_SUNRPC_FAIL_H_
+
+#include <linux/fault-inject.h>
+
+#if IS_ENABLED(CONFIG_FAULT_INJECTION)
+
+struct fail_sunrpc_attr {
+ struct fault_attr attr;
+
+ bool ignore_client_disconnect;
+
+ bool ignore_server_disconnect;
+};
+
+extern struct fail_sunrpc_attr fail_sunrpc;
+
+#endif /* CONFIG_FAULT_INJECTION */
+
+#endif /* _NET_SUNRPC_FAIL_H_ */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0de918cb3d90da..bfcbaf7b3822fb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -31,6 +31,8 @@
#include <trace/events/sunrpc.h>
+#include "fail.h"
+
#define RPCDBG_FACILITY RPCDBG_SVCDSP
static void svc_unregister(const struct svc_serv *serv, struct net *net);
@@ -838,6 +840,27 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
}
EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+/**
+ * svc_rqst_replace_page - Replace one page in rq_pages[]
+ * @rqstp: svc_rqst with pages to replace
+ * @page: replacement page
+ *
+ * When replacing a page in rq_pages, batch the release of the
+ * replaced pages to avoid hammering the page allocator.
+ */
+void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+{
+ if (*rqstp->rq_next_page) {
+ if (!pagevec_space(&rqstp->rq_pvec))
+ __pagevec_release(&rqstp->rq_pvec);
+ pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
+ }
+
+ get_page(page);
+ *(rqstp->rq_next_page++) = page;
+}
+EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
+
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
@@ -1503,6 +1526,12 @@ svc_process(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
u32 dir;
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+ if (!fail_sunrpc.ignore_server_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ svc_xprt_deferred_close(rqstp->rq_xprt);
+#endif
+
/*
* Setup response xdr_buf.
* Initially it has just one page
@@ -1630,6 +1659,21 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
EXPORT_SYMBOL_GPL(svc_max_payload);
/**
+ * svc_proc_name - Return RPC procedure name in string form
+ * @rqstp: svc_rqst to operate on
+ *
+ * Return value:
+ * Pointer to a NUL-terminated string
+ */
+const char *svc_proc_name(const struct svc_rqst *rqstp)
+{
+ if (rqstp && rqstp->rq_procinfo)
+ return rqstp->rq_procinfo->pc_name;
+ return "unknown";
+}
+
+
+/**
* svc_encode_result_payload - mark a range of bytes as a result payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in rqstp->rq_res
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dbb41821b1b859..e1153cba9cc606 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -539,6 +539,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
+ pagevec_release(&rqstp->rq_pvec);
svc_free_res_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
@@ -664,6 +665,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled;
+ pagevec_init(&rqstp->rq_pvec);
+
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index fb6db09725c76f..05abe344a269b7 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -56,6 +56,7 @@
#include "sunrpc.h"
#include "sysfs.h"
+#include "fail.h"
/*
* Local variables
@@ -855,6 +856,19 @@ xprt_init_autodisconnect(struct timer_list *t)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+ if (!fail_sunrpc.ignore_client_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ xprt->ops->inject_disconnect(xprt);
+}
+#else
+static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+}
+#endif
+
bool xprt_lock_connect(struct rpc_xprt *xprt,
struct rpc_task *task,
void *cookie)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 1e651447dc4ebf..e27433f08ca7f1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -35,6 +35,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
+ struct llist_node rw_node;
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
@@ -53,19 +54,19 @@ static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_rw_ctxt_lock);
-
- ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
- if (ctxt) {
- list_del(&ctxt->rw_list);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+ node = llist_del_first(&rdma->sc_rw_ctxts);
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+ if (node) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out_noctx;
+
INIT_LIST_HEAD(&ctxt->rw_list);
}
@@ -83,14 +84,18 @@ out_noctx:
return NULL;
}
-static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
- struct svc_rdma_rw_ctxt *ctxt)
+static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt,
+ struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ llist_add(&ctxt->rw_node, list);
+}
- spin_lock(&rdma->sc_rw_ctxt_lock);
- list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
}
/**
@@ -101,9 +106,10 @@ static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
- list_del(&ctxt->rw_list);
+ while ((node = llist_del_first(&rdma->sc_rw_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
kfree(ctxt);
}
}
@@ -171,20 +177,35 @@ static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
cc->cc_sqecount = 0;
}
+/*
+ * The consumed rw_ctx's are cleaned and placed on a local llist so
+ * that only one atomic llist operation is needed to put them all
+ * back on the free list.
+ */
static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
+ LLIST_HEAD(free);
+ first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
- svc_rdma_put_rw_ctxt(rdma, ctxt);
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+
+ ctxt->rw_node.next = first;
+ first = &ctxt->rw_node;
+ if (!last)
+ last = first;
}
+ if (first)
+ llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
/* State for sending a Write or Reply chunk.
@@ -248,8 +269,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
@@ -304,9 +324,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d6bbafb773e131..599021b2391d16 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static inline struct svc_rdma_send_ctxt *
-svc_rdma_next_send_ctxt(struct list_head *list)
-{
- return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
- sc_list);
-}
-
static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
struct rpc_rdma_cid *cid)
{
@@ -182,9 +175,10 @@ fail0:
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
- list_del(&ctxt->sc_list);
+ while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
ib_dma_unmap_single(rdma->sc_pd->device,
ctxt->sc_sges[0].addr,
rdma->sc_max_req_size,
@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_send_lock);
- ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
- if (!ctxt)
+ node = llist_del_first(&rdma->sc_send_ctxts);
+ if (!node)
goto out_empty;
- list_del(&ctxt->sc_list);
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
spin_unlock(&rdma->sc_send_lock);
out:
@@ -253,9 +248,21 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
- spin_lock(&rdma->sc_send_lock);
- list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
- spin_unlock(&rdma->sc_send_lock);
+ llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
+}
+
+/**
+ * svc_rdma_wake_send_waiters - manage Send Queue accounting
+ * @rdma: controlling transport
+ * @avail: Number of additional SQEs that are now available
+ *
+ */
+void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
+{
+ atomic_add(avail, &rdma->sc_sq_avail);
+ smp_mb__after_atomic();
+ if (unlikely(waitqueue_active(&rdma->sc_send_wait)))
+ wake_up(&rdma->sc_send_wait);
}
/**
@@ -275,11 +282,9 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
- atomic_inc(&rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d94b7759ada131..94b20fb4713563 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -136,9 +136,9 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
- INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
+ init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
- INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
+ init_llist_head(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
@@ -545,7 +545,6 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- struct svc_xprt *xprt = &rdma->sc_xprt;
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -553,12 +552,6 @@ static void __svc_rdma_free(struct work_struct *work)
svc_rdma_flush_recv_queues(rdma);
- /* Final put of backchannel client transport */
- if (xprt->xpt_bc_xprt) {
- xprt_put(xprt->xpt_bc_xprt);
- xprt->xpt_bc_xprt = NULL;
- }
-
svc_rdma_destroy_rw_ctxts(rdma);
svc_rdma_send_ctxts_destroy(rdma);
svc_rdma_recv_ctxts_destroy(rdma);
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 13a35f7cbe661c..e61471ab7d1418 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -141,6 +141,33 @@
* In most cases, the __assign_str() macro will take the same
* parameters as the __string() macro had to declare the string.
*
+ * __string_len: This is a helper to a __dynamic_array, but it understands
+ * that the array has characters in it, and with the combined
+ * use of __assign_str_len(), it will allocate 'len' + 1 bytes
+ * in the ring buffer and add a '\0' to the string. This is
+ * useful if the string being saved has no terminating '\0' byte.
+ * It requires that the length of the string is known as it acts
+ * like a memcpy().
+ *
+ * Declared with:
+ *
+ * __string_len(foo, bar, len)
+ *
+ * To assign this string, use the helper macro __assign_str_len().
+ *
+ * __assign_str(foo, bar, len);
+ *
+ * Then len + 1 is allocated to the ring buffer, and a nul terminating
+ * byte is added. This is similar to:
+ *
+ * memcpy(__get_str(foo), bar, len);
+ * __get_str(foo)[len] = 0;
+ *
+ * The advantage of using this over __dynamic_array, is that it
+ * takes care of allocating the extra byte on the ring buffer
+ * for the '\0' terminating byte, and __get_str(foo) can be used
+ * in the TP_printk().
+ *
* __bitmask: This is another kind of __dynamic_array, but it expects
* an array of longs, and the number of bits to parse. It takes
* two parameters (name, nr_bits), where name is the name of the