diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/Config.in linux-2.4.17-svc_tcp/fs/Config.in --- linux-2.4.17-rpc_bkl/fs/Config.in Mon Nov 12 18:34:16 2001 +++ linux-2.4.17-svc_tcp/fs/Config.in Wed Jan 2 12:52:23 2002 @@ -99,6 +99,7 @@ dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD + dep_mbool ' Provide NFS server over TCP support (EXPERIMENTAL)' CONFIG_NFSD_TCP $CONFIG_NFSD $CONFIG_EXPERIMENTAL if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then define_tristate CONFIG_SUNRPC y diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/lockd/svc.c linux-2.4.17-svc_tcp/fs/lockd/svc.c --- linux-2.4.17-rpc_bkl/fs/lockd/svc.c Sun Oct 21 19:32:33 2001 +++ linux-2.4.17-svc_tcp/fs/lockd/svc.c Sat Jan 5 15:27:03 2002 @@ -36,7 +36,7 @@ #include #define NLMDBG_FACILITY NLMDBG_SVC -#define LOCKD_BUFSIZE (1024 + NLMSSVC_XDRSIZE) +#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) #define ALLOWED_SIGS (sigmask(SIGKILL)) extern struct svc_program nlmsvc_program; @@ -237,7 +237,7 @@ "lockd_up: no pid, %d users??\n", nlmsvc_users); error = -ENOMEM; - serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NLMSVC_XDRSIZE); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); goto out; diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/lockd/xdr.c linux-2.4.17-svc_tcp/fs/lockd/xdr.c --- linux-2.4.17-rpc_bkl/fs/lockd/xdr.c Mon Oct 1 22:45:47 2001 +++ linux-2.4.17-svc_tcp/fs/lockd/xdr.c Sun Dec 30 22:29:57 2001 @@ -230,7 +230,7 @@ static inline int xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) { - struct svc_buf *buf = &rqstp->rq_resbuf; + struct svc_buf *buf = rqstp->rq_resbuf; buf->len = p - buf->base; return (buf->len <= buf->buflen); diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/lockd/xdr4.c linux-2.4.17-svc_tcp/fs/lockd/xdr4.c --- linux-2.4.17-rpc_bkl/fs/lockd/xdr4.c Mon Oct 1 22:45:47 2001 +++ 
linux-2.4.17-svc_tcp/fs/lockd/xdr4.c Sun Dec 30 22:29:57 2001 @@ -236,7 +236,7 @@ static int xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) { - struct svc_buf *buf = &rqstp->rq_resbuf; + struct svc_buf *buf = rqstp->rq_resbuf; buf->len = p - buf->base; return (buf->len <= buf->buflen); diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfs3proc.c linux-2.4.17-svc_tcp/fs/nfsd/nfs3proc.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfs3proc.c Fri Sep 21 06:02:01 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfs3proc.c Sun Dec 30 22:29:57 2001 @@ -152,7 +152,7 @@ dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); /* Reserve room for status, post_op_attr, and path length */ - svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, + svcbuf_reserve(rqstp->rq_resbuf, &path, &dummy, 1 + NFS3_POST_OP_ATTR_WORDS + 1); /* Read the symlink. */ @@ -181,7 +181,7 @@ * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, + svcbuf_reserve(rqstp->rq_resbuf, &buffer, &avail, 1 + NFS3_POST_OP_ATTR_WORDS + 3); resp->count = argp->count; @@ -448,7 +448,7 @@ argp->count, (u32) argp->cookie); /* Reserve buffer space for status, attributes and verifier */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, + svcbuf_reserve(rqstp->rq_resbuf, &buffer, &count, 1 + NFS3_POST_OP_ATTR_WORDS + 2); /* Make sure we've room for the NULL ptr & eof flag, and shrink to @@ -483,7 +483,7 @@ argp->count, (u32) argp->cookie); /* Reserve buffer space for status, attributes and verifier */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, + svcbuf_reserve(rqstp->rq_resbuf, &buffer, &count, 1 + NFS3_POST_OP_ATTR_WORDS + 2); /* Make sure we've room for the NULL ptr & eof flag, and shrink to diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfs3xdr.c linux-2.4.17-svc_tcp/fs/nfsd/nfs3xdr.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfs3xdr.c Thu Oct 4 07:27:48 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfs3xdr.c Sun Dec 30 22:29:57 
2001 @@ -268,7 +268,7 @@ static inline int xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) { - struct svc_buf *buf = &rqstp->rq_resbuf; + struct svc_buf *buf = rqstp->rq_resbuf; buf->len = p - buf->base; dprintk("nfsd: ressize_check p %p base %p len %d\n", diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfscache.c linux-2.4.17-svc_tcp/fs/nfsd/nfscache.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfscache.c Thu Feb 15 19:56:29 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfscache.c Sun Dec 30 22:29:57 2001 @@ -265,7 +265,7 @@ case RC_NOCACHE: return RC_DOIT; case RC_REPLSTAT: - svc_putlong(&rqstp->rq_resbuf, rp->c_replstat); + svc_putlong(rqstp->rq_resbuf, rp->c_replstat); break; case RC_REPLBUFF: if (!nfsd_cache_append(rqstp, &rp->c_replbuf)) @@ -300,7 +300,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) { struct svc_cacherep *rp; - struct svc_buf *resp = &rqstp->rq_resbuf, *cachp; + struct svc_buf *resp = rqstp->rq_resbuf, *cachp; int len; if (!(rp = rqstp->rq_cacherep) || cache_disabled) @@ -347,7 +347,7 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data) { - struct svc_buf *resp = &rqstp->rq_resbuf; + struct svc_buf *resp = rqstp->rq_resbuf; if (resp->len + data->len > resp->buflen) { printk(KERN_WARNING "nfsd: cached reply too large (%d).\n", diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfsproc.c linux-2.4.17-svc_tcp/fs/nfsd/nfsproc.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfsproc.c Sun Oct 21 19:40:36 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfsproc.c Sun Dec 30 22:29:57 2001 @@ -110,7 +110,7 @@ dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Reserve room for status and path length */ - svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2); + svcbuf_reserve(rqstp->rq_resbuf, &path, &dummy, 2); /* Read the symlink. */ resp->len = NFS_MAXPATHLEN; @@ -138,7 +138,7 @@ /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. 
*/ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19); + svcbuf_reserve(rqstp->rq_resbuf, &buffer, &avail, 19); if ((avail << 2) < argp->count) { printk(KERN_NOTICE @@ -477,7 +477,7 @@ argp->count, argp->cookie); /* Reserve buffer space for status */ - svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1); + svcbuf_reserve(rqstp->rq_resbuf, &buffer, &count, 1); /* Shrink to the client read size */ if (count > (argp->count >> 2)) diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfssvc.c linux-2.4.17-svc_tcp/fs/nfsd/nfssvc.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfssvc.c Wed Oct 17 23:16:34 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfssvc.c Sun Dec 30 22:29:57 2001 @@ -94,7 +94,8 @@ if (error < 0) goto failure; -#if 0 /* Don't even pretend that TCP works. It doesn't. */ +#ifdef CONFIG_NFSD_TCP + /* TCP transport is experimental: only built when CONFIG_NFSD_TCP is set. */ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); if (error < 0) goto failure; @@ -292,14 +293,14 @@ } if (rqstp->rq_proc != 0) - svc_putlong(&rqstp->rq_resbuf, nfserr); + svc_putlong(rqstp->rq_resbuf, nfserr); /* Encode result. * For NFSv2, additional info is never returned in case of an error. */ if (!(nfserr && rqstp->rq_vers == 2)) { xdr = proc->pc_encode; - if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { + if (xdr && !xdr(rqstp, rqstp->rq_resbuf->buf, rqstp->rq_resp)) { /* Failed to encode result.
Release cache entry */ dprintk("nfsd: failed to encode result!\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); diff -u --recursive --new-file linux-2.4.17-rpc_bkl/fs/nfsd/nfsxdr.c linux-2.4.17-svc_tcp/fs/nfsd/nfsxdr.c --- linux-2.4.17-rpc_bkl/fs/nfsd/nfsxdr.c Wed Oct 17 23:16:34 2001 +++ linux-2.4.17-svc_tcp/fs/nfsd/nfsxdr.c Sun Dec 30 22:29:57 2001 @@ -179,7 +179,7 @@ static inline int xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) { - struct svc_buf *buf = &rqstp->rq_resbuf; + struct svc_buf *buf = rqstp->rq_resbuf; buf->len = p - buf->base; dprintk("nfsd: ressize_check p %p base %p len %d\n", diff -u --recursive --new-file linux-2.4.17-rpc_bkl/include/linux/sunrpc/svc.h linux-2.4.17-svc_tcp/include/linux/sunrpc/svc.h --- linux-2.4.17-rpc_bkl/include/linux/sunrpc/svc.h Sat Dec 22 19:28:36 2001 +++ linux-2.4.17-svc_tcp/include/linux/sunrpc/svc.h Mon Dec 31 10:34:28 2001 @@ -69,11 +69,18 @@ */ #define RPCSVC_MAXIOV ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1) struct svc_buf { + struct svc_buf * prev; /* svc_sock send queue */ + struct svc_buf * next; u32 * area; /* allocated memory */ u32 * base; /* base of RPC datagram */ - int buflen; /* total length of buffer */ + unsigned int buflen; /* total length of buffer */ u32 * buf; /* read/write pointer */ - int len; /* current end of buffer */ + unsigned int len; /* current end of buffer */ + + unsigned int sent; /* number of bytes sent */ + + /* UDP responses should have peer addresses */ + struct sockaddr_in raddr; /* peer address */ /* iovec for zero-copy NFS READs */ struct iovec iov[RPCSVC_MAXIOV]; @@ -100,7 +107,7 @@ struct sk_buff * rq_skbuff; /* fast recv inet buffer */ struct svc_buf rq_defbuf; /* default buffer */ struct svc_buf rq_argbuf; /* argument buffer */ - struct svc_buf rq_resbuf; /* result buffer */ + struct svc_buf * rq_resbuf; /* result buffer */ u32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ u32 rq_vers; /* program version */ @@ -179,5 +186,7 @@ int svc_process(struct 
svc_serv *, struct svc_rqst *); int svc_register(struct svc_serv *, int, unsigned short); void svc_wake_up(struct svc_serv *); +struct svc_buf * svc_resbuf_alloc(struct svc_serv *); +int svc_resbuf_free(struct svc_buf *); #endif /* SUNRPC_SVC_H */ diff -u --recursive --new-file linux-2.4.17-rpc_bkl/include/linux/sunrpc/svcsock.h linux-2.4.17-svc_tcp/include/linux/sunrpc/svcsock.h --- linux-2.4.17-rpc_bkl/include/linux/sunrpc/svcsock.h Sat Dec 22 19:28:45 2001 +++ linux-2.4.17-svc_tcp/include/linux/sunrpc/svcsock.h Mon Dec 31 11:26:43 2001 @@ -11,6 +11,9 @@ #include +#define SK_SENDING 0 +#define SK_WSPACE 1 +#define SK_INSEND 2 /* * RPC server socket. * NOTE: First two items must be prev/next. @@ -32,16 +35,22 @@ unsigned int sk_temp : 1, /* temp socket */ sk_qued : 1, /* on serv->sk_sockets */ sk_dead : 1; /* socket closed */ - int (*sk_recvfrom)(struct svc_rqst *rqstp); - int (*sk_sendto)(struct svc_rqst *rqstp); + int (*sk_recvfrom)(struct svc_rqst *); + int (*sk_sendto)(struct svc_sock *, struct svc_buf *); /* We keep the old state_change and data_ready CB's here */ void (*sk_ostate)(struct sock *); - void (*sk_odata)(struct sock *, int bytes); + void (*sk_odata)(struct sock *, int); + void (*sk_owspace)(struct sock *); + /* send stuff */ + spinlock_t sk_sendlk; + struct svc_buf * sk_sendq; /* send-queue of resbuf's */ + unsigned int sk_sendstate; /* private TCP part */ - int sk_reclen; /* length of record */ - int sk_tcplen; /* current read length */ + unsigned int sk_reclen; /* length of record */ + unsigned int sk_tcplen; /* current read length */ + struct sockaddr_in sk_raddr; /* peer address */ /* Debugging */ struct svc_rqst * sk_rqstp; diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/clnt.c linux-2.4.17-svc_tcp/net/sunrpc/clnt.c --- linux-2.4.17-rpc_bkl/net/sunrpc/clnt.c Sat Dec 22 18:57:48 2001 +++ linux-2.4.17-svc_tcp/net/sunrpc/clnt.c Tue Jan 8 16:27:33 2002 @@ -78,10 +78,6 @@ dprintk("RPC: creating %s client for %s (xprt %p)\n", 
program->name, servname, xprt); -#ifdef RPC_DEBUG - rpc_register_sysctl(); -#endif - if (!xprt) goto out; if (vers >= program->nrvers || !(version = program->version[vers])) diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/stats.c linux-2.4.17-svc_tcp/net/sunrpc/stats.c --- linux-2.4.17-rpc_bkl/net/sunrpc/stats.c Thu Oct 11 20:17:22 2001 +++ linux-2.4.17-svc_tcp/net/sunrpc/stats.c Tue Jan 8 16:34:46 2002 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,8 @@ static struct proc_dir_entry *proc_net_rpc = NULL; +kmem_cache_t *rpc_rbcachep = NULL; + /* * Get RPC client stats */ @@ -181,25 +184,60 @@ } } -#ifdef MODULE +static int +svc_rbcache_create(void) +{ + if (!(rpc_rbcachep = kmem_cache_create("rpc_rbcache", + sizeof (struct svc_buf), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL))) + return -ENOMEM; + return 0; +} -int -init_module(void) +static int +svc_rbcache_destroy(void) +{ + if (rpc_rbcachep && kmem_cache_destroy(rpc_rbcachep)) { + printk(KERN_WARNING "RPC: Unable to destroy rpc_rbcache (%p).\n" + "This module cannot be reloaded.\n", + rpc_rbcachep); + return -EBUSY; + } + return 0; +} + +/* + * Initialize sunrpc + */ +static int __init init_sunrpc(void) { + int err = 0; + #ifdef RPC_DEBUG rpc_register_sysctl(); #endif rpc_proc_init(); - return 0; + + err = svc_rbcache_create(); + if (err) { + /* Cache creation failed: undo the registrations made above. */ + rpc_proc_exit(); +#ifdef RPC_DEBUG + rpc_unregister_sysctl(); +#endif + } + return err; } -void -cleanup_module(void) +static void __exit exit_sunrpc(void) { #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif rpc_proc_exit(); + svc_rbcache_destroy(); } -#endif + MODULE_LICENSE("GPL"); +module_init(init_sunrpc) +module_exit(exit_sunrpc) diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/svc.c linux-2.4.17-svc_tcp/net/sunrpc/svc.c --- linux-2.4.17-rpc_bkl/net/sunrpc/svc.c Fri Sep 7 19:48:39 2001 +++ linux-2.4.17-svc_tcp/net/sunrpc/svc.c Tue Jan 8 16:26:42 2002 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP #define RPC_PARANOIA
1 +extern kmem_cache_t *rpc_rbcachep; + /* * Create an RPC service */ @@ -31,10 +34,6 @@ { struct svc_serv *serv; -#ifdef RPC_DEBUG - rpc_register_sysctl(); -#endif - if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) return NULL; @@ -112,6 +111,36 @@ bufp->area = 0; } +struct svc_buf * +svc_resbuf_alloc(struct svc_serv *serv) +{ + struct svc_buf *resbufp; + + if (!(resbufp = kmem_cache_alloc(rpc_rbcachep, SLAB_NFS))) + return NULL; + + memset(resbufp, 0, sizeof (struct svc_buf)); + + if (!svc_init_buffer(resbufp, serv->sv_bufsz)) { + kmem_cache_free(rpc_rbcachep, resbufp); + return NULL; + } + + dprintk("rpc: allocated resbuf %p for %s\n", resbufp, serv->sv_name); + return resbufp; +} + +int +svc_resbuf_free(struct svc_buf *bufp) +{ + if (bufp) { + svc_release_buffer(bufp); + kmem_cache_free(rpc_rbcachep, bufp); + dprintk("rpc: freed resbuf %p\n", bufp); + } + return 0; +} + /* * Create a server thread */ @@ -218,7 +247,7 @@ struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; struct svc_buf * argp = &rqstp->rq_argbuf; - struct svc_buf * resp = &rqstp->rq_resbuf; + struct svc_buf * resp = rqstp->rq_resbuf; kxdrproc_t xdr; u32 *bufp, *statp; u32 dir, prog, vers, proc, @@ -307,7 +336,7 @@ /* Encode reply */ if (*statp == rpc_success && (xdr = procp->pc_encode) - && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { + && !xdr(rqstp, rqstp->rq_resbuf->buf, rqstp->rq_resp)) { dprintk("svc: failed to encode reply\n"); /* serv->sv_stats->rpcsystemerr++; */ *statp = rpc_system_err; diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/svcauth.c linux-2.4.17-svc_tcp/net/sunrpc/svcauth.c --- linux-2.4.17-rpc_bkl/net/sunrpc/svcauth.c Sat Apr 29 07:50:39 2000 +++ linux-2.4.17-svc_tcp/net/sunrpc/svcauth.c Sun Dec 30 22:29:57 2001 @@ -85,7 +85,7 @@ svcauth_null(struct svc_rqst *rqstp, u32 *statp, u32 *authp) { struct svc_buf *argp = &rqstp->rq_argbuf; - struct svc_buf *resp = &rqstp->rq_resbuf; + struct svc_buf 
*resp = rqstp->rq_resbuf; - if ((argp->len -= 3) < 0) { + if ((int)(argp->len -= 3) < 0) { /* len is unsigned now: test the sign explicitly */ *statp = rpc_garbage_args; @@ -117,7 +117,7 @@ svcauth_unix(struct svc_rqst *rqstp, u32 *statp, u32 *authp) { struct svc_buf *argp = &rqstp->rq_argbuf; - struct svc_buf *resp = &rqstp->rq_resbuf; + struct svc_buf *resp = rqstp->rq_resbuf; struct svc_cred *cred = &rqstp->rq_cred; u32 *bufp = argp->buf, slen, i; int len = argp->len; diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/svcauth_des.c linux-2.4.17-svc_tcp/net/sunrpc/svcauth_des.c --- linux-2.4.17-rpc_bkl/net/sunrpc/svcauth_des.c Mon Apr 7 20:35:33 1997 +++ linux-2.4.17-svc_tcp/net/sunrpc/svcauth_des.c Sun Dec 30 22:29:57 2001 @@ -57,7 +57,7 @@ svcauth_des(struct svc_rqst *rqstp, u32 *statp, u32 *authp) { struct svc_buf *argp = &rqstp->rq_argbuf; - struct svc_buf *resp = &rqstp->rq_resbuf; + struct svc_buf *resp = rqstp->rq_resbuf; struct svc_cred *cred = &rqstp->rq_cred; struct des_cred *data = NULL; u32 cryptkey[2]; diff -u --recursive --new-file linux-2.4.17-rpc_bkl/net/sunrpc/svcsock.c linux-2.4.17-svc_tcp/net/sunrpc/svcsock.c --- linux-2.4.17-rpc_bkl/net/sunrpc/svcsock.c Wed Jul 4 20:50:38 2001 +++ linux-2.4.17-svc_tcp/net/sunrpc/svcsock.c Wed Jan 2 17:42:56 2002 @@ -52,13 +52,16 @@ #define RPCDBG_FACILITY RPCDBG_SVCSOCK +#define SVC_TCP_DEFAULT_SOCKSIZE (64*1024) +#define SVC_UDP_DEFAULT_SOCKSIZE (128*1024) +#define SVC_MIN_WRITE_SPACE (35000) static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); -static int svc_udp_sendto(struct svc_rqst *); - +static int svc_udp_sendto(struct svc_sock *, struct svc_buf *); +static int svc_empty_sendq(struct svc_sock *); /* * Queue up an idle server thread. Must have serv->sv_lock held. @@ -208,7 +211,7 @@ /* * Release a socket after use.
*/ -static inline void +static void svc_sock_release(struct svc_rqst *rqstp) { struct svc_sock *svsk = rqstp->rq_sock; @@ -216,6 +219,8 @@ svc_release_skb(rqstp); rqstp->rq_sock = NULL; + if (svsk->sk_sendq) + svc_empty_sendq(svsk); spin_lock_bh(&serv->sv_lock); if (!--(svsk->sk_inuse) && svsk->sk_dead) { @@ -252,10 +257,9 @@ * Generic sendto routine */ static int -svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr) +svc_sendto(struct svc_sock *svsk, struct svc_buf *bufp, struct iovec *iov, int nr) { mm_segment_t oldfs; - struct svc_sock *svsk = rqstp->rq_sock; struct socket *sock = svsk->sk_sock; struct msghdr msg; int i, buflen, len; @@ -263,8 +267,14 @@ for (i = buflen = 0; i < nr; i++) buflen += iov[i].iov_len; - msg.msg_name = &rqstp->rq_addr; - msg.msg_namelen = sizeof(rqstp->rq_addr); + if (sock->type == SOCK_STREAM) { + msg.msg_name = &svsk->sk_raddr; + msg.msg_namelen = sizeof (svsk->sk_raddr); + } else { + msg.msg_name = &bufp->raddr; + msg.msg_namelen = sizeof(bufp->raddr); + } + msg.msg_iov = iov; msg.msg_iovlen = nr; msg.msg_control = NULL; @@ -277,7 +287,7 @@ set_fs(oldfs); dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n", - rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len); + svsk, iov[0].iov_base, iov[0].iov_len, nr, buflen, len); return len; } @@ -340,6 +350,20 @@ } /* + * Set socket buffer length + */ +static inline void +svc_sock_setbufsize(struct socket *sock, unsigned int size) +{ + mm_segment_t oldfs; + + oldfs = get_fs(); set_fs(KERNEL_DS); + sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size)); + sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size)); + set_fs(oldfs); +} + +/* * INET callback when data has been received on the socket. 
*/ static void @@ -360,6 +384,38 @@ wake_up_interruptible(sk->sleep); } +static void +svc_udp_write_space(struct sock *sk) +{ + struct svc_sock *svsk; + struct socket *sock; + + dprintk("svc: socket %p UDP write space (svsk %p)\n", + sk, sk->user_data); + + if (!(sock = sk->socket)) + return; + + if (!(svsk = (struct svc_sock *) sk->user_data)) + goto out; + + if (sock_wspace(sk) < min_t(int, sk->sndbuf,SVC_MIN_WRITE_SPACE)) + return; + + set_bit(SK_WSPACE, &svsk->sk_sendstate); + + if (!test_bit(SK_INSEND, &svsk->sk_sendstate)) { + spin_lock_bh(&svsk->sk_lock); + svc_sock_enqueue(svsk); + spin_unlock_bh(&svsk->sk_lock); + } +out: + clear_bit(SOCK_NOSPACE, &sock->flags); + + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); +} + /* * Receive a datagram from a UDP socket. */ @@ -416,6 +472,9 @@ rqstp->rq_addr.sin_port = skb->h.uh->source; rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; + memcpy(&rqstp->rq_resbuf->raddr, &rqstp->rq_addr, + sizeof (rqstp->rq_resbuf->raddr)); + if (serv->sv_stats) serv->sv_stats->netudpcnt++; @@ -427,9 +486,8 @@ } static int -svc_udp_sendto(struct svc_rqst *rqstp) +svc_udp_sendto(struct svc_sock *svsk, struct svc_buf *bufp) { - struct svc_buf *bufp = &rqstp->rq_resbuf; int error; /* Set up the first element of the reply iovec. @@ -437,16 +495,15 @@ * care of by the server implementation itself. */ /* bufp->base = bufp->area; */ - bufp->iov[0].iov_base = bufp->base; - bufp->iov[0].iov_len = bufp->len << 2; + if (bufp->sent == 0) { + bufp->iov[0].iov_base = bufp->base; + bufp->iov[0].iov_len = bufp->len << 2; + } - error = svc_sendto(rqstp, bufp->iov, bufp->nriov); + error = svc_sendto(svsk, bufp, bufp->iov, bufp->nriov); if (error == -ECONNREFUSED) /* ICMP error on earlier request.
*/ - error = svc_sendto(rqstp, bufp->iov, bufp->nriov); - else if (error == -EAGAIN) - /* Ignore and wait for re-xmit */ - error = 0; + error = svc_sendto(svsk, bufp, bufp->iov, bufp->nriov); return error; } @@ -455,8 +512,10 @@ svc_udp_init(struct svc_sock *svsk) { svsk->sk_sk->data_ready = svc_udp_data_ready; + svsk->sk_sk->write_space = svc_udp_write_space; svsk->sk_recvfrom = svc_udp_recvfrom; svsk->sk_sendto = svc_udp_sendto; + svc_sock_setbufsize(svsk->sk_sock, SVC_UDP_DEFAULT_SOCKSIZE); return 0; } @@ -532,6 +591,38 @@ wake_up_interruptible(sk->sleep); } +static void +svc_tcp_write_space(struct sock *sk) +{ + struct svc_sock *svsk; + struct socket *sock; + + dprintk("svc: socket %p TCP write space (svsk %p)\n", + sk, sk->user_data); + + if (!(sock = sk->socket)) + return; + + if (!(svsk = (struct svc_sock *) sk->user_data)) + goto out; + + if (!sock_writeable(sk)) + return; + + set_bit(SK_WSPACE, &svsk->sk_sendstate); + + if (!test_bit(SK_INSEND, &svsk->sk_sendstate)) { + spin_lock_bh(&svsk->sk_lock); + svc_sock_enqueue(svsk); + spin_unlock_bh(&svsk->sk_lock); + } +out: + clear_bit(SOCK_NOSPACE, &sock->flags); + + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); +} + /* * Accept a TCP connection */ @@ -593,6 +684,9 @@ if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) goto failed; + /* Record the peer address on the accepted connection, not on the listener */ + memcpy(&newsvsk->sk_raddr, &sin, sizeof(sin)); + /* Precharge. Data may have arrived on the socket before we * installed the data_ready callback. */ @@ -661,12 +755,17 @@ * But apparently no known nfs clients send fragmented * records.
*/ /* FIXME: shutdown socket */ - printk(KERN_NOTICE "RPC: bad TCP reclen %08lx", + printk(KERN_NOTICE "RPC: bad TCP reclen %08lx\n", (unsigned long) svsk->sk_reclen); - return -EIO; + goto err_delete; } svsk->sk_reclen &= 0x7fffffff; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); + if (svsk->sk_reclen > (bufp->buflen << 2)) { + printk(KERN_NOTICE "RPC: bad TCP reclen %d\n", + svsk->sk_reclen); + goto err_delete; + } } /* Check whether enough data is available */ @@ -704,8 +803,8 @@ /* Position reply write pointer immediately after * record length */ - rqstp->rq_resbuf.buf += 1; - rqstp->rq_resbuf.len = 1; + rqstp->rq_resbuf->buf += 1; + rqstp->rq_resbuf->len = 1; rqstp->rq_skbuff = 0; rqstp->rq_argbuf.buf += 1; @@ -722,6 +821,10 @@ return len; +err_delete: + svc_delete_socket(svsk); + return -EAGAIN; + error: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); @@ -741,29 +844,43 @@ * a daemon on a dead client. Requires write queue maintenance. */ static int -svc_tcp_sendto(struct svc_rqst *rqstp) +svc_tcp_sendto(struct svc_sock *svsk, struct svc_buf *bufp) { - struct svc_buf *bufp = &rqstp->rq_resbuf; - int sent; + int sent = bufp->sent; + + struct iovec *iovp = bufp->iov; + int nriov = bufp->nriov; + int tosend = (bufp->len << 2) - sent; /* Set up the first element of the reply iovec. * Any other iovecs that may be in use have been taken * care of by the server implementation itself. */ - bufp->iov[0].iov_base = bufp->base; - bufp->iov[0].iov_len = bufp->len << 2; - bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4)); - - sent = svc_sendto(rqstp, bufp->iov, bufp->nriov); - if (sent != bufp->len<<2) { - printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - should shutdown socket\n", - rqstp->rq_sock->sk_server->sv_name, - sent, bufp->len << 2); - /* FIXME: should shutdown the socket, or allocate more memort - * or wait and try again or something. 
Otherwise - * client will get confused - */ + + if (bufp->sent == 0) { + bufp->iov[0].iov_base = bufp->base; + bufp->iov[0].iov_len = bufp->len << 2; + bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4)); + } + + while (1) { + sent = svc_sendto(svsk, bufp, iovp, nriov); + if (sent < 0) + break; + + if (sent != tosend) { + bufp->sent += sent; + + iovp->iov_base = ((unsigned char *) iovp->iov_base) + + sent; + iovp->iov_len -= sent; + tosend -= sent; + } else { + sent = 0; + break; + } } + return sent; } @@ -782,10 +899,12 @@ dprintk("setting up TCP socket for reading\n"); sk->state_change = svc_tcp_state_change; sk->data_ready = svc_tcp_data_ready; + sk->write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; } + svc_sock_setbufsize(svsk->sk_sock, SVC_TCP_DEFAULT_SOCKSIZE); return 0; } @@ -797,7 +916,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) { struct svc_sock *svsk; - int len; + int len, err = 0; DECLARE_WAITQUEUE(wait, current); dprintk("svc: server %p waiting for data (to = %ld)\n", @@ -814,7 +933,6 @@ /* Initialize the buffers */ rqstp->rq_argbuf = rqstp->rq_defbuf; - rqstp->rq_resbuf = rqstp->rq_defbuf; if (signalled()) return -EINTR; @@ -849,6 +967,12 @@ } spin_unlock_bh(&serv->sv_lock); + if (!(rqstp->rq_resbuf = svc_resbuf_alloc(serv))) { + printk (KERN_WARNING "RPC: Unable to allocate resbuf from cache!\n"); + err = -EAGAIN; + goto out_noresbuf; + } + dprintk("svc: server %p, socket %p, inuse=%d\n", rqstp, svsk, svsk->sk_inuse); len = svsk->sk_recvfrom(rqstp); @@ -856,8 +980,8 @@ /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { - svc_sock_release(rqstp); - return -EAGAIN; + err = -EAGAIN; + goto out_error; } rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; @@ -865,14 +989,21 @@ rqstp->rq_verfed = 0; svc_getlong(&rqstp->rq_argbuf, rqstp->rq_xid); - svc_putlong(&rqstp->rq_resbuf, rqstp->rq_xid); + svc_putlong(rqstp->rq_resbuf, rqstp->rq_xid); /* Assume that 
the reply consists of a single buffer. */ - rqstp->rq_resbuf.nriov = 1; + rqstp->rq_resbuf->nriov = 1; if (serv->sv_stats) serv->sv_stats->netcnt++; return len; + +out_error: + svc_resbuf_free(rqstp->rq_resbuf); + rqstp->rq_resbuf = NULL; +out_noresbuf: + svc_sock_release(rqstp); + return err; } /* @@ -882,9 +1013,48 @@ svc_drop(struct svc_rqst *rqstp) { dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); + svc_resbuf_free(rqstp->rq_resbuf); + rqstp->rq_resbuf = NULL; svc_sock_release(rqstp); } +static int +svc_empty_sendq(struct svc_sock *svsk) +{ + int ret = -EINVAL; + struct svc_buf *resbufp; + + if (!svsk) + return -EINVAL; + + if (test_and_set_bit(SK_SENDING, &svsk->sk_sendstate)) + return 0; + + spin_lock(&svsk->sk_sendlk); + while ((resbufp = svsk->sk_sendq) != NULL) { + spin_unlock(&svsk->sk_sendlk); + + /* work with resbufp */ + clear_bit(SK_WSPACE, &svsk->sk_sendstate); + set_bit(SK_INSEND, &svsk->sk_sendstate); + ret = svsk->sk_sendto(svsk, resbufp); + clear_bit(SK_INSEND, &svsk->sk_sendstate); + spin_lock(&svsk->sk_sendlk); + if (ret >= 0) { + rpc_remove_list(&svsk->sk_sendq, resbufp); + svc_resbuf_free(resbufp); + continue; + } + if ( ret != -EAGAIN) + break; + if (!test_bit(SK_WSPACE, &svsk->sk_sendstate)) + break; + } + spin_unlock(&svsk->sk_sendlk); + clear_bit(SK_SENDING, &svsk->sk_sendstate); + return 0; +} + /* * Return reply to client. 
*/ @@ -892,23 +1062,21 @@ svc_send(struct svc_rqst *rqstp) { struct svc_sock *svsk; - int len; - if ((svsk = rqstp->rq_sock) == NULL) { - printk(KERN_WARNING "NULL socket pointer in %s:%d\n", - __FILE__, __LINE__); - return -EFAULT; - } + if ((svsk = rqstp->rq_sock) == NULL) + BUG(); /* release the receive skb before sending the reply */ svc_release_skb(rqstp); - len = svsk->sk_sendto(rqstp); - svc_sock_release(rqstp); + /* Append to the send queue */ + spin_lock(&svsk->sk_sendlk); + rpc_append_list(&svsk->sk_sendq, rqstp->rq_resbuf); + rqstp->rq_resbuf = NULL; + spin_unlock(&svsk->sk_sendlk); - if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - return 0; - return len; + svc_sock_release(rqstp); + return 0; } /* @@ -935,9 +1103,14 @@ svsk->sk_sk = inet; svsk->sk_ostate = inet->state_change; svsk->sk_odata = inet->data_ready; + svsk->sk_owspace = inet->write_space; svsk->sk_server = serv; spin_lock_init(&svsk->sk_lock); + spin_lock_init(&svsk->sk_sendlk); + clear_bit(SK_SENDING, &svsk->sk_sendstate); + svsk->sk_sendq = NULL; + /* Initialize the socket */ if (sock->type == SOCK_DGRAM) *errp = svc_udp_init(svsk); @@ -1022,6 +1195,7 @@ struct svc_sock **rsk; struct svc_serv *serv; struct sock *sk; + struct svc_buf *resbufp; dprintk("svc: svc_delete_socket(%p)\n", svsk); @@ -1030,6 +1204,7 @@ sk->state_change = svsk->sk_ostate; sk->data_ready = svsk->sk_odata; + sk->write_space = svsk->sk_owspace; spin_lock_bh(&serv->sv_lock); @@ -1050,6 +1225,14 @@ if (!svsk->sk_inuse) { spin_unlock_bh(&serv->sv_lock); + + spin_lock(&svsk->sk_sendlk); + while ((resbufp = svsk->sk_sendq) != NULL) { + rpc_remove_list(&svsk->sk_sendq, resbufp); + svc_resbuf_free(resbufp); + } + spin_unlock(&svsk->sk_sendlk); + sock_release(svsk->sk_sock); kfree(svsk); } else {