diff -u --recursive --new-file linux-2.4.18-svc_tcp/fs/nfs/inode.c linux-2.4.18-rpc_tweak/fs/nfs/inode.c --- linux-2.4.18-svc_tcp/fs/nfs/inode.c Mon Jan 21 12:21:08 2002 +++ linux-2.4.18-rpc_tweak/fs/nfs/inode.c Mon Jan 28 12:30:26 2002 @@ -486,7 +486,8 @@ goto failure_kill_reqlist; } - /* We're airborne */ + /* We're airborne. Set socket buffersize */ + xprt_setbufsize(xprt, server->wsize + 1024, server->rsize + 1024); /* Check whether to start the lockd process */ if (!(server->flags & NFS_MOUNT_NONLM)) diff -u --recursive --new-file linux-2.4.18-svc_tcp/fs/nfs/write.c linux-2.4.18-rpc_tweak/fs/nfs/write.c --- linux-2.4.18-svc_tcp/fs/nfs/write.c Sun Jan 20 18:52:06 2002 +++ linux-2.4.18-rpc_tweak/fs/nfs/write.c Sun Jan 20 18:52:23 2002 @@ -740,6 +740,8 @@ if (dirty >= NFS_STRATEGY_PAGES * wpages) nfs_flush_file(inode, NULL, 0, 0, 0); #endif + if (current->need_resched) + schedule(); } int diff -u --recursive --new-file linux-2.4.18-svc_tcp/include/linux/nfsd/const.h linux-2.4.18-rpc_tweak/include/linux/nfsd/const.h --- linux-2.4.18-svc_tcp/include/linux/nfsd/const.h Sat Apr 1 18:04:27 2000 +++ linux-2.4.18-rpc_tweak/include/linux/nfsd/const.h Fri Jan 11 23:09:30 2002 @@ -21,7 +21,7 @@ /* * Maximum blocksize supported by daemon currently at 8K */ -#define NFSSVC_MAXBLKSIZE 8192 +#define NFSSVC_MAXBLKSIZE (32*1024) #ifdef __KERNEL__ diff -u --recursive --new-file linux-2.4.18-svc_tcp/include/linux/sunrpc/xprt.h linux-2.4.18-rpc_tweak/include/linux/sunrpc/xprt.h --- linux-2.4.18-svc_tcp/include/linux/sunrpc/xprt.h Fri Jan 11 23:40:53 2002 +++ linux-2.4.18-rpc_tweak/include/linux/sunrpc/xprt.h Mon Jan 28 12:29:15 2002 @@ -132,7 +132,8 @@ unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned long congtime; /* hold cwnd until then */ + int sndsize; /* length send buffer */ + int rcvsize; /* length receive buffer */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ @@ -175,6 +176,7 @@ struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *toparms); +void xprt_setbufsize(struct rpc_xprt *, int, int); int xprt_destroy(struct rpc_xprt *); void xprt_shutdown(struct rpc_xprt *); void xprt_default_timeout(struct rpc_timeout *, int); diff -u --recursive --new-file linux-2.4.18-svc_tcp/net/sunrpc/clnt.c linux-2.4.18-rpc_tweak/net/sunrpc/clnt.c --- linux-2.4.18-svc_tcp/net/sunrpc/clnt.c Fri Jan 11 23:08:48 2002 +++ linux-2.4.18-rpc_tweak/net/sunrpc/clnt.c Mon Jan 28 01:13:59 2002 @@ -371,7 +371,6 @@ task->tk_status = 0; task->tk_action = call_reserveresult; task->tk_timeout = clnt->cl_timeout.to_resrvval; - clnt->cl_stats->rpccnt++; xprt_reserve(task); } @@ -395,21 +394,20 @@ task->tk_status, task->tk_rqstp); if (task->tk_status >= 0) { + task->tk_client->cl_stats->rpccnt++; task->tk_action = call_allocate; return; } task->tk_status = 0; switch (status) { + case -ETIMEDOUT: + dprintk("RPC: task timed out\n"); case -EAGAIN: case -ENOBUFS: task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; task->tk_action = call_reserve; break; - case -ETIMEDOUT: - dprintk("RPC: task timed out\n"); - task->tk_action = call_timeout; - break; default: if (!task->tk_rqstp) { printk(KERN_INFO "RPC: task has no request, exit EIO\n"); @@ -444,8 +442,7 @@ printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) { - xprt_release(task); - task->tk_action = call_reserve; + task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; } @@ -620,6 +617,7 @@ } case -ENOMEM: case -EAGAIN: + case -ENOBUFS: task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; break; diff -u --recursive --new-file linux-2.4.18-svc_tcp/net/sunrpc/sched.c linux-2.4.18-rpc_tweak/net/sunrpc/sched.c --- linux-2.4.18-svc_tcp/net/sunrpc/sched.c Sun Jan 13 15:20:45 2002 +++ linux-2.4.18-rpc_tweak/net/sunrpc/sched.c Sun Jan 20 17:38:03 2002 @@ -104,7 +104,11 @@ static inline void __rpc_disable_timer(struct rpc_task *task) { + struct timer_list *timer = &task->tk_timer; + dprintk("RPC: %4d disabling timer\n", task->tk_pid); + if (timer_pending(timer)) + del_timer(timer); task->tk_timeout_fn = NULL; task->tk_timeout = 0; } @@ -625,6 +629,11 @@ rpc_wake_up_task(task); } } +#if 0 + /* Note: sync_page() is called with TASK_UNINTERRUPTIBLE set */ + if (current->need_resched && current->state == TASK_RUNNING) + schedule(); +#endif } if (task->tk_exit) { @@ -1075,7 +1084,7 @@ } __rpc_schedule(); - if (++rounds >= 64) { /* safeguard */ + if (++rounds >= 64 || current->need_resched) { /* safeguard */ schedule(); rounds = 0; } @@ -1216,12 +1225,12 @@ spin_unlock(&rpc_sched_lock); return; } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + printk("-pid- proc flgs runs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- --exit--\n"); for (; t; t = next) { next = t->tk_next_task; - printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", - t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status, + printk("%05d %04d %04x %04lx %06d %8p %6d %8p %08ld %8s %8p %8p\n", + t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_runstate, t->tk_status, t->tk_client, t->tk_client->cl_prog, t->tk_rqstp, t->tk_timeout, t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " ", diff -u --recursive --new-file linux-2.4.18-svc_tcp/net/sunrpc/sunrpc_syms.c linux-2.4.18-rpc_tweak/net/sunrpc/sunrpc_syms.c --- linux-2.4.18-svc_tcp/net/sunrpc/sunrpc_syms.c Fri Sep 21 06:02:01 2001 +++ linux-2.4.18-rpc_tweak/net/sunrpc/sunrpc_syms.c Mon Jan 28 12:00:37 2002 @@ -55,6 +55,7 @@ EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_destroy); EXPORT_SYMBOL(xprt_set_timeout); +EXPORT_SYMBOL(xprt_setbufsize); /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); diff -u --recursive --new-file linux-2.4.18-svc_tcp/net/sunrpc/svcsock.c linux-2.4.18-rpc_tweak/net/sunrpc/svcsock.c --- linux-2.4.18-svc_tcp/net/sunrpc/svcsock.c Mon Jan 14 21:32:51 2002 +++ linux-2.4.18-rpc_tweak/net/sunrpc/svcsock.c Sun Jan 27 15:52:43 2002 @@ -52,8 +52,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCSOCK -#define SVC_TCP_DEFAULT_SOCKSIZE (64*1024) -#define SVC_UDP_DEFAULT_SOCKSIZE (128*1024) +#define SVC_TCP_DEFAULT_SOCKSIZE (16*35000) +#define SVC_UDP_DEFAULT_SOCKSIZE (16*35000) #define SVC_MIN_WRITE_SPACE (35000) static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, @@ -442,7 +442,7 @@ struct svc_serv *serv = svsk->sk_server; struct sk_buff *skb; u32 *data; - int err, len; + int err, len = 0; svsk->sk_data = 0; while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { @@ -455,28 +455,25 @@ /* Sorry. */ if (skb_is_nonlinear(skb)) { - if (skb_linearize(skb, GFP_KERNEL) != 0) { - kfree_skb(skb); - svc_sock_received(svsk, 0); - return 0; - } + if (skb_linearize(skb, GFP_KERNEL) != 0) + goto out; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { - skb_free_datagram(svsk->sk_sk, skb); - svc_sock_received(svsk, 0); - return 0; - } + if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) + goto out; } + dst_confirm(skb->dst); + + if (!(rqstp->rq_skbuff = skb_clone(skb, GFP_KERNEL))) + goto out; /* There may be more data */ svsk->sk_data = 1; len = skb->len - sizeof(struct udphdr); data = (u32 *) (skb->data + sizeof(struct udphdr)); - rqstp->rq_skbuff = skb; rqstp->rq_argbuf.base = data; rqstp->rq_argbuf.buf = data; rqstp->rq_argbuf.len = (len >> 2); @@ -496,6 +493,8 @@ /* One down, maybe more to go... */ svsk->sk_sk->stamp = skb->stamp; +out: + skb_free_datagram(svsk->sk_sk, skb); svc_sock_received(svsk, 0); return len; diff -u --recursive --new-file linux-2.4.18-svc_tcp/net/sunrpc/xprt.c linux-2.4.18-rpc_tweak/net/sunrpc/xprt.c --- linux-2.4.18-svc_tcp/net/sunrpc/xprt.c Fri Jan 11 23:07:27 2002 +++ linux-2.4.18-rpc_tweak/net/sunrpc/xprt.c Mon Jan 28 12:43:00 2002 @@ -90,6 +90,7 @@ static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); static int xprt_bind_socket(struct rpc_xprt *, struct socket *); +static void xprt_sock_setbufsize(struct rpc_xprt *xprt); static void xprt_remove_pending(struct rpc_xprt *); #ifdef RPC_DEBUG_DATA @@ -255,6 +256,7 @@ * prompts ECONNREFUSED. */ break; + case -ENOBUFS: case -EAGAIN: if (test_bit(SOCK_NOSPACE, &sock->flags)) result = -ENOMEM; @@ -324,29 +326,17 @@ */ spin_lock(&xprt->xprt_lock); cwnd = xprt->cwnd; - if (result >= 0) { - if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) - goto out; - /* The (cwnd >> 1) term makes sure - * the result gets rounded properly. */ - cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; - if (cwnd > RPC_MAXCWND) - cwnd = RPC_MAXCWND; - else - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); - xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE; - dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); - } else if (result == -ETIMEDOUT) { - if ((cwnd >>= 1) < RPC_CWNDSCALE) - cwnd = RPC_CWNDSCALE; - xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE; - dprintk("RPC: cong %ld, cwnd was %ld, now %ld, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); - } + /* Ignore if the window is overfull */ + if (xprt->cong > cwnd) + goto out; + if (result >= 0 && cwnd < RPC_MAXCWND) { + cwnd += RPC_CWNDSCALE; + xprt_clear_backlog(xprt); + } else if (result == -ETIMEDOUT) + cwnd = ((cwnd + RPC_CWNDSCALE) >> 1) & ~(RPC_CWNDSCALE - 1); + + dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx\n", + xprt->cong, xprt->cwnd, cwnd); xprt->cwnd = cwnd; out: @@ -466,6 +456,7 @@ if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout))) goto defer; xprt_bind_socket(xprt, sock); + xprt_sock_setbufsize(xprt); inet = sock->sk; } @@ -729,6 +720,22 @@ } /* + * Check input queue length + */ +static int +tcp_recv_available(struct socket *sock) +{ + mm_segment_t oldfs; + int avail, err; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); + set_fs(oldfs); + + return (err >= 0)? avail : err; +} + +/* * TCP read fragment marker */ static inline int @@ -883,6 +890,9 @@ if (!(avail = result)) goto out_ok; + if (tcp_recv_available(xprt->sock) < avail) + return -EAGAIN; + /* Find and lock the request corresponding to this xid */ req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (req) { @@ -1247,6 +1257,11 @@ task->tk_status = status; switch (status) { + case -ENOBUFS: + case -EAGAIN: + /* Emulation of wait_for_tcp_memory() */ + rpc_delay(task, (net_random()%(HZ/5))+2); + return; case -ENOMEM: /* Protect against (udp|tcp)_write_space */ spin_lock_bh(&xprt->sock_lock); @@ -1255,7 +1270,6 @@ rpc_sleep_on(&xprt->sending, task, NULL, NULL); } spin_unlock_bh(&xprt->sock_lock); - case -EAGAIN: return; default: if (xprt->stream) @@ -1365,9 +1379,6 @@ req->rq_next = NULL; task->tk_rqstp = req; xprt_request_init(task, xprt); - - if (xprt->free) - xprt_clear_backlog(xprt); return; out_nofree: @@ -1494,7 +1505,6 @@ xprt->nocong = 1; } else xprt->cwnd = RPC_INITCWND; - xprt->congtime = jiffies; spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); @@ -1587,6 +1597,38 @@ } /* + * Set socket buffer length + */ +static void +xprt_sock_setbufsize(struct rpc_xprt *xprt) +{ + struct sock *sk = xprt->inet; + + if (xprt->rcvsize) { + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = xprt->rcvsize; + } + if (xprt->sndsize) { + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = xprt->sndsize; + sk->write_space(sk); + } + +} + +void +xprt_setbufsize(struct rpc_xprt *xprt, int sndsize, int rcvsize) +{ + xprt->sndsize = sndsize * RPC_MAXCONG; + if (xprt->sndsize && xprt->sndsize < SOCK_MIN_SNDBUF) + xprt->sndsize = SOCK_MIN_SNDBUF; + xprt->rcvsize = rcvsize * RPC_MAXCONG; + if (xprt->rcvsize && xprt->rcvsize < SOCK_MIN_RCVBUF) + xprt->rcvsize = SOCK_MIN_RCVBUF; + xprt_sock_setbufsize(xprt); +} + +/* * Create a client socket given the protocol and peer address. */ static struct socket *