/*
 * Unified diff ("diff -urN") of the nfs tree against nfs-ref.  It touches
 * fs/nfs/inode.c, fs/nfs/write.c, include/linux/sunrpc/xprt.h and
 * net/sunrpc/clnt.c, sched.c, sunrpc_syms.c and xprt.c.
 *
 * NOTE(review): in this copy the hunk headers and the hunk bodies share
 * physical lines, so the original line breaks appear to have been lost;
 * presumably they have to be restored before patch(1) can apply it.
 *
 * Summary of the changes, by file:
 *
 * fs/nfs/inode.c
 *   Calls xprt_setbufsize(xprt, server->wsize + 1024, server->rsize + 1024)
 *   just ahead of the "start the lockd process" check.
 *
 * fs/nfs/write.c
 *   Adds a schedule() call, taken when current->need_resched is set, after
 *   the NFS_STRATEGY_PAGES flush check.
 *
 * include/linux/sunrpc/xprt.h
 *   Replaces the congtime field with "int sndsize" and "int rcvsize", and
 *   declares xprt_setbufsize().
 *
 * net/sunrpc/clnt.c
 *   - cl_stats->rpccnt is no longer incremented next to xprt_reserve(); it
 *     is incremented where tk_status >= 0 selects call_allocate.
 *   - -ETIMEDOUT now falls through into the -EAGAIN/-ENOBUFS case (retry
 *     through call_reserve) instead of selecting call_timeout.
 *   - On buffer allocation failure the task retries call_allocate after
 *     rpc_delay(task, HZ>>4); the xprt_release() + call_reserve pair is
 *     removed.
 *   - -ENOBUFS joins -ENOMEM/-EAGAIN in selecting call_transmit.
 *
 * net/sunrpc/sched.c
 *   - __rpc_disable_timer() now calls del_timer() when the task timer is
 *     pending.
 *   - Adds a reschedule check that is compiled out with #if 0.
 *   - The "safeguard" schedule() also runs when current->need_resched is
 *     set, not only after 64 rounds.
 *   - The task dump gains a "runs" column that prints tk_runstate as %04lx.
 *
 * net/sunrpc/sunrpc_syms.c
 *   Exports xprt_setbufsize.
 *
 * net/sunrpc/xprt.c
 *   - -ENOBUFS is handled together with -EAGAIN: when SOCK_NOSPACE is set
 *     the result becomes -ENOMEM.
 *   - Congestion window update: cwnd grows by RPC_CWNDSCALE (and
 *     xprt_clear_backlog() is called) when result >= 0, cwnd < RPC_MAXCWND
 *     and cong == cwnd; it shrinks by RPC_CWNDSCALE on -ETIMEDOUT while
 *     cwnd > RPC_CWNDSCALE.  All congtime bookkeeping is removed.
 *   - xprt_sock_setbufsize() is called right after xprt_bind_socket() on a
 *     newly created socket.
 *   - New tcp_recv_available(): issues a TIOCINQ ioctl under KERNEL_DS and
 *     returns the reported count, or the ioctl error when it is negative.
 *     The TCP record reader returns -EAGAIN when that value is smaller
 *     than avail.
 *   - Transmit status switch: -ENOBUFS/-EAGAIN now call
 *     rpc_delay(task, (net_random()%(HZ/5))+2) and return; the old
 *     -EAGAIN label that followed the -ENOMEM case is removed.
 *   - The xprt_clear_backlog() call guarded by xprt->free is removed from
 *     after xprt_request_init().
 *   - New xprt_sock_setbufsize(): for a non-zero rcvsize/sndsize it sets
 *     SOCK_RCVBUF_LOCK/SOCK_SNDBUF_LOCK in sk->userlocks and stores the
 *     size in sk->rcvbuf/sk->sndbuf; after changing sndbuf it calls
 *     sk->write_space(sk).
 *   - New xprt_setbufsize(): stores sndsize and rcvsize multiplied by
 *     RPC_MAXCONG, raises non-zero values below SOCK_MIN_SNDBUF /
 *     SOCK_MIN_RCVBUF to that minimum, then calls xprt_sock_setbufsize().
 *
 * NOTE(review): xprt_sock_setbufsize() dereferences xprt->inet without a
 * NULL check, and the socket-creation hunk suggests a transport can exist
 * without a socket; confirm xprt_setbufsize() cannot be reached in that
 * state.
 */
diff -urN nfs-ref/fs/nfs/inode.c nfs/fs/nfs/inode.c --- nfs-ref/fs/nfs/inode.c Fri Mar 29 21:42:28 2002 +++ nfs/fs/nfs/inode.c Fri Mar 29 21:42:44 2002 @@ -488,7 +488,8 @@ goto failure_kill_reqlist; } - /* We're airborne */ + /* We're airborne. Set socket buffersize */ + xprt_setbufsize(xprt, server->wsize + 1024, server->rsize + 1024); /* Check whether to start the lockd process */ if (!(server->flags & NFS_MOUNT_NONLM)) diff -urN nfs-ref/fs/nfs/write.c nfs/fs/nfs/write.c --- nfs-ref/fs/nfs/write.c Fri Mar 29 21:42:28 2002 +++ nfs/fs/nfs/write.c Fri Mar 29 21:42:44 2002 @@ -757,6 +757,8 @@ if (dirty >= NFS_STRATEGY_PAGES * wpages) nfs_flush_file(inode, NULL, 0, 0, 0); #endif + if (current->need_resched) + schedule(); } int diff -urN nfs-ref/include/linux/sunrpc/xprt.h nfs/include/linux/sunrpc/xprt.h --- nfs-ref/include/linux/sunrpc/xprt.h Fri Mar 29 21:42:28 2002 +++ nfs/include/linux/sunrpc/xprt.h Fri Mar 29 21:42:44 2002 @@ -132,7 +132,8 @@ unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned long congtime; /* hold cwnd until then */ + int sndsize; /* length send buffer */ + int rcvsize; /* length receive buffer */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ @@ -175,6 +176,7 @@ struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *toparms); +void xprt_setbufsize(struct rpc_xprt *, int, int); int xprt_destroy(struct rpc_xprt *); void xprt_shutdown(struct rpc_xprt *); void xprt_default_timeout(struct rpc_timeout *, int); diff -urN nfs-ref/net/sunrpc/clnt.c nfs/net/sunrpc/clnt.c --- nfs-ref/net/sunrpc/clnt.c Fri Mar 29 21:42:28 2002 +++ nfs/net/sunrpc/clnt.c Fri Mar 29 21:42:44 2002 @@ -371,7 +371,6 @@ task->tk_status = 0; task->tk_action = call_reserveresult; task->tk_timeout = clnt->cl_timeout.to_resrvval; - clnt->cl_stats->rpccnt++; xprt_reserve(task); } @@ -395,21 +394,20 @@ task->tk_status, 
task->tk_rqstp); if (task->tk_status >= 0) { + task->tk_client->cl_stats->rpccnt++; task->tk_action = call_allocate; return; } task->tk_status = 0; switch (status) { + case -ETIMEDOUT: + dprintk("RPC: task timed out\n"); case -EAGAIN: case -ENOBUFS: task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; task->tk_action = call_reserve; break; - case -ETIMEDOUT: - dprintk("RPC: task timed out\n"); - task->tk_action = call_timeout; - break; default: if (!task->tk_rqstp) { printk(KERN_INFO "RPC: task has no request, exit EIO\n"); @@ -444,8 +442,7 @@ printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) { - xprt_release(task); - task->tk_action = call_reserve; + task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; } @@ -620,6 +617,7 @@ } case -ENOMEM: case -EAGAIN: + case -ENOBUFS: task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; break; diff -urN nfs-ref/net/sunrpc/sched.c nfs/net/sunrpc/sched.c --- nfs-ref/net/sunrpc/sched.c Fri Mar 29 21:42:28 2002 +++ nfs/net/sunrpc/sched.c Fri Mar 29 21:42:44 2002 @@ -105,7 +105,11 @@ static inline void __rpc_disable_timer(struct rpc_task *task) { + struct timer_list *timer = &task->tk_timer; + dprintk("RPC: %4d disabling timer\n", task->tk_pid); + if (timer_pending(timer)) + del_timer(timer); task->tk_timeout_fn = NULL; task->tk_timeout = 0; } @@ -626,6 +630,11 @@ rpc_wake_up_task(task); } } +#if 0 + /* Note: sync_page() is called with TASK_UNINTERRUPTIBLE set */ + if (current->need_resched && current->state == TASK_RUNNING) + schedule(); +#endif } if (task->tk_exit) { @@ -1076,7 +1085,7 @@ } __rpc_schedule(); - if (++rounds >= 64) { /* safeguard */ + if (++rounds >= 64 || current->need_resched) { /* safeguard */ schedule(); rounds = 0; } @@ -1217,12 +1226,12 @@ spin_unlock(&rpc_sched_lock); return; } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + printk("-pid- proc flgs runs status -client- 
-prog- --rqstp- -timeout " "-rpcwait -action- --exit--\n"); for (; t; t = next) { next = t->tk_next_task; - printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", - t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status, + printk("%05d %04d %04x %04lx %06d %8p %6d %8p %08ld %8s %8p %8p\n", + t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_runstate, t->tk_status, t->tk_client, t->tk_client->cl_prog, t->tk_rqstp, t->tk_timeout, t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " ", diff -urN nfs-ref/net/sunrpc/sunrpc_syms.c nfs/net/sunrpc/sunrpc_syms.c --- nfs-ref/net/sunrpc/sunrpc_syms.c Fri Mar 29 21:42:28 2002 +++ nfs/net/sunrpc/sunrpc_syms.c Fri Mar 29 21:42:44 2002 @@ -55,6 +55,7 @@ EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_destroy); EXPORT_SYMBOL(xprt_set_timeout); +EXPORT_SYMBOL(xprt_setbufsize); /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); diff -urN nfs-ref/net/sunrpc/xprt.c nfs/net/sunrpc/xprt.c --- nfs-ref/net/sunrpc/xprt.c Fri Mar 29 21:42:28 2002 +++ nfs/net/sunrpc/xprt.c Fri Mar 29 21:42:44 2002 @@ -87,6 +87,7 @@ static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); static int xprt_bind_socket(struct rpc_xprt *, struct socket *); +static void xprt_sock_setbufsize(struct rpc_xprt *xprt); static void xprt_remove_pending(struct rpc_xprt *); #ifdef RPC_DEBUG_DATA @@ -252,6 +253,7 @@ * prompts ECONNREFUSED. */ break; + case -ENOBUFS: case -EAGAIN: if (test_bit(SOCK_NOSPACE, &sock->flags)) result = -ENOMEM; @@ -321,32 +323,16 @@ */ spin_lock(&xprt->xprt_lock); cwnd = xprt->cwnd; - if (result >= 0) { - if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) - goto out; - /* The (cwnd >> 1) term makes sure - * the result gets rounded properly. 
*/ - cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; - if (cwnd > RPC_MAXCWND) - cwnd = RPC_MAXCWND; - else - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); - xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE; - dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); - } else if (result == -ETIMEDOUT) { - if ((cwnd >>= 1) < RPC_CWNDSCALE) - cwnd = RPC_CWNDSCALE; - xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE; - dprintk("RPC: cong %ld, cwnd was %ld, now %ld, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); - } + if (result >= 0 && cwnd < RPC_MAXCWND && xprt->cong == cwnd) { + cwnd += RPC_CWNDSCALE; + xprt_clear_backlog(xprt); + } else if (result == -ETIMEDOUT && cwnd > RPC_CWNDSCALE) + cwnd -= RPC_CWNDSCALE; + + dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx\n", + xprt->cong, xprt->cwnd, cwnd); xprt->cwnd = cwnd; - out: spin_unlock(&xprt->xprt_lock); } @@ -463,6 +449,7 @@ if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout))) goto defer; xprt_bind_socket(xprt, sock); + xprt_sock_setbufsize(xprt); inet = sock->sk; } @@ -726,6 +713,22 @@ } /* + * Check input queue length + */ +static int +tcp_recv_available(struct socket *sock) +{ + mm_segment_t oldfs; + int avail, err; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); + set_fs(oldfs); + + return (err >= 0)? 
avail : err; +} + +/* * TCP read fragment marker */ static inline int @@ -880,6 +883,9 @@ if (!(avail = result)) goto out_ok; + if (tcp_recv_available(xprt->sock) < avail) + return -EAGAIN; + /* Find and lock the request corresponding to this xid */ req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (req) { @@ -1243,6 +1249,11 @@ task->tk_status = status; switch (status) { + case -ENOBUFS: + case -EAGAIN: + /* Emulation of wait_for_tcp_memory() */ + rpc_delay(task, (net_random()%(HZ/5))+2); + return; case -ENOMEM: /* Protect against (udp|tcp)_write_space */ spin_lock_bh(&xprt->sock_lock); @@ -1251,7 +1262,6 @@ rpc_sleep_on(&xprt->sending, task, NULL, NULL); } spin_unlock_bh(&xprt->sock_lock); - case -EAGAIN: return; default: if (xprt->stream) @@ -1361,9 +1371,6 @@ req->rq_next = NULL; task->tk_rqstp = req; xprt_request_init(task, xprt); - - if (xprt->free) - xprt_clear_backlog(xprt); return; out_nofree: @@ -1490,7 +1497,6 @@ xprt->nocong = 1; } else xprt->cwnd = RPC_INITCWND; - xprt->congtime = jiffies; spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); @@ -1580,6 +1586,38 @@ rpciod_up(); return 0; +} + +/* + * Set socket buffer length + */ +static void +xprt_sock_setbufsize(struct rpc_xprt *xprt) +{ + struct sock *sk = xprt->inet; + + if (xprt->rcvsize) { + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = xprt->rcvsize; + } + if (xprt->sndsize) { + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = xprt->sndsize; + sk->write_space(sk); + } + +} + +void +xprt_setbufsize(struct rpc_xprt *xprt, int sndsize, int rcvsize) +{ + xprt->sndsize = sndsize * RPC_MAXCONG; + if (xprt->sndsize && xprt->sndsize < SOCK_MIN_SNDBUF) + xprt->sndsize = SOCK_MIN_SNDBUF; + xprt->rcvsize = rcvsize * RPC_MAXCONG; + if (xprt->rcvsize && xprt->rcvsize < SOCK_MIN_RCVBUF) + xprt->rcvsize = SOCK_MIN_RCVBUF; + xprt_sock_setbufsize(xprt); } /*