From: Trond Myklebust I can never guarantee you perfect service with soft mounts (a 5 second network partition/server congestion is all it takes) but I do have a patch that just went into 2.4.22 that backs out some of the Van Jacobson exponential backoff changes. This helps stabilize things a lot. I haven't yet had time to port that patch to 2.5.x, but the code should be pretty much identical, so if you want to give it a go, then here it is... net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprt.c | 30 ++++-------------------------- 2 files changed, 6 insertions(+), 28 deletions(-) diff -puN net/sunrpc/clnt.c~nfs-revert-backoff net/sunrpc/clnt.c --- 25/net/sunrpc/clnt.c~nfs-revert-backoff 2003-07-31 22:08:57.000000000 -0700 +++ 25-akpm/net/sunrpc/clnt.c 2003-07-31 22:08:57.000000000 -0700 @@ -744,14 +744,14 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); if (clnt->cl_softrtry) { - if (clnt->cl_chatty && !task->tk_exit) + if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN) && rpc_ntimeo(&clnt->cl_rtt) > 7) { + if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); diff -puN net/sunrpc/xprt.c~nfs-revert-backoff net/sunrpc/xprt.c --- 25/net/sunrpc/xprt.c~nfs-revert-backoff 2003-07-31 22:08:57.000000000 -0700 +++ 25-akpm/net/sunrpc/xprt.c 2003-07-31 22:08:57.000000000 -0700 @@ -1040,21 +1040,6 @@ out: } /* - * Exponential backoff for UDP retries - */ -static inline int -xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req) -{ - int backoff; - - req->rq_ntimeo++; - backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF); - if (req->rq_ntimeo < (1 << backoff)) - return 1; - return 0; -} - -/* - * RPC 
receive timeout handler. */ static void @@ -1067,14 +1052,7 @@ xprt_timer(struct rpc_task *task) if (req->rq_received) goto out; - if (!xprt->nocong) { - if (xprt_expbackoff(task, req)) { - rpc_add_timer(task, xprt_timer); - goto out_unlock; - } - rpc_inc_timeo(&task->tk_client->cl_rtt); - xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); - } + xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); req->rq_nresend++; dprintk("RPC: %4d xprt_timer (%s request)\n", @@ -1084,7 +1062,6 @@ xprt_timer(struct rpc_task *task) out: task->tk_timeout = 0; rpc_wake_up_task(task); -out_unlock: spin_unlock(&xprt->sock_lock); } @@ -1220,16 +1197,17 @@ xprt_transmit(struct rpc_task *task) return; out_receive: dprintk("RPC: %4d xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->sock_lock); /* Set the task's receive timeout value */ if (!xprt->nocong) { task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, task->tk_msg.rpc_proc->p_timer); - req->rq_ntimeo = 0; + task->tk_timeout <<= clnt->cl_timeout.to_retries + - req->rq_timeout.to_retries; if (task->tk_timeout > req->rq_timeout.to_maxval) task->tk_timeout = req->rq_timeout.to_maxval; } else task->tk_timeout = req->rq_timeout.to_current; - spin_lock_bh(&xprt->sock_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; _