tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Attempt to fix nfs client



TLDR: nfs_reqq crrptin, see diff below.

I've been trying to track down the source of NFS client hangs and
crashes over the last weeks, in order to get back to the things I really
wanted to do...

No special setup, the NFS Server is an OpenBSD machine serving files
(pkgsrc, src, xsrc) from SSD (= fast replies). gbit net, udp intr
mounts.

There are any number of bug reports in gnats about this.
1) Can't interrupt hanging process on 'intr' mount.

2) Client goes dead silent and hangs despite tcpdump showing correct
   reply from server.

3) Client crashes in all places dealing with nfsreq/nfsmount pointers.
   Mostly nfs_timer.

The first was easy to fix. The main loop of nfs_receive() doesn't check
for signals.

The second issue is looping in nfs_receive, waiting for any reply to
arrive. This is supposed to be fulfilled by nfs_timer re-sending the
request, but investigation shows nfs_timer isn't scheduled because
nfs_reqq is empty(!)

I'm not 100% certain multiple lwps are racing to add and remove their
requests to the global list. I could never spot contention between them
(using mutex_trylock), but I assume they're doing so.  Once insert and
nfsreq removal is locked, crashes were confined to nfs_timer.

Before one could observe funny situations where nfs_receive() would
return the desired reply but nfs_reply() could not find the outstanding
request in the nfs_reqq, then discarding the valid reply as unexptected
data.

Finally, the crashes in nfs_timer. Allocating zero'ed nfsreq and
overwriting with junk before freeing will quickly crash the machine.

Sprinkling a few printfs around, it's clear that nfs_timer (callout,
kernel_lock'ed, blocked from starting by splsoftnet) is giving up its
lock at some point (presumably when calling into pr_send). Lwps
will then modify and free their nfsreq in between nfs_timer
operating on the nfs_reqq.

PR 40491 (http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=40491)
basically spells it out in one sentence, it just doesn't go far
enough. Both nfs_reqq and nfsreq can evaporate from under you.

Below is my attempt at patching this up. It seems a bit heavy handed but
so far it's been 100% reliable (even serving nfs mounts) and perfomance is
unchanged.  Contention is low because nfs_timer is mostly blocked from
running by splsoftnet, and it's a recovery thing anyway.

Suggestions? Ideas?

Index: nfs/nfs.h
===================================================================
RCS file: /home/vcs/cvs/netbsd/src/sys/nfs/nfs.h,v
retrieving revision 1.75
diff -u -p -r1.75 nfs.h
--- nfs/nfs.h	20 Apr 2015 13:12:24 -0000	1.75
+++ nfs/nfs.h	20 Jan 2018 11:29:17 -0000
@@ -341,6 +341,7 @@ struct nfsreq {
  * Queue head for nfsreq's
  */
 extern TAILQ_HEAD(nfsreqhead, nfsreq) nfs_reqq;
+extern kmutex_t nfs_reqq_lock;
 
 /* Flag values for r_flags */
 #define R_TIMING	0x01		/* timing request (in mntp) */
Index: nfs/nfs_clntsocket.c
===================================================================
RCS file: /home/vcs/cvs/netbsd/src/sys/nfs/nfs_clntsocket.c,v
retrieving revision 1.5
diff -u -p -r1.5 nfs_clntsocket.c
--- nfs/nfs_clntsocket.c	17 Jun 2016 14:28:29 -0000	1.5
+++ nfs/nfs_clntsocket.c	20 Jan 2018 11:29:17 -0000
@@ -294,9 +294,11 @@ errout:
 			rcvflg = 0;
 			error =  (*so->so_receive)(so, getnam, &auio, mp,
 				NULL, &rcvflg);
-			if (error == EWOULDBLOCK &&
-			    (rep->r_flags & R_SOFTTERM))
-				return (EINTR);
+			if (error == EWOULDBLOCK) {
+				int intr = nfs_sigintr(rep->r_nmp, rep, l);
+				if (intr)
+					error = intr;
+			}
 		} while (error == EWOULDBLOCK);
 		len -= auio.uio_resid;
 		if (!error && *mp == NULL)
@@ -403,6 +405,7 @@ nfsmout:
 		 * Iff no match, just drop the datagram
 		 */
 		s = splsoftnet();
+		mutex_enter(&nfs_reqq_lock);
 		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
 			if (rep->r_mrep != NULL || rxid != rep->r_xid)
 				continue;
@@ -468,6 +471,7 @@ nfsmout:
 			nmp->nm_timeouts = 0;
 			break;
 		}
+		mutex_exit(&nfs_reqq_lock);
 		splx(s);
 		nfs_rcvunlock(nmp);
 		/*
@@ -653,7 +657,9 @@ tryagain:
 	 * to put it LAST so timer finds oldest requests first.
 	 */
 	s = splsoftnet();
+	mutex_enter(&nfs_reqq_lock);
 	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
+	mutex_exit(&nfs_reqq_lock);
 	nfs_timer_start();
 
 	/*
@@ -695,7 +701,9 @@ tryagain:
 	 * RPC done, unlink the request.
 	 */
 	s = splsoftnet();
+	mutex_enter(&nfs_reqq_lock);
 	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
+	mutex_exit(&nfs_reqq_lock);
 
 	/*
 	 * Decrement the outstanding request count.
Index: nfs/nfs_socket.c
===================================================================
RCS file: /home/vcs/cvs/netbsd/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.198
diff -u -p -r1.198 nfs_socket.c
--- nfs/nfs_socket.c	17 Jun 2016 14:28:29 -0000	1.198
+++ nfs/nfs_socket.c	20 Jan 2018 11:29:17 -0000
@@ -166,6 +166,7 @@ int nfsrtton = 0;  
 struct nfsrtt nfsrtt;
 static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 struct nfsreqhead nfs_reqq;
+kmutex_t nfs_reqq_lock;
 static callout_t nfs_timer_ch;
 static struct evcnt nfs_timer_ev;
 static struct evcnt nfs_timer_start_ev;
@@ -385,6 +386,7 @@ nfs_reconnect(struct nfsreq *rep)
 	 * on old socket.
 	 */
 	s = splsoftnet();
+	mutex_enter(&nfs_reqq_lock);
 	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
 		if (rp->r_nmp == nmp) {
 			if ((rp->r_flags & R_MUSTRESEND) == 0)
@@ -392,6 +394,7 @@ nfs_reconnect(struct nfsreq *rep)
 			rp->r_rexmit = 0;
 		}
 	}
+	mutex_exit(&nfs_reqq_lock);
 	splx(s);
 	return (0);
 }
@@ -759,7 +762,7 @@ nfs_timer(void *arg)
 
 	nfs_timer_ev.ev_count++;
 
-	mutex_enter(softnet_lock);	/* XXX PR 40491 */
+	mutex_enter(&nfs_reqq_lock);
 	TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
 		more = true;
 		nmp = rep->r_nmp;
@@ -813,7 +816,7 @@ nfs_timer(void *arg)
 		 *	Resend it
 		 * Set r_rtt to -1 in case we fail to send it now.
 		 */
-		/* solock(so);		XXX PR 40491 */
+		solock(so);
 		rep->r_rtt = -1;
 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
@@ -858,9 +861,9 @@ nfs_timer(void *arg)
 				rep->r_rtt = 0;
 			}
 		}
-		/* sounlock(so);	XXX PR 40491 */
+		sounlock(so);
 	}
-	mutex_exit(softnet_lock);	/* XXX PR 40491 */
+	mutex_exit(&nfs_reqq_lock);
 
 	mutex_enter(&nfs_timer_lock);
 	if (nfs_timer_srvvec != NULL) {
Index: nfs/nfs_subs.c
===================================================================
RCS file: /home/vcs/cvs/netbsd/src/sys/nfs/nfs_subs.c,v
retrieving revision 1.229
diff -u -p -r1.229 nfs_subs.c
--- nfs/nfs_subs.c	1 Apr 2017 19:35:57 -0000	1.229
+++ nfs/nfs_subs.c	20 Jan 2018 11:29:17 -0000
@@ -1495,6 +1495,7 @@ nfs_init0(void)
 	 * Initialize reply list and start timer
 	 */
 	TAILQ_INIT(&nfs_reqq);
+	mutex_init(&nfs_reqq_lock, MUTEX_DEFAULT, IPL_NONE);
 	nfs_timer_init();
 	MOWNER_ATTACH(&nfs_mowner);
 


Home | Main Index | Thread Index | Old Index