tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: how can the nfs timer work?



On Thu Dec 11 2008 at 01:55:33 +0200, Antti Kantee wrote:
> I see crashes pretty much immediately if I stress rump_nfs.  If I add some
> primitive form of synchronization, the problems go away.  But I'm really
> curious why this doesn't trigger in the kernel pretty much instantly.
> Note: I don't run a -current kernel, but I can't remember seeing any
> great complaints about a non-functional nfs client on the lists either.

So I got an out-of-band communication stating that there have been
problems.  You can try this patch to see if it makes things better
(it's ugly and can't be committed as such, but the purpose of it was
testing my hypothesis).
Index: nfs_socket.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.175
diff -p -u -r1.175 nfs_socket.c
--- nfs_socket.c        23 Nov 2008 08:22:07 -0000      1.175
+++ nfs_socket.c        11 Dec 2008 00:47:55 -0000
@@ -955,6 +955,13 @@ nfsmout:
        }
 }
 
+#if 0
+#define DPRINTF(x) printf x
+#else
+#define DPRINTF(x)
+#endif
+
+static volatile int timerrestart;
 /*
  * nfs_request - goes something like this
  *     - fill in request struct
@@ -1131,6 +1138,7 @@ tryagain:
         * Chain request into list of outstanding requests. Be sure
         * to put it LAST so timer finds oldest requests first.
         */
+       DPRINTF(("chaining %p into the list\n", rep));
        s = splsoftnet();
        TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
        nfs_timer_start();
@@ -1165,6 +1173,9 @@ tryagain:
         */
        if (!error || error == EPIPE)
                error = nfs_reply(rep, lwp);
+       timerrestart = 1;
+
+       DPRINTF(("got %p, taking off\n", rep));
 
        /*
         * RPC done, unlink the request.
@@ -1635,7 +1646,6 @@ nfs_timer_srvfini(void)
        mutex_exit(&nfs_timer_lock);
 }
 
-
 /*
  * Nfs timer routine
  * Scan the nfsreq list and retranmit any requests that have timed out
@@ -1655,8 +1665,12 @@ nfs_timer(void *arg)
 
        nfs_timer_ev.ev_count++;
 
+       DPRINTF(("running timer\n"));
        s = splsoftnet();
+ again:
        TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
+               DPRINTF(("timer processing %p\n", rep));
+               timerrestart = 0;
                more = true;
                nmp = rep->r_nmp;
                if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
@@ -1673,8 +1687,10 @@ nfs_timer(void *arg)
                                timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
                        if (nmp->nm_timeouts > 0)
                                timeo *= nfs_backoff[nmp->nm_timeouts - 1];
-                       if (rep->r_rtt <= timeo)
+                       if (rep->r_rtt <= timeo) {
+                               DPRINTF(("tmo %d %d\n", rep->r_rtt, timeo));
                                continue;
+                       }
                        if (nmp->nm_timeouts <
                            (sizeof(nfs_backoff) / sizeof(nfs_backoff[0])))
                                nmp->nm_timeouts++;
@@ -1702,12 +1718,20 @@ nfs_timer(void *arg)
                if ((so = nmp->nm_so) == NULL)
                        continue;
 
+               DPRINTF(("before sblock\n"));
+               solock(so);
+               if (timerrestart) {
+                       sounlock(so);
+                       printf("oh no, more lemmings\n");
+                       goto again;
+               }
+
+               DPRINTF(("processing %p, next %p\n", rep, TAILQ_NEXT(rep, r_chain)));
                /*
                 * If there is enough space and the window allows..
                 *      Resend it
                 * Set r_rtt to -1 in case we fail to send it now.
                 */
-               solock(so);
                rep->r_rtt = -1;
                if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
                   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
@@ -1720,6 +1744,11 @@ nfs_timer(void *arg)
                        else
                            error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
                            nmp->nm_nam, (struct mbuf *)0, (struct lwp *)0);
+                       if (timerrestart) {
+                               printf("restart 2\n");
+                               sounlock(so);
+                               goto again;
+                       }
                        if (error) {
                                if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
 #ifdef DEBUG
@@ -1754,6 +1783,7 @@ nfs_timer(void *arg)
                sounlock(so);
        }
        splx(s);
+       DPRINTF(("exit\n"));
 
        mutex_enter(&nfs_timer_lock);
        if (nfs_timer_srvvec != NULL) {


Home | Main Index | Thread Index | Old Index