tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

nfsd "serializing" patch



Hello,
while working on nfs performance issues with overquota writes (which
turned out to be a ffs issue), I came up with the attached patch.
What this does it, for nfs over TCP, restrict a socket buffer processing
to a single thread (right now, all pending requests are processed
by all threads in parallel). This has two advantages:
- if a single client sends lots of request (like writes comming from a
  linux client), a single thread is busy and other threads will be
  available to serve other client's requests quickly
- by avoiding CPU cache sharing and lock contention at the vnode level
  (if all requests are for the same vnode, which is the common case),
  we get sighly better performances.

My testbed is a linux box with 2 Opteron 2431 (12 core total) and 32GB RAM
writing over gigabit ethernet to a NetBSD server (dual
Intel(R) Xeon(TM) CPU 3.00GHz, 4 hyperthread cores total) running nfsd -tun4.
Without the patch, the server processes about 1230 writes per second,
with this patch it processes about 1250 writes/s.

Comments ?

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: nfs.h
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs.h,v
retrieving revision 1.72
diff -u -p -r1.72 nfs.h
--- nfs.h       2 Mar 2010 23:19:09 -0000       1.72
+++ nfs.h       22 Nov 2012 11:49:55 -0000
@@ -476,7 +476,8 @@ struct nfssvc_sock {
 #define        SLP_A_DISCONN   0x04
 
 /* Bits for "ns_gflags" */
-#define        SLP_G_DOREC     0x02    /* on nfssvc_sockpending queue */
+#define        SLP_G_MOREREQ   0x01    /* the thread should keep processing 
this slp */
+#define        SLP_G_INQUEUE   0x02    /* on nfssvc_sockpending queue */
 
 /* Bits for "ns_sflags" */
 #define        SLP_S_LASTFRAG  0x40
Index: nfs_srvsocket.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_srvsocket.c,v
retrieving revision 1.4
diff -u -p -r1.4 nfs_srvsocket.c
--- nfs_srvsocket.c     3 Sep 2009 20:59:12 -0000       1.4
+++ nfs_srvsocket.c     22 Nov 2012 11:49:55 -0000
@@ -447,10 +447,21 @@ nfsrv_wakenfsd_locked(struct nfssvc_sock
 
        if ((slp->ns_flags & SLP_VALID) == 0)
                return;
-       if (slp->ns_gflags & SLP_G_DOREC)
+       if (slp->ns_gflags & (SLP_G_MOREREQ|SLP_G_INQUEUE))
                return;
+       if (slp->ns_sref > 0 && slp->ns_so->so_type == SOCK_STREAM) {
+               /*
+                * a nfsd thread is already dealing with this socket,
+                * don't split it accross multiple threads
+                */
+               slp->ns_gflags |= SLP_G_MOREREQ;
+               return;
+       }
+
        nd = SLIST_FIRST(&nfsd_idle_head);
        if (nd) {
+               KASSERT((nfsd_head_flag & NFSD_CHECKSLP) == 0);
+               KASSERT(TAILQ_FIRST(&nfssvc_sockpending) == NULL);
                SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
                if (nd->nfsd_slp)
                        panic("nfsd wakeup");
@@ -459,7 +470,7 @@ nfsrv_wakenfsd_locked(struct nfssvc_sock
                nd->nfsd_slp = slp;
                cv_signal(&nd->nfsd_cv);
        } else {
-               slp->ns_gflags |= SLP_G_DOREC;
+               slp->ns_gflags |= SLP_G_INQUEUE;
                nfsd_head_flag |= NFSD_CHECKSLP;
                TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
        }
Index: nfs_syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_syscalls.c,v
retrieving revision 1.153
diff -u -p -r1.153 nfs_syscalls.c
--- nfs_syscalls.c      31 Dec 2009 20:01:33 -0000      1.153
+++ nfs_syscalls.c      22 Nov 2012 11:49:55 -0000
@@ -99,6 +99,7 @@ struct nfssvc_sock *nfs_udp6sock;
 static struct nfssvc_sock *nfsrv_sockalloc(void);
 static void nfsrv_sockfree(struct nfssvc_sock *);
 static void nfsd_rt(int, struct nfsrv_descript *, int);
+static void nfsrv_slpderef_locked(struct nfssvc_sock *);
 
 /*
  * NFS server system calls
@@ -451,86 +452,89 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo
         * Loop getting rpc requests until SIGKILL.
         */
        for (;;) {
-               bool dummy;
-
                if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
                    != 0) {
                        preempt();
                }
-               if (nfsd->nfsd_slp == NULL) {
-                       mutex_enter(&nfsd_lock);
-                       while (nfsd->nfsd_slp == NULL &&
-                           (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
-                               SLIST_INSERT_HEAD(&nfsd_idle_head, nfsd,
-                                   nfsd_idle);
-                               error = cv_wait_sig(&nfsd->nfsd_cv, &nfsd_lock);
-                               if (error) {
-                                       slp = nfsd->nfsd_slp;
-                                       nfsd->nfsd_slp = NULL;
-                                       if (!slp)
-                                               SLIST_REMOVE(&nfsd_idle_head,
-                                                   nfsd, nfsd, nfsd_idle);
-                                       mutex_exit(&nfsd_lock);
-                                       if (slp) {
-                                               nfsrv_wakenfsd(slp);
-                                               nfsrv_slpderef(slp);
-                                       }
-                                       goto done;
-                               }
+               mutex_enter(&nfsd_lock);
+               if (nfsd->nfsd_slp != NULL) {
+                       slp = nfsd->nfsd_slp;
+                       if ((slp->ns_gflags & SLP_G_MOREREQ) == 0) {
+                               nfsd->nfsd_slp = NULL;
+                               nfsrv_slpderef_locked(slp);
+                               mutex_enter(&nfsd_lock);
+                       } else {
+                               slp->ns_gflags &= ~SLP_G_MOREREQ;
                        }
-                       if (nfsd->nfsd_slp == NULL &&
-                           (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
-                               slp = TAILQ_FIRST(&nfssvc_sockpending);
+               }
+               while (nfsd->nfsd_slp == NULL &&
+                   (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
+                       SLIST_INSERT_HEAD(&nfsd_idle_head, nfsd,
+                           nfsd_idle);
+                       error = cv_wait_sig(&nfsd->nfsd_cv, &nfsd_lock);
+                       if (error) {
+                               slp = nfsd->nfsd_slp;
+                               nfsd->nfsd_slp = NULL;
+                               if (!slp)
+                                       SLIST_REMOVE(&nfsd_idle_head,
+                                           nfsd, nfsd, nfsd_idle);
+                               mutex_exit(&nfsd_lock);
                                if (slp) {
-                                       KASSERT((slp->ns_gflags & SLP_G_DOREC)
-                                           != 0);
-                                       TAILQ_REMOVE(&nfssvc_sockpending, slp,
-                                           ns_pending);
-                                       slp->ns_gflags &= ~SLP_G_DOREC;
-                                       slp->ns_sref++;
-                                       nfsd->nfsd_slp = slp;
-                               } else
-                                       nfsd_head_flag &= ~NFSD_CHECKSLP;
+                                       nfsrv_wakenfsd(slp);
+                                       nfsrv_slpderef(slp);
+                               }
+                               goto done;
                        }
-                       KASSERT(nfsd->nfsd_slp == NULL ||
-                           nfsd->nfsd_slp->ns_sref > 0);
-                       mutex_exit(&nfsd_lock);
-                       if ((slp = nfsd->nfsd_slp) == NULL)
-                               continue;
-                       if (slp->ns_flags & SLP_VALID) {
-                               bool more;
+               }
+               if (nfsd->nfsd_slp == NULL &&
+                   (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
+                       slp = TAILQ_FIRST(&nfssvc_sockpending);
+                       if (slp) {
+                               KASSERT((slp->ns_gflags & SLP_G_INQUEUE)
+                                   != 0);
+                               TAILQ_REMOVE(&nfssvc_sockpending, slp,
+                                   ns_pending);
+                               slp->ns_gflags &= ~SLP_G_INQUEUE;
+                               slp->ns_sref++;
+                               nfsd->nfsd_slp = slp;
+                       } else
+                               nfsd_head_flag &= ~NFSD_CHECKSLP;
+               }
+               KASSERT(nfsd->nfsd_slp == NULL ||
+                   nfsd->nfsd_slp->ns_sref > 0);
+               mutex_exit(&nfsd_lock);
+               if ((slp = nfsd->nfsd_slp) == NULL)
+                       continue;
+               if (slp->ns_flags & SLP_VALID) {
+                       bool more;
 
-                               if (nfsdsock_testbits(slp, SLP_A_NEEDQ)) {
-                                       nfsrv_rcv(slp);
-                               }
-                               if (nfsdsock_testbits(slp, SLP_A_DISCONN)) {
-                                       nfsrv_zapsock(slp);
-                               }
-                               error = nfsrv_dorec(slp, nfsd, &nd, &more);
-                               getmicrotime(&tv);
-                               cur_usec = (u_quad_t)tv.tv_sec * 1000000 +
-                                       (u_quad_t)tv.tv_usec;
-                               writes_todo = 0;
-                               if (error) {
-                                       struct nfsrv_descript *nd2;
+                       if (nfsdsock_testbits(slp, SLP_A_NEEDQ)) {
+                               nfsrv_rcv(slp);
+                       }
+                       if (nfsdsock_testbits(slp, SLP_A_DISCONN)) {
+                               nfsrv_zapsock(slp);
+                       }
+                       error = nfsrv_dorec(slp, nfsd, &nd, &more);
+                       getmicrotime(&tv);
+                       cur_usec = (u_quad_t)tv.tv_sec * 1000000 +
+                               (u_quad_t)tv.tv_usec;
+                       writes_todo = 0;
+                       if (error) {
+                               struct nfsrv_descript *nd2;
 
-                                       mutex_enter(&nfsd_lock);
-                                       nd2 = LIST_FIRST(&slp->ns_tq);
-                                       if (nd2 != NULL &&
-                                           nd2->nd_time <= cur_usec) {
-                                               error = 0;
-                                               cacherep = RC_DOIT;
-                                               writes_todo = 1;
-                                       }
-                                       mutex_exit(&nfsd_lock);
-                               }
-                               if (error == 0 && more) {
-                                       nfsrv_wakenfsd(slp);
+                               mutex_enter(&nfsd_lock);
+                               nd2 = LIST_FIRST(&slp->ns_tq);
+                               if (nd2 != NULL &&
+                                   nd2->nd_time <= cur_usec) {
+                                       error = 0;
+                                       cacherep = RC_DOIT;
+                                       writes_todo = 1;
                                }
+                               mutex_exit(&nfsd_lock);
+                       }
+                       if (error == 0 && more) {
+                               nfsrv_wakenfsd(slp);
                        }
-               } else {
-                       error = 0;
-                       slp = nfsd->nfsd_slp;
                }
                KASSERT(slp != NULL);
                KASSERT(nfsd->nfsd_slp == slp);
@@ -539,8 +543,10 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo
                                nfsdreq_free(nd);
                                nd = NULL;
                        }
-                       nfsd->nfsd_slp = NULL;
-                       nfsrv_slpderef(slp);
+                       if ((slp->ns_flags & SLP_VALID) == 0) {
+                               nfsd->nfsd_slp = NULL;
+                               nfsrv_slpderef(slp);
+                       }
                        continue;
                }
                sotype = slp->ns_so->so_type;
@@ -683,10 +689,6 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo
                                writes_todo = 0;
                        mutex_exit(&nfsd_lock);
                } while (writes_todo);
-               if (nfsrv_dorec(slp, nfsd, &nd, &dummy)) {
-                       nfsd->nfsd_slp = NULL;
-                       nfsrv_slpderef(slp);
-               }
        }
 done:
        mutex_enter(&nfsd_lock);
@@ -724,9 +726,9 @@ nfsrv_zapsock(struct nfssvc_sock *slp)
                return;
        }
        mutex_enter(&nfsd_lock);
-       if (slp->ns_gflags & SLP_G_DOREC) {
+       if (slp->ns_gflags & SLP_G_INQUEUE) {
                TAILQ_REMOVE(&nfssvc_sockpending, slp, ns_pending);
-               slp->ns_gflags &= ~SLP_G_DOREC;
+               slp->ns_gflags &= ~SLP_G_INQUEUE;
        }
        mutex_exit(&nfsd_lock);
 
@@ -775,15 +777,22 @@ nfsrv_zapsock(struct nfssvc_sock *slp)
 void
 nfsrv_slpderef(struct nfssvc_sock *slp)
 {
+       mutex_enter(&nfsd_lock);
+       nfsrv_slpderef_locked(slp);
+}
+
+static void
+nfsrv_slpderef_locked(struct nfssvc_sock *slp)
+{
        uint32_t ref;
 
-       mutex_enter(&nfsd_lock);
        KASSERT(slp->ns_sref > 0);
+       KASSERT(mutex_owned(&nfsd_lock));
        ref = --slp->ns_sref;
        if (ref == 0 && (slp->ns_flags & SLP_VALID) == 0) {
                file_t *fp;
 
-               KASSERT((slp->ns_gflags & SLP_G_DOREC) == 0);
+               KASSERT((slp->ns_gflags & SLP_G_INQUEUE) == 0);
                TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
                mutex_exit(&nfsd_lock);
 


Home | Main Index | Thread Index | Old Index