Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src - Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inact...



details:   https://anonhg.NetBSD.org/src/rev/fc207dc9d333
branches:  trunk
changeset: 780359:fc207dc9d333
user:      manu <manu%NetBSD.org@localhost>
date:      Sat Jul 21 05:17:10 2012 +0000

description:
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.

The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time holds memory
in the file server process, and when the kernel runs out of vnodes, it
produces reclaim avalanches that increase latency for other operations.

We do not reclaim inactive vnodes immediately either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.

- Fix lookup/reclaim race condition.

The above improvement uncovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookie was in progress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. The next operation on the cookie will crash (or at least
confuse) the filesystem.

We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enables userland to detect
a situation where it initiated a lookup that has not completed in the
kernel. In such a situation, the reclaim must be ignored, as the node is
about to be looked up again.

diffstat:

 lib/libpuffs/dispatcher.c   |   28 +++++++-
 lib/libpuffs/puffs.h        |    3 +-
 sys/fs/puffs/puffs_msgif.c  |   84 +++++++++++++++++++++--
 sys/fs/puffs/puffs_msgif.h  |    4 +-
 sys/fs/puffs/puffs_sys.h    |   23 ++++-
 sys/fs/puffs/puffs_vfsops.c |   11 +-
 sys/fs/puffs/puffs_vnops.c  |  156 ++++++++++++++++++++++++++++++++-----------
 7 files changed, 244 insertions(+), 65 deletions(-)

diffs (truncated from 688 to 300 lines):

diff -r 2192cc62450b -r fc207dc9d333 lib/libpuffs/dispatcher.c
--- a/lib/libpuffs/dispatcher.c Sat Jul 21 05:11:45 2012 +0000
+++ b/lib/libpuffs/dispatcher.c Sat Jul 21 05:17:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: dispatcher.c,v 1.41 2012/06/27 13:25:23 manu Exp $     */
+/*     $NetBSD: dispatcher.c,v 1.42 2012/07/21 05:17:10 manu Exp $     */
 
 /*
  * Copyright (c) 2006, 2007, 2008 Antti Kantee.  All Rights Reserved.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 #if !defined(lint)
-__RCSID("$NetBSD: dispatcher.c,v 1.41 2012/06/27 13:25:23 manu Exp $");
+__RCSID("$NetBSD: dispatcher.c,v 1.42 2012/07/21 05:17:10 manu Exp $");
 #endif /* !lint */
 
 #include <sys/types.h>
@@ -301,6 +301,12 @@
                                                    &pcn.pcn_po_full);
                                }
                        }
+
+                       if (!error) {
+                               if (pn == NULL)
+                                       pn = PU_CMAP(pu, auxt->pvnr_newnode);
+                               pn->pn_nlookup++;
+                       }
                        break;
                }
 
@@ -810,8 +816,24 @@
 
                case PUFFS_VN_RECLAIM:
                {
+                       struct puffs_vnmsg_reclaim *auxt = auxbuf;
+                       struct puffs_node *pn;
+               
+                       if (pops->puffs_node_reclaim == NULL) {
+                               error = 0;
+                               break;
+                       }
 
-                       if (pops->puffs_node_reclaim == NULL) {
+                       /*
+                        * This fixes a race condition, 
+                        * where a node in reclaimed by kernel 
+                        * after a lookup request is sent, 
+                        * but before the reply, leaving the kernel
+                        * with a invalid vnode/cookie reference.
+                        */
+                       pn = PU_CMAP(pu, opcookie);
+                       pn->pn_nlookup -= auxt->pvnr_nlookup;
+                       if (pn->pn_nlookup >= 1) {
                                error = 0;
                                break;
                        }
diff -r 2192cc62450b -r fc207dc9d333 lib/libpuffs/puffs.h
--- a/lib/libpuffs/puffs.h      Sat Jul 21 05:11:45 2012 +0000
+++ b/lib/libpuffs/puffs.h      Sat Jul 21 05:17:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: puffs.h,v 1.122 2012/06/27 13:25:23 manu Exp $ */
+/*     $NetBSD: puffs.h,v 1.123 2012/07/21 05:17:10 manu Exp $ */
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -79,6 +79,7 @@
 struct puffs_node {
        off_t                   pn_size;
        int                     pn_flags;
+       int                     pn_nlookup;
        struct vattr            pn_va;
 
        void                    *pn_data;       /* private data         */
diff -r 2192cc62450b -r fc207dc9d333 sys/fs/puffs/puffs_msgif.c
--- a/sys/fs/puffs/puffs_msgif.c        Sat Jul 21 05:11:45 2012 +0000
+++ b/sys/fs/puffs/puffs_msgif.c        Sat Jul 21 05:17:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: puffs_msgif.c,v 1.89 2011/10/19 01:39:29 manu Exp $    */
+/*     $NetBSD: puffs_msgif.c,v 1.90 2012/07/21 05:17:10 manu Exp $    */
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -30,9 +30,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.89 2011/10/19 01:39:29 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.90 2012/07/21 05:17:10 manu Exp $");
 
 #include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/atomic.h>
 #include <sys/kmem.h>
 #include <sys/kthread.h>
@@ -94,6 +95,8 @@
 static int totalpark;
 #endif
 
+int puffs_sopreq_expire_timeout = PUFFS_SOPREQ_EXPIRE_TIMEOUT;
+
 static int
 makepark(void *arg, void *obj, int flags)
 {
@@ -839,6 +842,30 @@
        puffs_msgpark_release1(park, 2);
 }
 
+/*
+ * Node expiry. We come here after an inactive on an unexpired node.
+ * The expiry has been queued and is done in sop thread.
+ */
+static bool
+puffsop_expire(struct puffs_mount *pmp, puffs_cookie_t cookie)
+{
+       struct vnode *vp;
+
+       KASSERT(PUFFS_USE_FS_TTL(pmp));
+
+       /* 
+        * If it still exists and has no reference,
+        * vrele should cause it to be reclaimed.
+        * Otherwise, we have nothing to do.
+        */
+       if (puffs_cookie2vnode(pmp, cookie, 0, 0, &vp) == 0) {
+               VPTOPP(vp)->pn_stat &= ~PNODE_SOPEXP;
+               vrele(vp); 
+       }
+
+       return false;
+}
+
 static void
 puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
 {
@@ -959,6 +986,7 @@
                psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
                memcpy(&psopr->psopr_pf, pf, sizeof(*pf));
                psopr->psopr_sopreq = PUFFS_SOPREQ_FLUSH;
+               psopr->psopr_at = hardclock_ticks;
 
                mutex_enter(&pmp->pmp_sopmtx);
                if (pmp->pmp_sopthrcount == 0) {
@@ -966,7 +994,7 @@
                        kmem_free(psopr, sizeof(*psopr));
                        puffs_msg_sendresp(pmp, preq, ENXIO);
                } else {
-                       TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
+                       TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs,
                            psopr, psopr_entries);
                        cv_signal(&pmp->pmp_sopcv);
                        mutex_exit(&pmp->pmp_sopmtx);
@@ -983,6 +1011,7 @@
                psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
                psopr->psopr_preq = *preq;
                psopr->psopr_sopreq = PUFFS_SOPREQ_UNMOUNT;
+               psopr->psopr_at = hardclock_ticks;
 
                mutex_enter(&pmp->pmp_sopmtx);
                if (pmp->pmp_sopthrcount == 0) {
@@ -990,7 +1019,7 @@
                        kmem_free(psopr, sizeof(*psopr));
                        puffs_msg_sendresp(pmp, preq, ENXIO);
                } else {
-                       TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
+                       TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs,
                            psopr, psopr_entries);
                        cv_signal(&pmp->pmp_sopcv);
                        mutex_exit(&pmp->pmp_sopmtx);
@@ -1014,6 +1043,8 @@
  * server, i.e. a long-term kernel lock which will be released only
  * once the file server acknowledges a request
  */
+#define TIMED_OUT(expire) \
+    ((int)((unsigned int)hardclock_ticks - (unsigned int)expire) > 0)
 void
 puffs_sop_thread(void *arg)
 {
@@ -1022,12 +1053,36 @@
        struct puffs_sopreq *psopr;
        bool keeprunning;
        bool unmountme = false;
+       int timeo;
+
+       timeo = PUFFS_USE_FS_TTL(pmp) ? puffs_sopreq_expire_timeout : 0;
 
        mutex_enter(&pmp->pmp_sopmtx);
        for (keeprunning = true; keeprunning; ) {
-               while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) == NULL)
-                       cv_wait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx);
-               TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
+               /*
+                * We have a higher priority queue for flush and umount
+                * and a lower priority queue for reclaims. Request are
+                * not honoured before clock reaches psopr_at. This code
+                * assumes that requests are ordered by psopr_at in queues.
+                */
+               do {
+                       psopr = TAILQ_FIRST(&pmp->pmp_sopfastreqs);
+                       if ((psopr != NULL) && TIMED_OUT(psopr->psopr_at)) {
+                               TAILQ_REMOVE(&pmp->pmp_sopfastreqs,
+                                            psopr, psopr_entries);
+                               break;
+                       }
+
+                       psopr = TAILQ_FIRST(&pmp->pmp_sopslowreqs);
+                       if ((psopr != NULL) && TIMED_OUT(psopr->psopr_at)) {
+                               TAILQ_REMOVE(&pmp->pmp_sopslowreqs,
+                                            psopr, psopr_entries);
+                               break;
+                       }
+
+                       cv_timedwait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx, timeo);
+               } while (1 /* CONSTCOND */);
+
                mutex_exit(&pmp->pmp_sopmtx);
 
                switch (psopr->psopr_sopreq) {
@@ -1037,6 +1092,9 @@
                case PUFFS_SOPREQ_FLUSH:
                        puffsop_flush(pmp, &psopr->psopr_pf);
                        break;
+               case PUFFS_SOPREQ_EXPIRE:
+                       puffsop_expire(pmp, psopr->psopr_ck);
+                       break;
                case PUFFS_SOPREQ_UNMOUNT:
                        puffs_msg_sendresp(pmp, &psopr->psopr_preq, 0);
 
@@ -1058,8 +1116,16 @@
        /*
         * Purge remaining ops.
         */
-       while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) != NULL) {
-               TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
+       while ((psopr = TAILQ_FIRST(&pmp->pmp_sopfastreqs)) != NULL) {
+               TAILQ_REMOVE(&pmp->pmp_sopfastreqs, psopr, psopr_entries);
+               mutex_exit(&pmp->pmp_sopmtx);
+               puffs_msg_sendresp(pmp, &psopr->psopr_preq, ENXIO);
+               kmem_free(psopr, sizeof(*psopr));
+               mutex_enter(&pmp->pmp_sopmtx);
+       }
+
+       while ((psopr = TAILQ_FIRST(&pmp->pmp_sopslowreqs)) != NULL) {
+               TAILQ_REMOVE(&pmp->pmp_sopslowreqs, psopr, psopr_entries);
                mutex_exit(&pmp->pmp_sopmtx);
                puffs_msg_sendresp(pmp, &psopr->psopr_preq, ENXIO);
                kmem_free(psopr, sizeof(*psopr));
diff -r 2192cc62450b -r fc207dc9d333 sys/fs/puffs/puffs_msgif.h
--- a/sys/fs/puffs/puffs_msgif.h        Sat Jul 21 05:11:45 2012 +0000
+++ b/sys/fs/puffs/puffs_msgif.h        Sat Jul 21 05:17:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: puffs_msgif.h,v 1.78 2012/04/08 15:04:41 manu Exp $    */
+/*     $NetBSD: puffs_msgif.h,v 1.79 2012/07/21 05:17:10 manu Exp $    */
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -567,6 +567,8 @@
 
 struct puffs_vnmsg_reclaim {
        struct puffs_req        pvn_pr;
+
+       int                     pvnr_nlookup;           /* OUT */
 };
 
 struct puffs_vnmsg_inactive {
diff -r 2192cc62450b -r fc207dc9d333 sys/fs/puffs/puffs_sys.h
--- a/sys/fs/puffs/puffs_sys.h  Sat Jul 21 05:11:45 2012 +0000
+++ b/sys/fs/puffs/puffs_sys.h  Sat Jul 21 05:17:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: puffs_sys.h,v 1.79 2012/04/08 15:04:41 manu Exp $      */
+/*     $NetBSD: puffs_sys.h,v 1.80 2012/07/21 05:17:10 manu Exp $      */
 
 /*
  * Copyright (c) 2005, 2006  Antti Kantee.  All Rights Reserved.
@@ -101,23 +101,30 @@
        LIST_ENTRY(puffs_newcookie) pnc_entries;
 };
 
+#define PUFFS_SOPREQ_EXPIRE_TIMEOUT 1000
+extern int puffs_sopreq_expire_timeout;
+
 enum puffs_sopreqtype {
        PUFFS_SOPREQSYS_EXIT,
        PUFFS_SOPREQ_FLUSH,
        PUFFS_SOPREQ_UNMOUNT,
+       PUFFS_SOPREQ_EXPIRE,
 };
 
 struct puffs_sopreq {
        union {
                struct puffs_req preq;
                struct puffs_flush pf;
+               puffs_cookie_t ck;
        } psopr_u;
 
        enum puffs_sopreqtype psopr_sopreq;
        TAILQ_ENTRY(puffs_sopreq) psopr_entries;
+       int psopr_at;
 };
 #define psopr_preq psopr_u.preq



Home | Main Index | Thread Index | Old Index