Source-Changes-HG archive


[src/ad-namecache]: src/sys - Put all the namecache stuff back into vnode_imp...



details:   https://anonhg.NetBSD.org/src/rev/8dec8647e97f
branches:  ad-namecache
changeset: 850549:8dec8647e97f
user:      ad <ad%NetBSD.org@localhost>
date:      Fri Jan 24 16:48:58 2020 +0000

description:
- Put all the namecache stuff back into vnode_impl_t.
- Tidy vfs_cache.c up, finish the comments.
- Finalise how ID information is entered to the cache.
- Handle very small/old systems.
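
The third item refers to the tmpfs hunks below, where the former
cache_set_id()/cache_update_id() pair is replaced by a single cache_enter_id()
call made whenever a node's identity attributes are set or changed.  A minimal
sketch of that call pattern follows; the wrapper name is hypothetical, while
the cache_enter_id() arguments are taken from the tmpfs hunks:

    /*
     * Sketch only: publish a node's mode/uid/gid to the name cache with
     * the single consolidated call.  Assumes the usual tmpfs headers for
     * struct tmpfs_node, ALLPERMS and the cache_enter_id() prototype.
     */
    static void
    example_publish_id(struct vnode *vp, struct tmpfs_node *node,
        mode_t mode, uid_t uid, gid_t gid)
    {

            node->tn_mode = (mode & ALLPERMS);
            node->tn_uid = uid;
            node->tn_gid = gid;
            cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
    }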

diffstat:

 sys/fs/tmpfs/tmpfs_subr.c   |   10 +-
 sys/fs/tmpfs/tmpfs_vfsops.c |    7 +-
 sys/kern/vfs_cache.c        |  793 +++++++++++++++++++++----------------------
 sys/sys/namei.src           |   12 +-
 sys/ufs/ffs/ffs_vfsops.c    |   11 +-
 sys/ufs/ufs/ufs_vnops.c     |   12 +-
 6 files changed, 407 insertions(+), 438 deletions(-)

diffs (truncated from 1599 to 300 lines):

diff -r 8870c6bb8453 -r 8dec8647e97f sys/fs/tmpfs/tmpfs_subr.c
--- a/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 24 16:05:37 2020 +0000
+++ b/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 24 16:48:58 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $  */
+/*     $NetBSD: tmpfs_subr.c,v 1.105.2.2 2020/01/24 16:48:58 ad Exp $  */
 
 /*
  * Copyright (c) 2005-2013 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105.2.2 2020/01/24 16:48:58 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/cprng.h>
@@ -148,7 +148,7 @@
        node->tn_vnode = vp;
        uvm_vnp_setsize(vp, node->tn_size);
        KASSERT(node->tn_mode != VNOVAL);
-       cache_set_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
+       cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
 }
 
 /*
@@ -1039,7 +1039,7 @@
        node->tn_mode = (mode & ALLPERMS);
        tmpfs_update(vp, TMPFS_UPDATE_CTIME);
        VN_KNOTE(vp, NOTE_ATTRIB);
-       cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
+       cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
        return 0;
 }
 
@@ -1084,7 +1084,7 @@
        node->tn_gid = gid;
        tmpfs_update(vp, TMPFS_UPDATE_CTIME);
        VN_KNOTE(vp, NOTE_ATTRIB);
-       cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
+       cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
        return 0;
 }
 
diff -r 8870c6bb8453 -r 8dec8647e97f sys/fs/tmpfs/tmpfs_vfsops.c
--- a/sys/fs/tmpfs/tmpfs_vfsops.c       Fri Jan 24 16:05:37 2020 +0000
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c       Fri Jan 24 16:48:58 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tmpfs_vfsops.c,v 1.75.2.2 2020/01/19 21:21:54 ad Exp $ */
+/*     $NetBSD: tmpfs_vfsops.c,v 1.75.2.3 2020/01/24 16:48:58 ad Exp $ */
 
 /*
  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.75.2.2 2020/01/19 21:21:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.75.2.3 2020/01/24 16:48:58 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -182,7 +182,8 @@
        mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN;
        mp->mnt_fs_bshift = PAGE_SHIFT;
        mp->mnt_dev_bshift = DEV_BSHIFT;
-       mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO | IMNT_SHRLOOKUP;
+       mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO | IMNT_SHRLOOKUP |
+           IMNT_NCLOOKUP;
        vfs_getnewfsid(mp);
 
        /* Allocate the tmpfs mount structure and fill it. */
diff -r 8870c6bb8453 -r 8dec8647e97f sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c      Fri Jan 24 16:05:37 2020 +0000
+++ b/sys/kern/vfs_cache.c      Fri Jan 24 16:48:58 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vfs_cache.c,v 1.126.2.10 2020/01/23 12:33:18 ad Exp $  */
+/*     $NetBSD: vfs_cache.c,v 1.126.2.11 2020/01/24 16:48:59 ad Exp $  */
 
 /*-
  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -64,45 +64,46 @@
  * Name caching:
  *
  *     Names found by directory scans are retained in a cache for future
- *     reference.  It is managed pseudo-LRU, so frequently used names will
- *     hang around.  The cache is indexed by directory.
+ *     reference.  It is managed LRU, so frequently used names will hang
+ *     around.  The cache is indexed by hash value obtained from the name.
  *
- * Background:
+ *     The name cache (or directory name lookup cache) is the brainchild of
+ *     Robert Elz and made its first appearance in 4.3BSD.
  *
- *     XXX add a bit of history
- *
- * Data structures
+ * Data structures:
  *
- *     Typically DNLCs use a global hash table for lookup, which works very
- *     well but can cause challenges for the CPU cache on modern CPUs, and
- *     concurrency headaches for the kernel hacker.  The focus of interest
- *     in this implementation is the directory itself: a per-directory data
- *     structure is used to index names.  Currently this is a red-black
- *     tree.  There are no special concurrency requirements placed on the
- *     index, so it should not be difficult to experiment with other
- *     structures.
+ *     Most Unix namecaches very sensibly use a global hash table to index
+ *     names.  The global hash table works well, but can cause concurrency
+ *     headaches for the kernel hacker.  In the NetBSD 10.0 implementation
+ *     we are not sensible, and use a per-directory data structure to index
+ *     names, but the cache otherwise functions the same.
+ *
+ *     The index is a red-black tree.  There are no special concurrency
+ *     requirements placed on it, because it's per-directory and protected
+ *     by the namecache's per-directory locks.  It should therefore not be
+ *     difficult to experiment with other data structures.
  *
  *     Each cached name is stored in a struct namecache, along with a
- *     pointer the associated vnode (nc_vp).  For simplicity (and economy
- *     of storage), names longer than a maximum length of NCHNAMLEN are
- *     allocated with kmem_alloc(); they occur infrequently.  Names shorter
- *     than this are stored directly in struct namecache.  If it is a
- *     "negative" entry, (i.e.  for a name that is known NOT to exist) the
- *     vnode pointer will be NULL.
+ *     pointer to the associated vnode (nc_vp).  Names longer than a
+ *     maximum length of NCHNAMLEN are allocated with kmem_alloc(); they
+ *     occur infrequently, and names shorter than this are stored directly
+ *     in struct namecache.  If it is a "negative" entry, (i.e. for a name
+ *     that is known NOT to exist) the vnode pointer will be NULL.
  *
- *     The nchnode (XXXAD vnode) and namecache structures are connected together thusly
+ *     For a directory with 3 cached names for 3 distinct vnodes, the
+ *     various vnodes and namecache structs would be connected like this
  *     (the root is at the bottom of the diagram):
  *
  *          ...
  *           ^
- *           |- nn_tree
+ *           |- vi_nc_tree
  *           |                                                           
  *      +----o----+               +---------+               +---------+
  *      |  VDIR   |               |  VCHR   |               |  VREG   |
- *      | nchnode o-----+         | nchnode o-----+         | nchnode o------+
+ *      |  vnode  o-----+         |  vnode  o-----+         |  vnode  o------+
  *      +---------+     |         +---------+     |         +---------+      |
  *           ^          |              ^          |              ^           |
- *           |- nc_nn   |- nn_list     |- nc_nn   |- nn_list     |- nc_nn    |
+ *           |- nc_vp   |- vi_nc_list  |- nc_vp   |- vi_nc_list  |- nc_vp    |
  *           |          |              |          |              |           |
  *      +----o----+     |         +----o----+     |         +----o----+      |
  *  +---onamecache|<----+     +---onamecache|<----+     +---onamecache|<-----+
@@ -110,12 +111,12 @@
  *  |        ^                |        ^                |        ^
  *  |        |                |        |                |        |
  *  |        |  +----------------------+                |        |
- *  |-nc_dnn | +-------------------------------------------------+
- *  |        |/- nn_tree      |                         |
- *  |        |                |- nc_dnn                 |- nc_dnn
+ *  |-nc_dvp | +-------------------------------------------------+
+ *  |        |/- vi_nc_tree   |                         |
+ *  |        |                |- nc_dvp                 |- nc_dvp
  *  |   +----o----+           |                         |
  *  +-->|  VDIR   |<----------+                         |
- *      | nchnode |<------------------------------------+
+ *      |  vnode  |<------------------------------------+
  *      +---------+
  *
  *      START HERE
@@ -123,44 +124,53 @@
  * Replacement:
  *
  *     As the cache becomes full, old and unused entries are purged as new
- *     entries are added.  It would be too expensive to maintain a strict
- *     LRU list based on last access, so instead we ape the VM system and
- *     maintain lits of active and inactive entries.  New entries go to the
- *     tail of the active list.  After they age out, they are placed on the
- *     tail of the inactive list.  Any subsequent use of the cache entry
- *     reactivates it, saving it from impending doom; if not reactivated,
- *     the entry will eventually be purged.
+ *     entries are added.  The synchronization overhead in maintaining a
+ *     strict ordering would be prohibitive, so the VM system's "clock" or
+ *     "second chance" page replacement algorithm is aped here.  New
+ *     entries go to the tail of the active list.  After they age out and
+ *     reach the head of the list, they are moved to the tail of the
+ *     inactive list.  Any use of the deactivated cache entry reactivates
+ *     it, saving it from impending doom; if not reactivated, the entry
+ *     eventually reaches the head of the inactive list and is purged.
  *
  * Concurrency:
  *
- *     From a performance perspective, cache_lookup(nameiop == LOOKUP) and
- *     its siblings are what really matters; insertion of new entries with
- *     cache_enter() is comparatively infrequent.  This dictates the
- *     concurrency strategy: lookups must be easy, never difficult.
+ *     From a performance perspective, cache_lookup(nameiop == LOOKUP) is
+ *     what really matters; insertion of new entries with cache_enter() is
+ *     comparatively infrequent, and overshadowed by the cost of expensive
+ *     file system metadata operations (which may involve disk I/O).  We
+ *     therefore want to make everything simplest in the lookup path.
  *
  *     struct namecache is mostly stable except for list and tree related
- *     entries, which don't affect the cached name or vnode, and entries
- *     are purged in preference to modifying them.  Read access to
- *     namecache entries is made via tree, list, or LRU list.  A lock
- *     corresponding to the direction of access should be held.  See
- *     definition of "struct namecache" in src/sys/namei.src, and the
- *     definition of "struct nchnode" XXXAD vnode for the particulars.
+ *     entries, changes to which don't affect the cached name or vnode. 
+ *     For changes to name+vnode, entries are purged in preference to
+ *     modifying them.
  *
- *     Per-CPU statistics, and "numcache" are read unlocked, since an
- *     approximate value is OK.  We maintain uintptr_t sized per-CPU
+ *     Read access to namecache entries is made via tree, list, or LRU
+ *     list.  A lock corresponding to the direction of access should be
+ *     held.  See definition of "struct namecache" in src/sys/namei.src,
+ *     and the definition of "struct vnode" for the particulars.
+ *
+ *     Per-CPU statistics, and LRU list totals are read unlocked, since
+ *     an approximate value is OK.  We maintain uintptr_t sized per-CPU
  *     counters and 64-bit global counters under the theory that uintptr_t
  *     sized counters are less likely to be hosed by nonatomic increment.
  *
  *     The lock order is:
  *
- *     1) nn->nn_lock          (tree or parent->child direction)
- *     2) nn->nn_listlock      (list or child->parent direction)
- *     3) cache_lru_lock       (LRU list direction)
- *     4) vp->v_interlock
+ *     1) vi->vi_nc_lock       (tree or parent -> child direction,
+ *                              used during forward lookup)
+ *
+ *     2) vi->vi_nc_listlock   (list or child -> parent direction,
+ *                              used during reverse lookup)
+ *
+ *     3) cache_lru_lock       (LRU list direction, used during reclaim)
+ *
+ *     4) vp->v_interlock      (what the cache entry points to)
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.10 2020/01/23 12:33:18 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.11 2020/01/24 16:48:59 ad Exp $");
 
 #define __NAMECACHE_PRIVATE
 #ifdef _KERNEL_OPT
@@ -186,31 +196,6 @@
 
 #include <miscfs/genfs/genfs.h>
 
-/*
- * Per-vnode state for the namecache.  Field markings and corresponding
- * locks.  XXXAD this needs to be folded back into struct vnode, but
- * struct vnode_impl_t needs to be folded back into struct vnode first.
- *
- *     -       stable throught the lifetime of the vnode
- *     n       protected by nn_lock
- *     l       protected by nn_listlock
- */
-struct nchnode {
-       /* First cache line: frequently used stuff. */
-       rb_tree_t       nn_tree;        /* n  namecache tree */
-       TAILQ_HEAD(,namecache) nn_list; /* l  namecaches (parent) */
-       mode_t          nn_mode;        /* n  cached mode or VNOVAL */
-       uid_t           nn_uid;         /* n  cached UID or VNOVAL */
-       gid_t           nn_gid;         /* n  cached GID or VNOVAL */
-       uint32_t        nn_spare;       /* -  spare (padding) */
-
-       /* Second cache line: locks and infrequenly used stuff. */
-       krwlock_t       nn_lock         /* -  lock on node */
-           __aligned(COHERENCY_UNIT);
-       krwlock_t       nn_listlock;    /* -  lock on nn_list */
-       struct vnode    *nn_vp;         /* -  backpointer to vnode */
-};
-
 static void    cache_activate(struct namecache *);
 static int     cache_compare_key(void *, const void *, const void *);
 static int     cache_compare_nodes(void *, const void *, const void *);
@@ -223,7 +208,6 @@
 
 /* Global pool cache. */
 static pool_cache_t cache_pool __read_mostly;
-static pool_cache_t cache_node_pool __read_mostly;
 
 /* LRU replacement. */
 enum cache_lru_id {
@@ -250,7 +234,7 @@
 
 /* Tunables */
 static const int cache_lru_maxdeact = 2;       /* max # to deactivate */
-static const int cache_lru_maxscan = 128;      /* max # to scan/reclaim */
+static const int cache_lru_maxscan = 64;       /* max # to scan/reclaim */
 static int doingcache = 1;                     /* 1 => enable the cache */
 
 /* sysctl */
@@ -312,23 +296,22 @@
 static int
 cache_compare_key(void *context, const void *n, const void *k)


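The rewritten "Replacement" comment describes a clock / second-chance policy:
new entries join the tail of an active list, age out to the tail of an
inactive list, and escape reclaim only if they are used again before reaching
the head of the inactive list.  A small self-contained sketch of that policy,
with hypothetical names rather than the identifiers used in vfs_cache.c:

    #include <sys/queue.h>

    enum example_lru { EX_ACTIVE, EX_INACTIVE, EX_COUNT };

    struct example_entry {
            TAILQ_ENTRY(example_entry) ex_lru;
            enum example_lru ex_list;       /* which list we are on */
    };
    TAILQ_HEAD(example_lru_head, example_entry);
    static struct example_lru_head example_lists[EX_COUNT];

    static void
    example_lru_init(void)
    {

            TAILQ_INIT(&example_lists[EX_ACTIVE]);
            TAILQ_INIT(&example_lists[EX_INACTIVE]);
    }

    /* New entries start at the tail of the active list. */
    static void
    example_insert(struct example_entry *e)
    {

            e->ex_list = EX_ACTIVE;
            TAILQ_INSERT_TAIL(&example_lists[EX_ACTIVE], e, ex_lru);
    }

    /* Any use of a deactivated entry earns it a second chance. */
    static void
    example_touch(struct example_entry *e)
    {

            if (e->ex_list == EX_INACTIVE) {
                    TAILQ_REMOVE(&example_lists[EX_INACTIVE], e, ex_lru);
                    e->ex_list = EX_ACTIVE;
                    TAILQ_INSERT_TAIL(&example_lists[EX_ACTIVE], e, ex_lru);
            }
    }

    /* Aging demotes the head of the active list instead of freeing it;  */
    /* the head of the inactive list is the reclaim victim.              */
    static struct example_entry *
    example_reclaim(void)
    {
            struct example_entry *e;

            if ((e = TAILQ_FIRST(&example_lists[EX_ACTIVE])) != NULL) {
                    TAILQ_REMOVE(&example_lists[EX_ACTIVE], e, ex_lru);
                    e->ex_list = EX_INACTIVE;
                    TAILQ_INSERT_TAIL(&example_lists[EX_INACTIVE], e, ex_lru);
            }
            return TAILQ_FIRST(&example_lists[EX_INACTIVE]);
    }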

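The numbered lock order at the end of the comment (vi_nc_lock, then
vi_nc_listlock, then cache_lru_lock, then v_interlock) is the part most easily
got wrong when forward lookup, reverse lookup and reclaim meet.  A hedged
sketch of code that respects that order, using local stand-in locks rather
than the real vnode_impl_t and vfs_cache.c objects:

    #include <sys/mutex.h>
    #include <sys/rwlock.h>

    static krwlock_t example_nc_lock;      /* 1) tree, parent -> child */
    static krwlock_t example_nc_listlock;  /* 2) list, child -> parent */
    static kmutex_t  example_lru_lock;     /* 3) LRU list, reclaim */
    static kmutex_t  example_interlock;    /* 4) what the entry points to */

    static void
    example_lock_init(void)
    {

            rw_init(&example_nc_lock);
            rw_init(&example_nc_listlock);
            mutex_init(&example_lru_lock, MUTEX_DEFAULT, IPL_NONE);
            mutex_init(&example_interlock, MUTEX_DEFAULT, IPL_NONE);
    }

    /* Whenever more than one of these is held, acquire in 1) -> 4) order. */
    static void
    example_locked_op(void)
    {

            rw_enter(&example_nc_lock, RW_READER);
            rw_enter(&example_nc_listlock, RW_READER);
            mutex_enter(&example_lru_lock);
            mutex_enter(&example_interlock);

            /* ... examine or adjust cache state here ... */

            mutex_exit(&example_interlock);
            mutex_exit(&example_lru_lock);
            rw_exit(&example_nc_listlock);
            rw_exit(&example_nc_lock);
    }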